xref: /titanic_50/usr/src/uts/common/fs/nfs/nfs3_srv.c (revision 570de38f63910201fdd77246630b7aa8f9dc5661)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cred.h>
33 #include <sys/buf.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/uio.h>
37 #include <sys/errno.h>
38 #include <sys/sysmacros.h>
39 #include <sys/statvfs.h>
40 #include <sys/kmem.h>
41 #include <sys/dirent.h>
42 #include <sys/cmn_err.h>
43 #include <sys/debug.h>
44 #include <sys/systeminfo.h>
45 #include <sys/flock.h>
46 #include <sys/nbmlock.h>
47 #include <sys/policy.h>
48 #include <sys/sdt.h>
49 
50 #include <rpc/types.h>
51 #include <rpc/auth.h>
52 #include <rpc/svc.h>
53 #include <rpc/rpc_rdma.h>
54 
55 #include <nfs/nfs.h>
56 #include <nfs/export.h>
57 #include <nfs/nfs_cmd.h>
58 
59 #include <sys/strsubr.h>
60 
61 #include <sys/tsol/label.h>
62 #include <sys/tsol/tndb.h>
63 
64 #include <sys/zone.h>
65 
66 #include <inet/ip.h>
67 #include <inet/ip6.h>
68 
69 /*
70  * These are the interface routines for the server side of the
71  * Network File System.  See the NFS version 3 protocol specification
72  * for a description of this interface.
73  */
74 
/*
 * DEBUG-only switches.  In this file rfs3_do_pre_op_attr and
 * rfs3_do_post_op_attr decide whether the handlers pass pre-/post-operation
 * attributes to the reply; non-DEBUG builds always pass them.
 * NOTE(review): rfs3_do_post_op_fh3 presumably does the same for returned
 * file handles - confirm against its users.
 */
75 #ifdef DEBUG
76 int rfs3_do_pre_op_attr = 1;
77 int rfs3_do_post_op_attr = 1;
78 int rfs3_do_post_op_fh3 = 1;
79 #endif
80 
/* NOTE(review): presumably the write verifier returned to clients - confirm. */
81 static writeverf3 write3verf;
82 
/* Forward declarations of file-local helpers. */
83 static int	sattr3_to_vattr(sattr3 *, struct vattr *);
84 static int	vattr_to_fattr3(struct vattr *, fattr3 *);
85 static int	vattr_to_wcc_attr(struct vattr *, wcc_attr *);
86 static void	vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
87 static void	vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
88 static int	rdma_setup_read_data3(READ3args *, READ3resok *);
89 
/* Consulted by rfs3_read(): non-zero selects loaned buffers for non-RDMA reads. */
90 extern int nfs_loaned_buffers;
91 
/* Copied into caller_context_t.cc_caller_id by handlers that pass a context. */
92 u_longlong_t nfs3_srv_caller_id;
93 
94 /* ARGSUSED */
95 void
96 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
97 	struct svc_req *req, cred_t *cr)
98 {
99 	int error;
100 	vnode_t *vp;
101 	struct vattr va;
102 
103 	vp = nfs3_fhtovp(&args->object, exi);
104 
105 	DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
106 	    cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
107 
108 	if (vp == NULL) {
109 		error = ESTALE;
110 		goto out;
111 	}
112 
113 	va.va_mask = AT_ALL;
114 	error = rfs4_delegated_getattr(vp, &va, 0, cr);
115 
116 	if (!error) {
117 		/* Lie about the object type for a referral */
118 		if (vn_is_nfs_reparse(vp, cr))
119 			va.va_type = VLNK;
120 
121 		/* overflow error if time or size is out of range */
122 		error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
123 		if (error)
124 			goto out;
125 		resp->status = NFS3_OK;
126 
127 		DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
128 		    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
129 
130 		VN_RELE(vp);
131 
132 		return;
133 	}
134 
135 out:
136 	if (curthread->t_flag & T_WOULDBLOCK) {
137 		curthread->t_flag &= ~T_WOULDBLOCK;
138 		resp->status = NFS3ERR_JUKEBOX;
139 	} else
140 		resp->status = puterrno3(error);
141 
142 	DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
143 	    cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
144 
145 	if (vp != NULL)
146 		VN_RELE(vp);
147 }
148 
149 void *
150 rfs3_getattr_getfh(GETATTR3args *args)
151 {
152 
153 	return (&args->object);
154 }
155 
156 void
157 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
158 	struct svc_req *req, cred_t *cr)
159 {
160 	int error;
161 	vnode_t *vp;
162 	struct vattr *bvap;
163 	struct vattr bva;
164 	struct vattr *avap;
165 	struct vattr ava;
166 	int flag;
167 	int in_crit = 0;
168 	struct flock64 bf;
169 	caller_context_t ct;
170 
171 	bvap = NULL;
172 	avap = NULL;
173 
174 	vp = nfs3_fhtovp(&args->object, exi);
175 
176 	DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
177 	    cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
178 
179 	if (vp == NULL) {
180 		error = ESTALE;
181 		goto out;
182 	}
183 
184 	error = sattr3_to_vattr(&args->new_attributes, &ava);
185 	if (error)
186 		goto out;
187 
188 	if (is_system_labeled()) {
189 		bslabel_t *clabel = req->rq_label;
190 
191 		ASSERT(clabel != NULL);
192 		DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
193 		    "got client label from request(1)", struct svc_req *, req);
194 
195 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
196 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
197 			    exi)) {
198 				resp->status = NFS3ERR_ACCES;
199 				goto out1;
200 			}
201 		}
202 	}
203 
204 	/*
205 	 * We need to specially handle size changes because of
206 	 * possible conflicting NBMAND locks. Get into critical
207 	 * region before VOP_GETATTR, so the size attribute is
208 	 * valid when checking conflicts.
209 	 *
210 	 * Also, check to see if the v4 side of the server has
211 	 * delegated this file.  If so, then we return JUKEBOX to
212 	 * allow the client to retrasmit its request.
213 	 */
214 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
215 		if (nbl_need_check(vp)) {
216 			nbl_start_crit(vp, RW_READER);
217 			in_crit = 1;
218 		}
219 	}
220 
221 	bva.va_mask = AT_ALL;
222 	error = rfs4_delegated_getattr(vp, &bva, 0, cr);
223 
224 	/*
225 	 * If we can't get the attributes, then we can't do the
226 	 * right access checking.  So, we'll fail the request.
227 	 */
228 	if (error)
229 		goto out;
230 
231 #ifdef DEBUG
232 	if (rfs3_do_pre_op_attr)
233 		bvap = &bva;
234 #else
235 	bvap = &bva;
236 #endif
237 
238 	if (rdonly(exi, req) || vn_is_readonly(vp)) {
239 		resp->status = NFS3ERR_ROFS;
240 		goto out1;
241 	}
242 
243 	if (args->guard.check &&
244 	    (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
245 	    args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
246 		resp->status = NFS3ERR_NOT_SYNC;
247 		goto out1;
248 	}
249 
250 	if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
251 		flag = ATTR_UTIME;
252 	else
253 		flag = 0;
254 
255 	/*
256 	 * If the filesystem is exported with nosuid, then mask off
257 	 * the setuid and setgid bits.
258 	 */
259 	if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
260 	    (exi->exi_export.ex_flags & EX_NOSUID))
261 		ava.va_mode &= ~(VSUID | VSGID);
262 
263 	ct.cc_sysid = 0;
264 	ct.cc_pid = 0;
265 	ct.cc_caller_id = nfs3_srv_caller_id;
266 	ct.cc_flags = CC_DONTBLOCK;
267 
268 	/*
269 	 * We need to specially handle size changes because it is
270 	 * possible for the client to create a file with modes
271 	 * which indicate read-only, but with the file opened for
272 	 * writing.  If the client then tries to set the size of
273 	 * the file, then the normal access checking done in
274 	 * VOP_SETATTR would prevent the client from doing so,
275 	 * although it should be legal for it to do so.  To get
276 	 * around this, we do the access checking for ourselves
277 	 * and then use VOP_SPACE which doesn't do the access
278 	 * checking which VOP_SETATTR does. VOP_SPACE can only
279 	 * operate on VREG files, let VOP_SETATTR handle the other
280 	 * extremely rare cases.
281 	 * Also the client should not be allowed to change the
282 	 * size of the file if there is a conflicting non-blocking
283 	 * mandatory lock in the region the change.
284 	 */
285 	if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
286 		if (in_crit) {
287 			u_offset_t offset;
288 			ssize_t length;
289 
290 			if (ava.va_size < bva.va_size) {
291 				offset = ava.va_size;
292 				length = bva.va_size - ava.va_size;
293 			} else {
294 				offset = bva.va_size;
295 				length = ava.va_size - bva.va_size;
296 			}
297 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
298 			    NULL)) {
299 				error = EACCES;
300 				goto out;
301 			}
302 		}
303 
304 		if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
305 			ava.va_mask &= ~AT_SIZE;
306 			bf.l_type = F_WRLCK;
307 			bf.l_whence = 0;
308 			bf.l_start = (off64_t)ava.va_size;
309 			bf.l_len = 0;
310 			bf.l_sysid = 0;
311 			bf.l_pid = 0;
312 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
313 			    (offset_t)ava.va_size, cr, &ct);
314 		}
315 	}
316 
317 	if (!error && ava.va_mask)
318 		error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
319 
320 	/* check if a monitor detected a delegation conflict */
321 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
322 		resp->status = NFS3ERR_JUKEBOX;
323 		goto out1;
324 	}
325 
326 #ifdef DEBUG
327 	if (rfs3_do_post_op_attr) {
328 		ava.va_mask = AT_ALL;
329 		avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
330 	} else
331 		avap = NULL;
332 #else
333 	ava.va_mask = AT_ALL;
334 	avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
335 #endif
336 
337 	/*
338 	 * Force modified metadata out to stable storage.
339 	 */
340 	(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
341 
342 	if (error)
343 		goto out;
344 
345 	if (in_crit)
346 		nbl_end_crit(vp);
347 
348 	resp->status = NFS3_OK;
349 	vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
350 
351 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
352 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
353 
354 	VN_RELE(vp);
355 
356 	return;
357 
358 out:
359 	if (curthread->t_flag & T_WOULDBLOCK) {
360 		curthread->t_flag &= ~T_WOULDBLOCK;
361 		resp->status = NFS3ERR_JUKEBOX;
362 	} else
363 		resp->status = puterrno3(error);
364 out1:
365 	DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
366 	    cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
367 
368 	if (vp != NULL) {
369 		if (in_crit)
370 			nbl_end_crit(vp);
371 		VN_RELE(vp);
372 	}
373 	vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
374 }
375 
376 void *
377 rfs3_setattr_getfh(SETATTR3args *args)
378 {
379 
380 	return (&args->object);
381 }
382 
383 /* ARGSUSED */
384 void
385 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
386 	struct svc_req *req, cred_t *cr)
387 {
388 	int error;
389 	vnode_t *vp;
390 	vnode_t *dvp;
391 	struct vattr *vap;
392 	struct vattr va;
393 	struct vattr *dvap;
394 	struct vattr dva;
395 	nfs_fh3 *fhp;
396 	struct sec_ol sec = {0, 0};
397 	bool_t publicfh_flag = FALSE, auth_weak = FALSE;
398 	struct sockaddr *ca;
399 	char *name = NULL;
400 
401 	dvap = NULL;
402 
403 	/*
404 	 * Allow lookups from the root - the default
405 	 * location of the public filehandle.
406 	 */
407 	if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
408 		dvp = rootdir;
409 		VN_HOLD(dvp);
410 
411 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
412 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
413 	} else {
414 		dvp = nfs3_fhtovp(&args->what.dir, exi);
415 
416 		DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
417 		    cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
418 
419 		if (dvp == NULL) {
420 			error = ESTALE;
421 			goto out;
422 		}
423 	}
424 
425 #ifdef DEBUG
426 	if (rfs3_do_pre_op_attr) {
427 		dva.va_mask = AT_ALL;
428 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
429 	}
430 #else
431 	dva.va_mask = AT_ALL;
432 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
433 #endif
434 
435 	if (args->what.name == nfs3nametoolong) {
436 		resp->status = NFS3ERR_NAMETOOLONG;
437 		goto out1;
438 	}
439 
440 	if (args->what.name == NULL || *(args->what.name) == '\0') {
441 		resp->status = NFS3ERR_ACCES;
442 		goto out1;
443 	}
444 
445 	fhp = &args->what.dir;
446 	if (strcmp(args->what.name, "..") == 0 &&
447 	    EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
448 		resp->status = NFS3ERR_NOENT;
449 		goto out1;
450 	}
451 
452 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
453 	name = nfscmd_convname(ca, exi, args->what.name,
454 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
455 
456 	if (name == NULL) {
457 		resp->status = NFS3ERR_ACCES;
458 		goto out1;
459 	}
460 
461 	/*
462 	 * If the public filehandle is used then allow
463 	 * a multi-component lookup
464 	 */
465 	if (PUBLIC_FH3(&args->what.dir)) {
466 		publicfh_flag = TRUE;
467 		error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
468 		    &exi, &sec);
469 		if (error && exi != NULL)
470 			exi_rele(exi); /* See comment below Re: publicfh_flag */
471 		/*
472 		 * Since WebNFS may bypass MOUNT, we need to ensure this
473 		 * request didn't come from an unlabeled admin_low client.
474 		 */
475 		if (is_system_labeled() && error == 0) {
476 			int		addr_type;
477 			void		*ipaddr;
478 			tsol_tpc_t	*tp;
479 
480 			if (ca->sa_family == AF_INET) {
481 				addr_type = IPV4_VERSION;
482 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
483 			} else if (ca->sa_family == AF_INET6) {
484 				addr_type = IPV6_VERSION;
485 				ipaddr = &((struct sockaddr_in6 *)
486 				    ca)->sin6_addr;
487 			}
488 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
489 			if (tp == NULL || tp->tpc_tp.tp_doi !=
490 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
491 			    SUN_CIPSO) {
492 				if (exi != NULL)
493 					exi_rele(exi);
494 				VN_RELE(vp);
495 				resp->status = NFS3ERR_ACCES;
496 				error = 1;
497 			}
498 			if (tp != NULL)
499 				TPC_RELE(tp);
500 		}
501 	} else {
502 		error = VOP_LOOKUP(dvp, name, &vp,
503 		    NULL, 0, NULL, cr, NULL, NULL, NULL);
504 	}
505 
506 	if (name != args->what.name)
507 		kmem_free(name, MAXPATHLEN + 1);
508 
509 	if (is_system_labeled() && error == 0) {
510 		bslabel_t *clabel = req->rq_label;
511 
512 		ASSERT(clabel != NULL);
513 		DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
514 		    "got client label from request(1)", struct svc_req *, req);
515 
516 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
517 			if (!do_rfs_label_check(clabel, dvp,
518 			    DOMINANCE_CHECK, exi)) {
519 				if (publicfh_flag && exi != NULL)
520 					exi_rele(exi);
521 				VN_RELE(vp);
522 				resp->status = NFS3ERR_ACCES;
523 				error = 1;
524 			}
525 		}
526 	}
527 
528 #ifdef DEBUG
529 	if (rfs3_do_post_op_attr) {
530 		dva.va_mask = AT_ALL;
531 		dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
532 	} else
533 		dvap = NULL;
534 #else
535 	dva.va_mask = AT_ALL;
536 	dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
537 #endif
538 
539 	if (error)
540 		goto out;
541 
542 	if (sec.sec_flags & SEC_QUERY) {
543 		error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
544 	} else {
545 		error = makefh3(&resp->resok.object, vp, exi);
546 		if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
547 			auth_weak = TRUE;
548 	}
549 
550 	if (error) {
551 		VN_RELE(vp);
552 		goto out;
553 	}
554 
555 	/*
556 	 * If publicfh_flag is true then we have called rfs_publicfh_mclookup
557 	 * and have obtained a new exportinfo in exi which needs to be
558 	 * released. Note the the original exportinfo pointed to by exi
559 	 * will be released by the caller, common_dispatch.
560 	 */
561 	if (publicfh_flag)
562 		exi_rele(exi);
563 
564 #ifdef DEBUG
565 	if (rfs3_do_post_op_attr) {
566 		va.va_mask = AT_ALL;
567 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
568 	} else
569 		vap = NULL;
570 #else
571 	va.va_mask = AT_ALL;
572 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
573 #endif
574 
575 	VN_RELE(vp);
576 
577 	resp->status = NFS3_OK;
578 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
579 	vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
580 
581 	/*
582 	 * If it's public fh, no 0x81, and client's flavor is
583 	 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
584 	 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
585 	 */
586 	if (auth_weak)
587 		resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
588 
589 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
590 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
591 	VN_RELE(dvp);
592 
593 	return;
594 
595 out:
596 	if (curthread->t_flag & T_WOULDBLOCK) {
597 		curthread->t_flag &= ~T_WOULDBLOCK;
598 		resp->status = NFS3ERR_JUKEBOX;
599 	} else
600 		resp->status = puterrno3(error);
601 out1:
602 	DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
603 	    cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
604 
605 	if (dvp != NULL)
606 		VN_RELE(dvp);
607 	vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
608 
609 }
610 
611 void *
612 rfs3_lookup_getfh(LOOKUP3args *args)
613 {
614 
615 	return (&args->what.dir);
616 }
617 
618 /* ARGSUSED */
619 void
620 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
621 	struct svc_req *req, cred_t *cr)
622 {
623 	int error;
624 	vnode_t *vp;
625 	struct vattr *vap;
626 	struct vattr va;
627 	int checkwriteperm;
628 	boolean_t dominant_label = B_FALSE;
629 	boolean_t equal_label = B_FALSE;
630 	boolean_t admin_low_client;
631 
632 	vap = NULL;
633 
634 	vp = nfs3_fhtovp(&args->object, exi);
635 
636 	DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
637 	    cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
638 
639 	if (vp == NULL) {
640 		error = ESTALE;
641 		goto out;
642 	}
643 
644 	/*
645 	 * If the file system is exported read only, it is not appropriate
646 	 * to check write permissions for regular files and directories.
647 	 * Special files are interpreted by the client, so the underlying
648 	 * permissions are sent back to the client for interpretation.
649 	 */
650 	if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
651 		checkwriteperm = 0;
652 	else
653 		checkwriteperm = 1;
654 
655 	/*
656 	 * We need the mode so that we can correctly determine access
657 	 * permissions relative to a mandatory lock file.  Access to
658 	 * mandatory lock files is denied on the server, so it might
659 	 * as well be reflected to the server during the open.
660 	 */
661 	va.va_mask = AT_MODE;
662 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
663 	if (error)
664 		goto out;
665 
666 #ifdef DEBUG
667 	if (rfs3_do_post_op_attr)
668 		vap = &va;
669 #else
670 	vap = &va;
671 #endif
672 
673 	resp->resok.access = 0;
674 
675 	if (is_system_labeled()) {
676 		bslabel_t *clabel = req->rq_label;
677 
678 		ASSERT(clabel != NULL);
679 		DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
680 		    "got client label from request(1)", struct svc_req *, req);
681 
682 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
683 			if ((equal_label = do_rfs_label_check(clabel, vp,
684 			    EQUALITY_CHECK, exi)) == B_FALSE) {
685 				dominant_label = do_rfs_label_check(clabel,
686 				    vp, DOMINANCE_CHECK, exi);
687 			} else
688 				dominant_label = B_TRUE;
689 			admin_low_client = B_FALSE;
690 		} else
691 			admin_low_client = B_TRUE;
692 	}
693 
694 	if (args->access & ACCESS3_READ) {
695 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
696 		if (error) {
697 			if (curthread->t_flag & T_WOULDBLOCK)
698 				goto out;
699 		} else if (!MANDLOCK(vp, va.va_mode) &&
700 		    (!is_system_labeled() || admin_low_client ||
701 		    dominant_label))
702 			resp->resok.access |= ACCESS3_READ;
703 	}
704 	if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
705 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
706 		if (error) {
707 			if (curthread->t_flag & T_WOULDBLOCK)
708 				goto out;
709 		} else if (!is_system_labeled() || admin_low_client ||
710 		    dominant_label)
711 			resp->resok.access |= ACCESS3_LOOKUP;
712 	}
713 	if (checkwriteperm &&
714 	    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
715 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
716 		if (error) {
717 			if (curthread->t_flag & T_WOULDBLOCK)
718 				goto out;
719 		} else if (!MANDLOCK(vp, va.va_mode) &&
720 		    (!is_system_labeled() || admin_low_client || equal_label)) {
721 			resp->resok.access |=
722 			    (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
723 		}
724 	}
725 	if (checkwriteperm &&
726 	    (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
727 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
728 		if (error) {
729 			if (curthread->t_flag & T_WOULDBLOCK)
730 				goto out;
731 		} else if (!is_system_labeled() || admin_low_client ||
732 		    equal_label)
733 			resp->resok.access |= ACCESS3_DELETE;
734 	}
735 	if (args->access & ACCESS3_EXECUTE) {
736 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
737 		if (error) {
738 			if (curthread->t_flag & T_WOULDBLOCK)
739 				goto out;
740 		} else if (!MANDLOCK(vp, va.va_mode) &&
741 		    (!is_system_labeled() || admin_low_client ||
742 		    dominant_label))
743 			resp->resok.access |= ACCESS3_EXECUTE;
744 	}
745 
746 #ifdef DEBUG
747 	if (rfs3_do_post_op_attr) {
748 		va.va_mask = AT_ALL;
749 		vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
750 	} else
751 		vap = NULL;
752 #else
753 	va.va_mask = AT_ALL;
754 	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
755 #endif
756 
757 	resp->status = NFS3_OK;
758 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
759 
760 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
761 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
762 
763 	VN_RELE(vp);
764 
765 	return;
766 
767 out:
768 	if (curthread->t_flag & T_WOULDBLOCK) {
769 		curthread->t_flag &= ~T_WOULDBLOCK;
770 		resp->status = NFS3ERR_JUKEBOX;
771 	} else
772 		resp->status = puterrno3(error);
773 	DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
774 	    cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
775 	if (vp != NULL)
776 		VN_RELE(vp);
777 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
778 }
779 
780 void *
781 rfs3_access_getfh(ACCESS3args *args)
782 {
783 
784 	return (&args->object);
785 }
786 
787 /* ARGSUSED */
788 void
789 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
790 	struct svc_req *req, cred_t *cr)
791 {
792 	int error;
793 	vnode_t *vp;
794 	struct vattr *vap;
795 	struct vattr va;
796 	struct iovec iov;
797 	struct uio uio;
798 	char *data;
799 	struct sockaddr *ca;
800 	char *name = NULL;
801 	int is_referral = 0;
802 
803 	vap = NULL;
804 
805 	vp = nfs3_fhtovp(&args->symlink, exi);
806 
807 	DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
808 	    cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
809 
810 	if (vp == NULL) {
811 		error = ESTALE;
812 		goto out;
813 	}
814 
815 	va.va_mask = AT_ALL;
816 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
817 	if (error)
818 		goto out;
819 
820 #ifdef DEBUG
821 	if (rfs3_do_post_op_attr)
822 		vap = &va;
823 #else
824 	vap = &va;
825 #endif
826 
827 	/* We lied about the object type for a referral */
828 	if (vn_is_nfs_reparse(vp, cr))
829 		is_referral = 1;
830 
831 	if (vp->v_type != VLNK && !is_referral) {
832 		resp->status = NFS3ERR_INVAL;
833 		goto out1;
834 	}
835 
836 	if (MANDLOCK(vp, va.va_mode)) {
837 		resp->status = NFS3ERR_ACCES;
838 		goto out1;
839 	}
840 
841 	if (is_system_labeled()) {
842 		bslabel_t *clabel = req->rq_label;
843 
844 		ASSERT(clabel != NULL);
845 		DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
846 		    "got client label from request(1)", struct svc_req *, req);
847 
848 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
849 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
850 			    exi)) {
851 				resp->status = NFS3ERR_ACCES;
852 				goto out1;
853 			}
854 		}
855 	}
856 
857 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
858 
859 	if (is_referral) {
860 		char *s;
861 		size_t strsz;
862 
863 		/* Get an artificial symlink based on a referral */
864 		s = build_symlink(vp, cr, &strsz);
865 		global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
866 		DTRACE_PROBE2(nfs3serv__func__referral__reflink,
867 		    vnode_t *, vp, char *, s);
868 		if (s == NULL)
869 			error = EINVAL;
870 		else {
871 			error = 0;
872 			(void) strlcpy(data, s, MAXPATHLEN + 1);
873 			kmem_free(s, strsz);
874 		}
875 
876 	} else {
877 
878 		iov.iov_base = data;
879 		iov.iov_len = MAXPATHLEN;
880 		uio.uio_iov = &iov;
881 		uio.uio_iovcnt = 1;
882 		uio.uio_segflg = UIO_SYSSPACE;
883 		uio.uio_extflg = UIO_COPY_CACHED;
884 		uio.uio_loffset = 0;
885 		uio.uio_resid = MAXPATHLEN;
886 
887 		error = VOP_READLINK(vp, &uio, cr, NULL);
888 
889 		if (!error)
890 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
891 	}
892 
893 #ifdef DEBUG
894 	if (rfs3_do_post_op_attr) {
895 		va.va_mask = AT_ALL;
896 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
897 	} else
898 		vap = NULL;
899 #else
900 	va.va_mask = AT_ALL;
901 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
902 #endif
903 	/* Lie about object type again just to be consistent */
904 	if (is_referral && vap != NULL)
905 		vap->va_type = VLNK;
906 
907 #if 0 /* notyet */
908 	/*
909 	 * Don't do this.  It causes local disk writes when just
910 	 * reading the file and the overhead is deemed larger
911 	 * than the benefit.
912 	 */
913 	/*
914 	 * Force modified metadata out to stable storage.
915 	 */
916 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
917 #endif
918 
919 	if (error) {
920 		kmem_free(data, MAXPATHLEN + 1);
921 		goto out;
922 	}
923 
924 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
925 	name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
926 	    MAXPATHLEN + 1);
927 
928 	if (name == NULL) {
929 		/*
930 		 * Even though the conversion failed, we return
931 		 * something. We just don't translate it.
932 		 */
933 		name = data;
934 	}
935 
936 	resp->status = NFS3_OK;
937 	vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
938 	resp->resok.data = name;
939 
940 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
941 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
942 	VN_RELE(vp);
943 
944 	if (name != data)
945 		kmem_free(data, MAXPATHLEN + 1);
946 
947 	return;
948 
949 out:
950 	if (curthread->t_flag & T_WOULDBLOCK) {
951 		curthread->t_flag &= ~T_WOULDBLOCK;
952 		resp->status = NFS3ERR_JUKEBOX;
953 	} else
954 		resp->status = puterrno3(error);
955 out1:
956 	DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
957 	    cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
958 	if (vp != NULL)
959 		VN_RELE(vp);
960 	vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
961 }
962 
963 void *
964 rfs3_readlink_getfh(READLINK3args *args)
965 {
966 
967 	return (&args->symlink);
968 }
969 
970 void
971 rfs3_readlink_free(READLINK3res *resp)
972 {
973 
974 	if (resp->status == NFS3_OK)
975 		kmem_free(resp->resok.data, MAXPATHLEN + 1);
976 }
977 
978 /*
979  * Server routine to handle read
980  * May handle RDMA data as well as mblks
981  */
982 /* ARGSUSED */
983 void
984 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
985 	struct svc_req *req, cred_t *cr)
986 {
987 	int error;
988 	vnode_t *vp;
989 	struct vattr *vap;
990 	struct vattr va;
991 	struct iovec iov;
992 	struct uio uio;
993 	u_offset_t offset;
994 	mblk_t *mp;
995 	int alloc_err = 0;
996 	int in_crit = 0;
997 	int need_rwunlock = 0;
998 	caller_context_t ct;
999 	int rdma_used = 0;
1000 	int loaned_buffers;
1001 	struct uio *uiop;
1002 
1003 	vap = NULL;
1004 
1005 	vp = nfs3_fhtovp(&args->file, exi);
1006 
1007 	DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
1008 	    cred_t *, cr, vnode_t *, vp, READ3args *, args);
1009 
1010 	if (vp == NULL) {
1011 		error = ESTALE;
1012 		goto out;
1013 	}
1014 
1015 	if (args->wlist)
1016 		rdma_used = 1;
1017 
1018 	/* use loaned buffers for TCP */
1019 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
1020 
1021 	if (is_system_labeled()) {
1022 		bslabel_t *clabel = req->rq_label;
1023 
1024 		ASSERT(clabel != NULL);
1025 		DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
1026 		    "got client label from request(1)", struct svc_req *, req);
1027 
1028 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1029 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
1030 			    exi)) {
1031 				resp->status = NFS3ERR_ACCES;
1032 				goto out1;
1033 			}
1034 		}
1035 	}
1036 
1037 	ct.cc_sysid = 0;
1038 	ct.cc_pid = 0;
1039 	ct.cc_caller_id = nfs3_srv_caller_id;
1040 	ct.cc_flags = CC_DONTBLOCK;
1041 
1042 	/*
1043 	 * Enter the critical region before calling VOP_RWLOCK
1044 	 * to avoid a deadlock with write requests.
1045 	 */
1046 	if (nbl_need_check(vp)) {
1047 		nbl_start_crit(vp, RW_READER);
1048 		in_crit = 1;
1049 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
1050 		    NULL)) {
1051 			error = EACCES;
1052 			goto out;
1053 		}
1054 	}
1055 
1056 	error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1057 
1058 	/* check if a monitor detected a delegation conflict */
1059 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1060 		resp->status = NFS3ERR_JUKEBOX;
1061 		goto out1;
1062 	}
1063 
1064 	need_rwunlock = 1;
1065 
1066 	va.va_mask = AT_ALL;
1067 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1068 
1069 	/*
1070 	 * If we can't get the attributes, then we can't do the
1071 	 * right access checking.  So, we'll fail the request.
1072 	 */
1073 	if (error)
1074 		goto out;
1075 
1076 #ifdef DEBUG
1077 	if (rfs3_do_post_op_attr)
1078 		vap = &va;
1079 #else
1080 	vap = &va;
1081 #endif
1082 
1083 	if (vp->v_type != VREG) {
1084 		resp->status = NFS3ERR_INVAL;
1085 		goto out1;
1086 	}
1087 
1088 	if (crgetuid(cr) != va.va_uid) {
1089 		error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1090 		if (error) {
1091 			if (curthread->t_flag & T_WOULDBLOCK)
1092 				goto out;
1093 			error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1094 			if (error)
1095 				goto out;
1096 		}
1097 	}
1098 
1099 	if (MANDLOCK(vp, va.va_mode)) {
1100 		resp->status = NFS3ERR_ACCES;
1101 		goto out1;
1102 	}
1103 
1104 	offset = args->offset;
1105 	if (offset >= va.va_size) {
1106 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1107 		if (in_crit)
1108 			nbl_end_crit(vp);
1109 		resp->status = NFS3_OK;
1110 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1111 		resp->resok.count = 0;
1112 		resp->resok.eof = TRUE;
1113 		resp->resok.data.data_len = 0;
1114 		resp->resok.data.data_val = NULL;
1115 		resp->resok.data.mp = NULL;
1116 		/* RDMA */
1117 		resp->resok.wlist = args->wlist;
1118 		resp->resok.wlist_len = resp->resok.count;
1119 		if (resp->resok.wlist)
1120 			clist_zero_len(resp->resok.wlist);
1121 		goto done;
1122 	}
1123 
1124 	if (args->count == 0) {
1125 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1126 		if (in_crit)
1127 			nbl_end_crit(vp);
1128 		resp->status = NFS3_OK;
1129 		vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1130 		resp->resok.count = 0;
1131 		resp->resok.eof = FALSE;
1132 		resp->resok.data.data_len = 0;
1133 		resp->resok.data.data_val = NULL;
1134 		resp->resok.data.mp = NULL;
1135 		/* RDMA */
1136 		resp->resok.wlist = args->wlist;
1137 		resp->resok.wlist_len = resp->resok.count;
1138 		if (resp->resok.wlist)
1139 			clist_zero_len(resp->resok.wlist);
1140 		goto done;
1141 	}
1142 
1143 	/*
1144 	 * do not allocate memory more the max. allowed
1145 	 * transfer size
1146 	 */
1147 	if (args->count > rfs3_tsize(req))
1148 		args->count = rfs3_tsize(req);
1149 
1150 	if (loaned_buffers) {
1151 		uiop = (uio_t *)rfs_setup_xuio(vp);
1152 		ASSERT(uiop != NULL);
1153 		uiop->uio_segflg = UIO_SYSSPACE;
1154 		uiop->uio_loffset = args->offset;
1155 		uiop->uio_resid = args->count;
1156 
1157 		/* Jump to do the read if successful */
1158 		if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1159 			/*
1160 			 * Need to hold the vnode until after VOP_RETZCBUF()
1161 			 * is called.
1162 			 */
1163 			VN_HOLD(vp);
1164 			goto doio_read;
1165 		}
1166 
1167 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1168 		    uiop->uio_loffset, int, uiop->uio_resid);
1169 
1170 		uiop->uio_extflg = 0;
1171 		/* failure to setup for zero copy */
1172 		rfs_free_xuio((void *)uiop);
1173 		loaned_buffers = 0;
1174 	}
1175 
1176 	/*
1177 	 * If returning data via RDMA Write, then grab the chunk list.
1178 	 * If we aren't returning READ data w/RDMA_WRITE, then grab
1179 	 * a mblk.
1180 	 */
1181 	if (rdma_used) {
1182 		mp = NULL;
1183 		(void) rdma_get_wchunk(req, &iov, args->wlist);
1184 	} else {
1185 		/*
1186 		 * mp will contain the data to be sent out in the read reply.
1187 		 * This will be freed after the reply has been sent out (by the
1188 		 * driver).
1189 		 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1190 		 * that the call to xdrmblk_putmblk() never fails.
1191 		 */
1192 		mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1193 		    &alloc_err);
1194 		ASSERT(mp != NULL);
1195 		ASSERT(alloc_err == 0);
1196 
1197 		iov.iov_base = (caddr_t)mp->b_datap->db_base;
1198 		iov.iov_len = args->count;
1199 	}
1200 
1201 	uio.uio_iov = &iov;
1202 	uio.uio_iovcnt = 1;
1203 	uio.uio_segflg = UIO_SYSSPACE;
1204 	uio.uio_extflg = UIO_COPY_CACHED;
1205 	uio.uio_loffset = args->offset;
1206 	uio.uio_resid = args->count;
1207 	uiop = &uio;
1208 
1209 doio_read:
1210 	error = VOP_READ(vp, uiop, 0, cr, &ct);
1211 
1212 	if (error) {
1213 		if (mp)
1214 			freemsg(mp);
1215 		/* check if a monitor detected a delegation conflict */
1216 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1217 			resp->status = NFS3ERR_JUKEBOX;
1218 			goto out1;
1219 		}
1220 		goto out;
1221 	}
1222 
1223 	/* make mblk using zc buffers */
1224 	if (loaned_buffers) {
1225 		mp = uio_to_mblk(uiop);
1226 		ASSERT(mp != NULL);
1227 	}
1228 
1229 	va.va_mask = AT_ALL;
1230 	error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1231 
1232 #ifdef DEBUG
1233 	if (rfs3_do_post_op_attr) {
1234 		if (error)
1235 			vap = NULL;
1236 		else
1237 			vap = &va;
1238 	} else
1239 		vap = NULL;
1240 #else
1241 	if (error)
1242 		vap = NULL;
1243 	else
1244 		vap = &va;
1245 #endif
1246 
1247 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1248 
1249 	if (in_crit)
1250 		nbl_end_crit(vp);
1251 
1252 	resp->status = NFS3_OK;
1253 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1254 	resp->resok.count = args->count - uiop->uio_resid;
1255 	if (!error && offset + resp->resok.count == va.va_size)
1256 		resp->resok.eof = TRUE;
1257 	else
1258 		resp->resok.eof = FALSE;
1259 	resp->resok.data.data_len = resp->resok.count;
1260 
1261 	if (mp)
1262 		rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1263 
1264 	resp->resok.data.mp = mp;
1265 	resp->resok.size = (uint_t)args->count;
1266 
1267 	if (rdma_used) {
1268 		resp->resok.data.data_val = (caddr_t)iov.iov_base;
1269 		if (!rdma_setup_read_data3(args, &(resp->resok))) {
1270 			resp->status = NFS3ERR_INVAL;
1271 		}
1272 	} else {
1273 		resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1274 		(resp->resok).wlist = NULL;
1275 	}
1276 
1277 done:
1278 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1279 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1280 
1281 	VN_RELE(vp);
1282 
1283 	return;
1284 
1285 out:
1286 	if (curthread->t_flag & T_WOULDBLOCK) {
1287 		curthread->t_flag &= ~T_WOULDBLOCK;
1288 		resp->status = NFS3ERR_JUKEBOX;
1289 	} else
1290 		resp->status = puterrno3(error);
1291 out1:
1292 	DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1293 	    cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1294 
1295 	if (vp != NULL) {
1296 		if (need_rwunlock)
1297 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1298 		if (in_crit)
1299 			nbl_end_crit(vp);
1300 		VN_RELE(vp);
1301 	}
1302 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1303 }
1304 
1305 void
1306 rfs3_read_free(READ3res *resp)
1307 {
1308 	mblk_t *mp;
1309 
1310 	if (resp->status == NFS3_OK) {
1311 		mp = resp->resok.data.mp;
1312 		if (mp != NULL)
1313 			freemsg(mp);
1314 	}
1315 }
1316 
1317 void *
1318 rfs3_read_getfh(READ3args *args)
1319 {
1320 
1321 	return (&args->file);
1322 }
1323 
/*
 * Number of entries in the on-stack iovec array used by rfs3_write().
 * When the request's mblk chain has more links than this, rfs3_write()
 * allocates the iovec array with kmem_alloc() instead.
 */
1324 #define	MAX_IOVECS	12
1325 
/*
 * DEBUG-only counters maintained by rfs3_write(): a "hit" is an mblk
 * chain that fit in the on-stack iovec array, a "miss" is one that
 * required an allocation.
 */
1326 #ifdef DEBUG
1327 static int rfs3_write_hits = 0;
1328 static int rfs3_write_misses = 0;
1329 #endif
1330 
/*
 * rfs3_write - NFS version 3 WRITE service routine.
 *
 *	args	decoded WRITE3 arguments (file handle, offset, count,
 *		stable_how and the data to write)
 *	resp	result structure filled in by this routine
 *	exi	export the file handle belongs to
 *	req	RPC service request (used for label and read-only checks)
 *	cr	credentials used for all vnode operations
 *
 * The file handle is converted to a vnode, the request is validated
 * (label check, non-blocking mandatory lock conflict, count versus
 * data length, read-only export, regular file, write access, mandatory
 * locking) and the data is written with VOP_WRITE().  The data is taken
 * from args->mblk when present, else from args->rlist, else from
 * args->data.data_val.
 *
 * On success resp->resok carries the wcc data, the number of bytes
 * written, args->stable as the commit level and write3verf.  On failure
 * resp->status holds the NFS3 error and resp->resfail.file_wcc the wcc
 * data.
 *
 * Exit labels:
 *	err	translate "error" (or a pending T_WOULDBLOCK) into
 *		resp->status, then fall into err1
 *	err1	resp->status is already set; fill in the failure wcc data
 *	out	common exit: fire the done probe and undo VOP_RWLOCK,
 *		the nbmand critical region and the vnode hold as needed
 */
1331 void
1332 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1333 	struct svc_req *req, cred_t *cr)
1334 {
1335 	int error;
1336 	vnode_t *vp;
1337 	struct vattr *bvap = NULL;
1338 	struct vattr bva;
1339 	struct vattr *avap = NULL;
1340 	struct vattr ava;
1341 	u_offset_t rlimit;
1342 	struct uio uio;
1343 	struct iovec iov[MAX_IOVECS];
1344 	mblk_t *m;
1345 	struct iovec *iovp;
1346 	int iovcnt;
1347 	int ioflag;
1348 	cred_t *savecred;
1349 	int in_crit = 0;
	/* -1 means VOP_RWLOCK is not held; see the "out" label. */
1350 	int rwlock_ret = -1;
1351 	caller_context_t ct;
1352 
1353 	vp = nfs3_fhtovp(&args->file, exi);
1354 
1355 	DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1356 	    cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
1357 
1358 	if (vp == NULL) {
1359 		error = ESTALE;
1360 		goto err;
1361 	}
1362 
1363 	if (is_system_labeled()) {
1364 		bslabel_t *clabel = req->rq_label;
1365 
1366 		ASSERT(clabel != NULL);
1367 		DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1368 		    "got client label from request(1)", struct svc_req *, req);
1369 
1370 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1371 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1372 			    exi)) {
1373 				resp->status = NFS3ERR_ACCES;
1374 				goto err1;
1375 			}
1376 		}
1377 	}
1378 
	/*
	 * Caller context handed to the vnode operations below.
	 * CC_DONTBLOCK is requested; an EAGAIN result together with
	 * CC_WOULDBLOCK set in cc_flags is answered with NFS3ERR_JUKEBOX.
	 */
1379 	ct.cc_sysid = 0;
1380 	ct.cc_pid = 0;
1381 	ct.cc_caller_id = nfs3_srv_caller_id;
1382 	ct.cc_flags = CC_DONTBLOCK;
1383 
1384 	/*
1385 	 * We have to enter the critical region before calling VOP_RWLOCK
1386 	 * to avoid a deadlock with ufs.
1387 	 */
1388 	if (nbl_need_check(vp)) {
1389 		nbl_start_crit(vp, RW_READER);
1390 		in_crit = 1;
1391 		if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1392 		    NULL)) {
1393 			error = EACCES;
1394 			goto err;
1395 		}
1396 	}
1397 
1398 	rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1399 
1400 	/* check if a monitor detected a delegation conflict */
1401 	if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1402 		resp->status = NFS3ERR_JUKEBOX;
		/* Reset so that the exit path does not call VOP_RWUNLOCK. */
1403 		rwlock_ret = -1;
1404 		goto err1;
1405 	}
1406 
1407 
1408 	bva.va_mask = AT_ALL;
1409 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1410 
1411 	/*
1412 	 * If we can't get the attributes, then we can't do the
1413 	 * right access checking.  So, we'll fail the request.
1414 	 */
1415 	if (error)
1416 		goto err;
1417 
1418 	bvap = &bva;
1419 #ifdef DEBUG
1420 	if (!rfs3_do_pre_op_attr)
1421 		bvap = NULL;
1422 #endif
	/*
	 * Until the write has been done, report the pre-op attributes
	 * as the post-op attributes as well.
	 */
1423 	avap = bvap;
1424 
1425 	if (args->count != args->data.data_len) {
1426 		resp->status = NFS3ERR_INVAL;
1427 		goto err1;
1428 	}
1429 
1430 	if (rdonly(exi, req)) {
1431 		resp->status = NFS3ERR_ROFS;
1432 		goto err1;
1433 	}
1434 
1435 	if (vp->v_type != VREG) {
1436 		resp->status = NFS3ERR_INVAL;
1437 		goto err1;
1438 	}
1439 
	/* The access check is skipped when the caller owns the file. */
1440 	if (crgetuid(cr) != bva.va_uid &&
1441 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1442 		goto err;
1443 
1444 	if (MANDLOCK(vp, bva.va_mode)) {
1445 		resp->status = NFS3ERR_ACCES;
1446 		goto err1;
1447 	}
1448 
	/* A zero-length write succeeds without calling VOP_WRITE(). */
1449 	if (args->count == 0) {
1450 		resp->status = NFS3_OK;
1451 		vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1452 		resp->resok.count = 0;
1453 		resp->resok.committed = args->stable;
1454 		resp->resok.verf = write3verf;
1455 		goto out;
1456 	}
1457 
	/*
	 * Build the iovec array describing the data.  For an mblk chain,
	 * one iovec per mblk is needed: use the on-stack array when the
	 * chain fits, otherwise allocate one (freed after the write).
	 */
1458 	if (args->mblk != NULL) {
1459 		iovcnt = 0;
1460 		for (m = args->mblk; m != NULL; m = m->b_cont)
1461 			iovcnt++;
1462 		if (iovcnt <= MAX_IOVECS) {
1463 #ifdef DEBUG
1464 			rfs3_write_hits++;
1465 #endif
1466 			iovp = iov;
1467 		} else {
1468 #ifdef DEBUG
1469 			rfs3_write_misses++;
1470 #endif
1471 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1472 		}
1473 		mblk_to_iov(args->mblk, iovcnt, iovp);
1474 
1475 	} else if (args->rlist != NULL) {
		/* presumably the RDMA chunk list case -- TODO confirm */
1476 		iovcnt = 1;
1477 		iovp = iov;
1478 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1479 		iovp->iov_len = args->count;
1480 	} else {
1481 		iovcnt = 1;
1482 		iovp = iov;
1483 		iovp->iov_base = args->data.data_val;
1484 		iovp->iov_len = args->count;
1485 	}
1486 
1487 	uio.uio_iov = iovp;
1488 	uio.uio_iovcnt = iovcnt;
1489 
1490 	uio.uio_segflg = UIO_SYSSPACE;
1491 	uio.uio_extflg = UIO_COPY_DEFAULT;
1492 	uio.uio_loffset = args->offset;
1493 	uio.uio_resid = args->count;
	/*
	 * Trim the transfer so that it does not extend past the file
	 * size limit taken from curproc->p_fsz_ctl.
	 * NOTE(review): rlimit is unsigned, so if args->offset is larger
	 * than uio_llimit the subtraction wraps and no trimming happens;
	 * confirm that VOP_WRITE() rejects such offsets.
	 */
1494 	uio.uio_llimit = curproc->p_fsz_ctl;
1495 	rlimit = uio.uio_llimit - args->offset;
1496 	if (rlimit < (u_offset_t)uio.uio_resid)
1497 		uio.uio_resid = (int)rlimit;
1498 
	/*
	 * Map the requested stability level to VOP_WRITE() flags.  An
	 * unknown level is rejected; an allocated iovec array must be
	 * freed first because the exit path does not know about it.
	 */
1499 	if (args->stable == UNSTABLE)
1500 		ioflag = 0;
1501 	else if (args->stable == FILE_SYNC)
1502 		ioflag = FSYNC;
1503 	else if (args->stable == DATA_SYNC)
1504 		ioflag = FDSYNC;
1505 	else {
1506 		if (iovp != iov)
1507 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
1508 		resp->status = NFS3ERR_INVAL;
1509 		goto err1;
1510 	}
1511 
1512 	/*
1513 	 * We're changing creds because VM may fault and we need
1514 	 * the cred of the current thread to be used if quota
1515 	 * checking is enabled.
1516 	 */
1517 	savecred = curthread->t_cred;
1518 	curthread->t_cred = cr;
1519 	error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1520 	curthread->t_cred = savecred;
1521 
1522 	if (iovp != iov)
1523 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
1524 
1525 	/* check if a monitor detected a delegation conflict */
1526 	if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1527 		resp->status = NFS3ERR_JUKEBOX;
1528 		goto err1;
1529 	}
1530 
	/*
	 * The post-op attributes are fetched before "error" is examined
	 * so that a failure reply can carry them too.
	 */
1531 	ava.va_mask = AT_ALL;
1532 	avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1533 
1534 #ifdef DEBUG
1535 	if (!rfs3_do_post_op_attr)
1536 		avap = NULL;
1537 #endif
1538 
1539 	if (error)
1540 		goto err;
1541 
1542 	/*
1543 	 * If we were unable to get the V_WRITELOCK_TRUE, then we
1544 	 * may not have accurate after attrs, so check if
1545 	 * we have both attributes, they have a non-zero va_seq, and
1546 	 * va_seq has changed by exactly one,
1547 	 * if not, turn off the before attr.
1548 	 */
1549 	if (rwlock_ret != V_WRITELOCK_TRUE) {
1550 		if (bvap == NULL || avap == NULL ||
1551 		    bvap->va_seq == 0 || avap->va_seq == 0 ||
1552 		    avap->va_seq != (bvap->va_seq + 1)) {
1553 			bvap = NULL;
1554 		}
1555 	}
1556 
1557 	resp->status = NFS3_OK;
1558 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1559 	resp->resok.count = args->count - uio.uio_resid;
1560 	resp->resok.committed = args->stable;
1561 	resp->resok.verf = write3verf;
1562 	goto out;
1563 
1564 err:
1565 	if (curthread->t_flag & T_WOULDBLOCK) {
1566 		curthread->t_flag &= ~T_WOULDBLOCK;
1567 		resp->status = NFS3ERR_JUKEBOX;
1568 	} else
1569 		resp->status = puterrno3(error);
1570 err1:
1571 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1572 out:
1573 	DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1574 	    cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
1575 
1576 	if (vp != NULL) {
1577 		if (rwlock_ret != -1)
1578 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1579 		if (in_crit)
1580 			nbl_end_crit(vp);
1581 		VN_RELE(vp);
1582 	}
1583 }
1584 
1585 void *
1586 rfs3_write_getfh(WRITE3args *args)
1587 {
1588 
1589 	return (&args->file);
1590 }
1591 
/*
 * rfs3_create - NFS version 3 CREATE service routine.
 *
 *	args	decoded CREATE3 arguments (directory handle, name and
 *		the createhow3 union: mode plus attributes or verifier)
 *	resp	result structure filled in by this routine
 *	exi	export the directory handle belongs to
 *	req	RPC service request
 *	cr	credentials used for all vnode operations
 *
 * Creates a regular file named args->where.name in the directory named
 * by args->where.dir.  The three create modes are handled as follows:
 *
 *	UNCHECKED	VOP_CREATE() with NONEXCL.  If a size is being
 *			set and the file already exists, delegation and
 *			non-blocking mandatory lock conflicts are
 *			checked first.
 *	GUARDED		VOP_CREATE() with EXCL; EEXIST is returned to
 *			the client.
 *	EXCLUSIVE	VOP_CREATE() with EXCL, mode 0 and the client's
 *			verifier stored as the mtime.  On EEXIST the
 *			existing file's mtime is compared with the
 *			verifier; a match is treated as success, a
 *			mismatch as EEXIST.
 *
 * On success resp->resok carries the new file handle (when it can be
 * built), the object attributes and the directory wcc data.  On failure
 * resp->status holds the NFS3 error and resp->resfail.dir_wcc the
 * directory wcc data.
 *
 * Exit labels:
 *	out	translate "error" (or a pending T_WOULDBLOCK) into
 *		resp->status, then fall into out1
 *	out1	resp->status is already set; free the converted name,
 *		leave the nbmand critical region, release tvp and dvp
 *		and fill in the failure wcc data
 */
1592 void
1593 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1594 	struct svc_req *req, cred_t *cr)
1595 {
1596 	int error;
1597 	int in_crit = 0;
1598 	vnode_t *vp;
1599 	vnode_t *tvp = NULL;
1600 	vnode_t *dvp;
1601 	struct vattr *vap;
1602 	struct vattr va;
1603 	struct vattr *dbvap;
1604 	struct vattr dbva;
1605 	struct vattr *davap;
1606 	struct vattr dava;
1607 	enum vcexcl excl;
1608 	nfstime3 *mtime;
1609 	len_t reqsize;
1610 	bool_t trunc;
1611 	struct sockaddr *ca;
1612 	char *name = NULL;
1613 
1614 	dbvap = NULL;
1615 	davap = NULL;
1616 
1617 	dvp = nfs3_fhtovp(&args->where.dir, exi);
1618 
1619 	DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1620 	    cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1621 
1622 	if (dvp == NULL) {
1623 		error = ESTALE;
1624 		goto out;
1625 	}
1626 
1627 #ifdef DEBUG
1628 	if (rfs3_do_pre_op_attr) {
1629 		dbva.va_mask = AT_ALL;
1630 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1631 	} else
1632 		dbvap = NULL;
1633 #else
1634 	dbva.va_mask = AT_ALL;
1635 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1636 #endif
	/*
	 * Until the create has been attempted, report the pre-op
	 * directory attributes as the post-op attributes as well.
	 */
1637 	davap = dbvap;
1638 
1639 	if (args->where.name == nfs3nametoolong) {
1640 		resp->status = NFS3ERR_NAMETOOLONG;
1641 		goto out1;
1642 	}
1643 
1644 	if (args->where.name == NULL || *(args->where.name) == '\0') {
1645 		resp->status = NFS3ERR_ACCES;
1646 		goto out1;
1647 	}
1648 
1649 	if (rdonly(exi, req)) {
1650 		resp->status = NFS3ERR_ROFS;
1651 		goto out1;
1652 	}
1653 
1654 	if (is_system_labeled()) {
1655 		bslabel_t *clabel = req->rq_label;
1656 
1657 		ASSERT(clabel != NULL);
1658 		DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1659 		    "got client label from request(1)", struct svc_req *, req);
1660 
1661 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1662 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1663 			    exi)) {
1664 				resp->status = NFS3ERR_ACCES;
1665 				goto out1;
1666 			}
1667 		}
1668 	}
1669 
	/*
	 * Convert the client-supplied name.  Judging by the frees below,
	 * the result is either args->where.name itself or a separate
	 * MAXPATHLEN + 1 byte buffer that this routine must free.
	 */
1670 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1671 	name = nfscmd_convname(ca, exi, args->where.name,
1672 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1673 
1674 	if (name == NULL) {
1675 		/* This is really a Solaris EILSEQ */
1676 		resp->status = NFS3ERR_INVAL;
1677 		goto out1;
1678 	}
1679 
1680 	if (args->how.mode == EXCLUSIVE) {
		/*
		 * Exclusive create: the file is created with mode 0 and
		 * the client's verifier is stored as its mtime so that
		 * a repeated request can be recognized later.
		 */
1681 		va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1682 		va.va_type = VREG;
1683 		va.va_mode = (mode_t)0;
1684 		/*
1685 		 * Ensure no time overflows and that types match
1686 		 */
1687 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1688 		va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1689 		va.va_mtime.tv_nsec = mtime->nseconds;
1690 		excl = EXCL;
1691 	} else {
1692 		error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1693 		    &va);
1694 		if (error)
1695 			goto out;
1696 		va.va_mask |= AT_TYPE;
1697 		va.va_type = VREG;
1698 		if (args->how.mode == GUARDED)
1699 			excl = EXCL;
1700 		else {
1701 			excl = NONEXCL;
1702 
1703 			/*
1704 			 * During creation of file in non-exclusive mode
1705 			 * if size of file is being set then make sure
1706 			 * that if the file already exists that no conflicting
1707 			 * non-blocking mandatory locks exists in the region
1708 			 * being modified. If there are conflicting locks fail
1709 			 * the operation with EACCES.
1710 			 */
1711 			if (va.va_mask & AT_SIZE) {
1712 				struct vattr tva;
1713 
1714 				/*
1715 				 * Does file already exist?
1716 				 */
1717 				error = VOP_LOOKUP(dvp, name, &tvp,
1718 				    NULL, 0, NULL, cr, NULL, NULL, NULL);
1719 
1720 				/*
1721 				 * Check to see if the file has been delegated
1722 				 * to a v4 client.  If so, then begin recall of
1723 				 * the delegation and return JUKEBOX to allow
1724 				 * the client to retransmit its request.
1725 				 */
1726 
1727 				trunc = va.va_size == 0;
1728 				if (!error &&
1729 				    rfs4_check_delegated(FWRITE, tvp, trunc)) {
1730 					resp->status = NFS3ERR_JUKEBOX;
1731 					goto out1;
1732 				}
1733 
1734 				/*
1735 				 * Check for NBMAND lock conflicts
1736 				 */
1737 				if (!error && nbl_need_check(tvp)) {
1738 					u_offset_t offset;
1739 					ssize_t len;
1740 
					/*
					 * tvp stays held and the critical
					 * region stays entered until the
					 * end of this routine.
					 */
1741 					nbl_start_crit(tvp, RW_READER);
1742 					in_crit = 1;
1743 
1744 					tva.va_mask = AT_SIZE;
1745 					error = VOP_GETATTR(tvp, &tva, 0, cr,
1746 					    NULL);
1747 					/*
1748 					 * Can't check for conflicts, so return
1749 					 * error.
1750 					 */
1751 					if (error)
1752 						goto out;
1753 
					/*
					 * The region checked is the one
					 * between the current size and the
					 * requested size.
					 */
1754 					offset = tva.va_size < va.va_size ?
1755 					    tva.va_size : va.va_size;
1756 					len = tva.va_size < va.va_size ?
1757 					    va.va_size - tva.va_size :
1758 					    tva.va_size - va.va_size;
1759 					if (nbl_conflict(tvp, NBL_WRITE,
1760 					    offset, len, 0, NULL)) {
1761 						error = EACCES;
1762 						goto out;
1763 					}
1764 				} else if (tvp) {
1765 					VN_RELE(tvp);
1766 					tvp = NULL;
1767 				}
1768 			}
1769 		}
		/*
		 * Remember the requested size: va is reused for
		 * VOP_GETATTR() after the create.
		 */
1770 		if (va.va_mask & AT_SIZE)
1771 			reqsize = va.va_size;
1772 	}
1773 
1774 	/*
1775 	 * Must specify the mode.
1776 	 */
1777 	if (!(va.va_mask & AT_MODE)) {
1778 		resp->status = NFS3ERR_INVAL;
1779 		goto out1;
1780 	}
1781 
1782 	/*
1783 	 * If the filesystem is exported with nosuid, then mask off
1784 	 * the setuid and setgid bits.
1785 	 */
1786 	if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1787 		va.va_mode &= ~(VSUID | VSGID);
1788 
1789 tryagain:
1790 	/*
1791 	 * The file open mode used is VWRITE.  If the client needs
1792 	 * some other semantic, then it should do the access checking
1793 	 * itself.  It would have been nice to have the file open mode
1794 	 * passed as part of the arguments.
1795 	 */
1796 	error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1797 	    &vp, cr, 0, NULL, NULL);
1798 
1799 #ifdef DEBUG
1800 	if (rfs3_do_post_op_attr) {
1801 		dava.va_mask = AT_ALL;
1802 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1803 	} else
1804 		davap = NULL;
1805 #else
1806 	dava.va_mask = AT_ALL;
1807 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1808 #endif
1809 
1810 	if (error) {
1811 		/*
1812 		 * If we got something other than file already exists
1813 		 * then just return this error.  Otherwise, we got
1814 		 * EEXIST.  If we were doing a GUARDED create, then
1815 		 * just return this error.  Otherwise, we need to
1816 		 * make sure that this wasn't a duplicate of an
1817 		 * exclusive create request.
1818 		 *
1819 		 * The assumption is made that a non-exclusive create
1820 		 * request will never return EEXIST.
1821 		 */
1822 		if (error != EEXIST || args->how.mode == GUARDED)
1823 			goto out;
1824 		/*
1825 		 * Lookup the file so that we can get a vnode for it.
1826 		 */
1827 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1828 		    NULL, cr, NULL, NULL, NULL);
1829 		if (error) {
1830 			/*
1831 			 * We couldn't find the file that we thought that
1832 			 * we just created.  So, we'll just try creating
1833 			 * it again.
1834 			 */
1835 			if (error == ENOENT)
1836 				goto tryagain;
1837 			goto out;
1838 		}
1839 
1840 		/*
1841 		 * If the file is delegated to a v4 client, go ahead
1842 		 * and initiate recall, this create is a hint that a
1843 		 * conflicting v3 open has occurred.
1844 		 */
1845 
1846 		if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1847 			VN_RELE(vp);
1848 			resp->status = NFS3ERR_JUKEBOX;
1849 			goto out1;
1850 		}
1851 
1852 		va.va_mask = AT_ALL;
1853 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1854 
		/*
		 * For an exclusive create, the existing file only counts
		 * as ours when its mtime equals the request's verifier.
		 */
1855 		mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1856 		/* % with INT32_MAX to prevent overflows */
1857 		if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1858 		    vap->va_mtime.tv_sec !=
1859 		    (mtime->seconds % INT32_MAX) ||
1860 		    vap->va_mtime.tv_nsec != mtime->nseconds)) {
1861 			VN_RELE(vp);
1862 			error = EEXIST;
1863 			goto out;
1864 		}
1865 	} else {
1866 
1867 		if ((args->how.mode == UNCHECKED ||
1868 		    args->how.mode == GUARDED) &&
1869 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1870 		    va.va_size == 0)
1871 			trunc = TRUE;
1872 		else
1873 			trunc = FALSE;
1874 
1875 		if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1876 			VN_RELE(vp);
1877 			resp->status = NFS3ERR_JUKEBOX;
1878 			goto out1;
1879 		}
1880 
1881 		va.va_mask = AT_ALL;
1882 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1883 
1884 		/*
1885 		 * We need to check to make sure that the file got
1886 		 * created to the indicated size.  If not, we do a
1887 		 * setattr to try to change the size, but we don't
1888 		 * try too hard.  This shouldn't be a problem as most
1889 		 * clients will only specify a size of zero which
1890 		 * local file systems handle.  However, even if
1891 		 * the client does specify a non-zero size, it can
1892 		 * still recover by checking the size of the file
1893 		 * after it has created it and then issue a setattr
1894 		 * request of its own to set the size of the file.
1895 		 */
1896 		if (vap != NULL &&
1897 		    (args->how.mode == UNCHECKED ||
1898 		    args->how.mode == GUARDED) &&
1899 		    args->how.createhow3_u.obj_attributes.size.set_it &&
1900 		    vap->va_size != reqsize) {
1901 			va.va_mask = AT_SIZE;
1902 			va.va_size = reqsize;
1903 			(void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1904 			va.va_mask = AT_ALL;
1905 			vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1906 		}
1907 	}
1908 
	/*
	 * The converted name is not used again and no error exit
	 * follows, so it is freed here rather than at out1.
	 */
1909 	if (name != args->where.name)
1910 		kmem_free(name, MAXPATHLEN + 1);
1911 
1912 #ifdef DEBUG
1913 	if (!rfs3_do_post_op_attr)
1914 		vap = NULL;
1915 #endif
1916 
	/* A failure to build the file handle is not fatal to the reply. */
1917 #ifdef DEBUG
1918 	if (!rfs3_do_post_op_fh3)
1919 		resp->resok.obj.handle_follows = FALSE;
1920 	else {
1921 #endif
1922 	error = makefh3(&resp->resok.obj.handle, vp, exi);
1923 	if (error)
1924 		resp->resok.obj.handle_follows = FALSE;
1925 	else
1926 		resp->resok.obj.handle_follows = TRUE;
1927 #ifdef DEBUG
1928 	}
1929 #endif
1930 
1931 	/*
1932 	 * Force modified data and metadata out to stable storage.
1933 	 */
1934 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1935 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1936 
1937 	VN_RELE(vp);
1938 	if (tvp != NULL) {
1939 		if (in_crit)
1940 			nbl_end_crit(tvp);
1941 		VN_RELE(tvp);
1942 	}
1943 
1944 	resp->status = NFS3_OK;
1945 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1946 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1947 
1948 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1949 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1950 
1951 	VN_RELE(dvp);
1952 	return;
1953 
1954 out:
1955 	if (curthread->t_flag & T_WOULDBLOCK) {
1956 		curthread->t_flag &= ~T_WOULDBLOCK;
1957 		resp->status = NFS3ERR_JUKEBOX;
1958 	} else
1959 		resp->status = puterrno3(error);
1960 out1:
1961 	DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1962 	    cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1963 
1964 	if (name != NULL && name != args->where.name)
1965 		kmem_free(name, MAXPATHLEN + 1);
1966 
1967 	if (tvp != NULL) {
1968 		if (in_crit)
1969 			nbl_end_crit(tvp);
1970 		VN_RELE(tvp);
1971 	}
1972 	if (dvp != NULL)
1973 		VN_RELE(dvp);
1974 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1975 }
1976 
1977 void *
1978 rfs3_create_getfh(CREATE3args *args)
1979 {
1980 
1981 	return (&args->where.dir);
1982 }
1983 
/*
 * rfs3_mkdir - NFS version 3 MKDIR service routine.
 *
 *	args	decoded MKDIR3 arguments (parent directory handle, name
 *		and initial attributes)
 *	resp	result structure filled in by this routine
 *	exi	export the directory handle belongs to
 *	req	RPC service request
 *	cr	credentials used for all vnode operations
 *
 * Creates the directory args->where.name in the directory named by
 * args->where.dir using VOP_MKDIR().  The request must supply a mode.
 *
 * On success resp->resok carries the new directory's file handle (when
 * it can be built), its attributes and the parent's wcc data.  On
 * failure resp->status holds the NFS3 error and resp->resfail.dir_wcc
 * the parent's wcc data.
 *
 * Exit labels:
 *	out	translate "error" (or a pending T_WOULDBLOCK) into
 *		resp->status, then fall into out1
 *	out1	resp->status is already set; release dvp and fill in
 *		the failure wcc data
 */
1984 void
1985 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1986 	struct svc_req *req, cred_t *cr)
1987 {
1988 	int error;
1989 	vnode_t *vp = NULL;
1990 	vnode_t *dvp;
1991 	struct vattr *vap;
1992 	struct vattr va;
1993 	struct vattr *dbvap;
1994 	struct vattr dbva;
1995 	struct vattr *davap;
1996 	struct vattr dava;
1997 	struct sockaddr *ca;
1998 	char *name = NULL;
1999 
2000 	dbvap = NULL;
2001 	davap = NULL;
2002 
2003 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2004 
2005 	DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
2006 	    cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
2007 
2008 	if (dvp == NULL) {
2009 		error = ESTALE;
2010 		goto out;
2011 	}
2012 
2013 #ifdef DEBUG
2014 	if (rfs3_do_pre_op_attr) {
2015 		dbva.va_mask = AT_ALL;
2016 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2017 	} else
2018 		dbvap = NULL;
2019 #else
2020 	dbva.va_mask = AT_ALL;
2021 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2022 #endif
	/*
	 * Until the mkdir has been attempted, report the pre-op
	 * directory attributes as the post-op attributes as well.
	 */
2023 	davap = dbvap;
2024 
2025 	if (args->where.name == nfs3nametoolong) {
2026 		resp->status = NFS3ERR_NAMETOOLONG;
2027 		goto out1;
2028 	}
2029 
2030 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2031 		resp->status = NFS3ERR_ACCES;
2032 		goto out1;
2033 	}
2034 
2035 	if (rdonly(exi, req)) {
2036 		resp->status = NFS3ERR_ROFS;
2037 		goto out1;
2038 	}
2039 
2040 	if (is_system_labeled()) {
2041 		bslabel_t *clabel = req->rq_label;
2042 
2043 		ASSERT(clabel != NULL);
2044 		DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
2045 		    "got client label from request(1)", struct svc_req *, req);
2046 
2047 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2048 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2049 			    exi)) {
2050 				resp->status = NFS3ERR_ACCES;
2051 				goto out1;
2052 			}
2053 		}
2054 	}
2055 
2056 	error = sattr3_to_vattr(&args->attributes, &va);
2057 	if (error)
2058 		goto out;
2059 
	/* The mode must be supplied by the client. */
2060 	if (!(va.va_mask & AT_MODE)) {
2061 		resp->status = NFS3ERR_INVAL;
2062 		goto out1;
2063 	}
2064 
	/*
	 * Convert the client-supplied name.  Judging by the free below,
	 * the result is either args->where.name itself or a separate
	 * MAXPATHLEN + 1 byte buffer that this routine must free.
	 */
2065 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2066 	name = nfscmd_convname(ca, exi, args->where.name,
2067 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2068 
2069 	if (name == NULL) {
2070 		resp->status = NFS3ERR_INVAL;
2071 		goto out1;
2072 	}
2073 
2074 	va.va_mask |= AT_TYPE;
2075 	va.va_type = VDIR;
2076 
2077 	error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
2078 
	/*
	 * The name is not used after VOP_MKDIR(), so it is freed here;
	 * the exit labels below do not free it.
	 */
2079 	if (name != args->where.name)
2080 		kmem_free(name, MAXPATHLEN + 1);
2081 
2082 #ifdef DEBUG
2083 	if (rfs3_do_post_op_attr) {
2084 		dava.va_mask = AT_ALL;
2085 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2086 	} else
2087 		davap = NULL;
2088 #else
2089 	dava.va_mask = AT_ALL;
2090 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2091 #endif
2092 
2093 	/*
2094 	 * Force modified data and metadata out to stable storage.
2095 	 */
2096 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2097 
	/* The VOP_MKDIR() result is examined only after the fsync above. */
2098 	if (error)
2099 		goto out;
2100 
	/* A failure to build the file handle is not fatal to the reply. */
2101 #ifdef DEBUG
2102 	if (!rfs3_do_post_op_fh3)
2103 		resp->resok.obj.handle_follows = FALSE;
2104 	else {
2105 #endif
2106 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2107 	if (error)
2108 		resp->resok.obj.handle_follows = FALSE;
2109 	else
2110 		resp->resok.obj.handle_follows = TRUE;
2111 #ifdef DEBUG
2112 	}
2113 #endif
2114 
2115 #ifdef DEBUG
2116 	if (rfs3_do_post_op_attr) {
2117 		va.va_mask = AT_ALL;
2118 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2119 	} else
2120 		vap = NULL;
2121 #else
2122 	va.va_mask = AT_ALL;
2123 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2124 #endif
2125 
2126 	/*
2127 	 * Force modified data and metadata out to stable storage.
2128 	 */
2129 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2130 
2131 	VN_RELE(vp);
2132 
2133 	resp->status = NFS3_OK;
2134 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2135 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2136 
2137 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2138 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2139 	VN_RELE(dvp);
2140 
2141 	return;
2142 
2143 out:
2144 	if (curthread->t_flag & T_WOULDBLOCK) {
2145 		curthread->t_flag &= ~T_WOULDBLOCK;
2146 		resp->status = NFS3ERR_JUKEBOX;
2147 	} else
2148 		resp->status = puterrno3(error);
2149 out1:
2150 	DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2151 	    cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2152 	if (dvp != NULL)
2153 		VN_RELE(dvp);
2154 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2155 }
2156 
2157 void *
2158 rfs3_mkdir_getfh(MKDIR3args *args)
2159 {
2160 
2161 	return (&args->where.dir);
2162 }
2163 
/*
 * rfs3_symlink - NFS version 3 SYMLINK service routine.
 *
 *	args	decoded SYMLINK3 arguments (parent directory handle,
 *		link name, link attributes and link contents)
 *	resp	result structure filled in by this routine
 *	exi	export the directory handle belongs to
 *	req	RPC service request
 *	cr	credentials used for all vnode operations
 *
 * Creates the symbolic link args->where.name with contents
 * args->symlink.symlink_data in the directory named by args->where.dir
 * using VOP_SYMLINK(), then looks the new link up to obtain its vnode.
 * The request must supply a mode.
 *
 * Once VOP_SYMLINK() has succeeded the reply status is NFS3_OK; if the
 * following lookup fails, the reply simply carries no file handle and
 * no object attributes.  On failure resp->status holds the NFS3 error
 * and resp->resfail.dir_wcc the parent's wcc data.
 *
 * Exit labels:
 *	err	translate "error" (or a pending T_WOULDBLOCK) into
 *		resp->status, then fall into err1
 *	err1	resp->status is already set; fill in the failure wcc data
 *	out	common exit for success and failure: free the converted
 *		strings, fire the done probe and release dvp
 */
2164 void
2165 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2166 	struct svc_req *req, cred_t *cr)
2167 {
2168 	int error;
2169 	vnode_t *vp;
2170 	vnode_t *dvp;
2171 	struct vattr *vap;
2172 	struct vattr va;
2173 	struct vattr *dbvap;
2174 	struct vattr dbva;
2175 	struct vattr *davap;
2176 	struct vattr dava;
2177 	struct sockaddr *ca;
2178 	char *name = NULL;
2179 	char *symdata = NULL;
2180 
2181 	dbvap = NULL;
2182 	davap = NULL;
2183 
2184 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2185 
2186 	DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2187 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2188 
2189 	if (dvp == NULL) {
2190 		error = ESTALE;
2191 		goto err;
2192 	}
2193 
2194 #ifdef DEBUG
2195 	if (rfs3_do_pre_op_attr) {
2196 		dbva.va_mask = AT_ALL;
2197 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2198 	} else
2199 		dbvap = NULL;
2200 #else
2201 	dbva.va_mask = AT_ALL;
2202 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2203 #endif
	/*
	 * Until the symlink has been attempted, report the pre-op
	 * directory attributes as the post-op attributes as well.
	 */
2204 	davap = dbvap;
2205 
2206 	if (args->where.name == nfs3nametoolong) {
2207 		resp->status = NFS3ERR_NAMETOOLONG;
2208 		goto err1;
2209 	}
2210 
2211 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2212 		resp->status = NFS3ERR_ACCES;
2213 		goto err1;
2214 	}
2215 
2216 	if (rdonly(exi, req)) {
2217 		resp->status = NFS3ERR_ROFS;
2218 		goto err1;
2219 	}
2220 
2221 	if (is_system_labeled()) {
2222 		bslabel_t *clabel = req->rq_label;
2223 
2224 		ASSERT(clabel != NULL);
2225 		DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2226 		    "got client label from request(1)", struct svc_req *, req);
2227 
2228 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2229 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2230 			    exi)) {
2231 				resp->status = NFS3ERR_ACCES;
2232 				goto err1;
2233 			}
2234 		}
2235 	}
2236 
2237 	error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2238 	if (error)
2239 		goto err;
2240 
	/* The mode must be supplied by the client. */
2241 	if (!(va.va_mask & AT_MODE)) {
2242 		resp->status = NFS3ERR_INVAL;
2243 		goto err1;
2244 	}
2245 
2246 	if (args->symlink.symlink_data == nfs3nametoolong) {
2247 		resp->status = NFS3ERR_NAMETOOLONG;
2248 		goto err1;
2249 	}
2250 
	/*
	 * Convert both the link name and the link contents.  Judging by
	 * the frees at "out", each result is either the original string
	 * or a separate MAXPATHLEN + 1 byte buffer to be freed there.
	 */
2251 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2252 	name = nfscmd_convname(ca, exi, args->where.name,
2253 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2254 
2255 	if (name == NULL) {
2256 		/* This is really a Solaris EILSEQ */
2257 		resp->status = NFS3ERR_INVAL;
2258 		goto err1;
2259 	}
2260 
2261 	symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2262 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2263 	if (symdata == NULL) {
2264 		/* This is really a Solaris EILSEQ */
2265 		resp->status = NFS3ERR_INVAL;
2266 		goto err1;
2267 	}
2268 
2269 
2270 	va.va_mask |= AT_TYPE;
2271 	va.va_type = VLNK;
2272 
2273 	error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2274 
2275 #ifdef DEBUG
2276 	if (rfs3_do_post_op_attr) {
2277 		dava.va_mask = AT_ALL;
2278 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2279 	} else
2280 		davap = NULL;
2281 #else
2282 	dava.va_mask = AT_ALL;
2283 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2284 #endif
2285 
2286 	if (error)
2287 		goto err;
2288 
	/* VOP_SYMLINK() returns no vnode, so look the new link up. */
2289 	error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2290 	    NULL, NULL, NULL);
2291 
2292 	/*
2293 	 * Force modified data and metadata out to stable storage.
2294 	 */
2295 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2296 
2297 
	/*
	 * The link has been created, so the reply is a success even if
	 * the lookup failed; in that case no handle or attributes are
	 * returned for the new object.
	 */
2298 	resp->status = NFS3_OK;
2299 	if (error) {
2300 		resp->resok.obj.handle_follows = FALSE;
2301 		vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2302 		vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2303 		goto out;
2304 	}
2305 
	/* A failure to build the file handle is not fatal to the reply. */
2306 #ifdef DEBUG
2307 	if (!rfs3_do_post_op_fh3)
2308 		resp->resok.obj.handle_follows = FALSE;
2309 	else {
2310 #endif
2311 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2312 	if (error)
2313 		resp->resok.obj.handle_follows = FALSE;
2314 	else
2315 		resp->resok.obj.handle_follows = TRUE;
2316 #ifdef DEBUG
2317 	}
2318 #endif
2319 
2320 #ifdef DEBUG
2321 	if (rfs3_do_post_op_attr) {
2322 		va.va_mask = AT_ALL;
2323 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2324 	} else
2325 		vap = NULL;
2326 #else
2327 	va.va_mask = AT_ALL;
2328 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2329 #endif
2330 
2331 	/*
2332 	 * Force modified data and metadata out to stable storage.
2333 	 */
2334 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2335 
2336 	VN_RELE(vp);
2337 
2338 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2339 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2340 	goto out;
2341 
2342 err:
2343 	if (curthread->t_flag & T_WOULDBLOCK) {
2344 		curthread->t_flag &= ~T_WOULDBLOCK;
2345 		resp->status = NFS3ERR_JUKEBOX;
2346 	} else
2347 		resp->status = puterrno3(error);
2348 err1:
2349 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2350 out:
2351 	if (name != NULL && name != args->where.name)
2352 		kmem_free(name, MAXPATHLEN + 1);
2353 	if (symdata != NULL && symdata != args->symlink.symlink_data)
2354 		kmem_free(symdata, MAXPATHLEN + 1);
2355 
2356 	DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2357 	    cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2358 
2359 	if (dvp != NULL)
2360 		VN_RELE(dvp);
2361 }
2362 
2363 void *
2364 rfs3_symlink_getfh(SYMLINK3args *args)
2365 {
2366 
2367 	return (&args->where.dir);
2368 }
2369 
2370 void
2371 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2372 	struct svc_req *req, cred_t *cr)
2373 {
2374 	int error;
2375 	vnode_t *vp;
2376 	vnode_t *realvp;
2377 	vnode_t *dvp;
2378 	struct vattr *vap;
2379 	struct vattr va;
2380 	struct vattr *dbvap;
2381 	struct vattr dbva;
2382 	struct vattr *davap;
2383 	struct vattr dava;
2384 	int mode;
2385 	enum vcexcl excl;
2386 	struct sockaddr *ca;
2387 	char *name = NULL;
2388 
2389 	dbvap = NULL;
2390 	davap = NULL;
2391 
2392 	dvp = nfs3_fhtovp(&args->where.dir, exi);
2393 
2394 	DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2395 	    cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2396 
2397 	if (dvp == NULL) {
2398 		error = ESTALE;
2399 		goto out;
2400 	}
2401 
2402 #ifdef DEBUG
2403 	if (rfs3_do_pre_op_attr) {
2404 		dbva.va_mask = AT_ALL;
2405 		dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2406 	} else
2407 		dbvap = NULL;
2408 #else
2409 	dbva.va_mask = AT_ALL;
2410 	dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2411 #endif
2412 	davap = dbvap;
2413 
2414 	if (args->where.name == nfs3nametoolong) {
2415 		resp->status = NFS3ERR_NAMETOOLONG;
2416 		goto out1;
2417 	}
2418 
2419 	if (args->where.name == NULL || *(args->where.name) == '\0') {
2420 		resp->status = NFS3ERR_ACCES;
2421 		goto out1;
2422 	}
2423 
2424 	if (rdonly(exi, req)) {
2425 		resp->status = NFS3ERR_ROFS;
2426 		goto out1;
2427 	}
2428 
2429 	if (is_system_labeled()) {
2430 		bslabel_t *clabel = req->rq_label;
2431 
2432 		ASSERT(clabel != NULL);
2433 		DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2434 		    "got client label from request(1)", struct svc_req *, req);
2435 
2436 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2437 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2438 			    exi)) {
2439 				resp->status = NFS3ERR_ACCES;
2440 				goto out1;
2441 			}
2442 		}
2443 	}
2444 
2445 	switch (args->what.type) {
2446 	case NF3CHR:
2447 	case NF3BLK:
2448 		error = sattr3_to_vattr(
2449 		    &args->what.mknoddata3_u.device.dev_attributes, &va);
2450 		if (error)
2451 			goto out;
2452 		if (secpolicy_sys_devices(cr) != 0) {
2453 			resp->status = NFS3ERR_PERM;
2454 			goto out1;
2455 		}
2456 		if (args->what.type == NF3CHR)
2457 			va.va_type = VCHR;
2458 		else
2459 			va.va_type = VBLK;
2460 		va.va_rdev = makedevice(
2461 		    args->what.mknoddata3_u.device.spec.specdata1,
2462 		    args->what.mknoddata3_u.device.spec.specdata2);
2463 		va.va_mask |= AT_TYPE | AT_RDEV;
2464 		break;
2465 	case NF3SOCK:
2466 		error = sattr3_to_vattr(
2467 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2468 		if (error)
2469 			goto out;
2470 		va.va_type = VSOCK;
2471 		va.va_mask |= AT_TYPE;
2472 		break;
2473 	case NF3FIFO:
2474 		error = sattr3_to_vattr(
2475 		    &args->what.mknoddata3_u.pipe_attributes, &va);
2476 		if (error)
2477 			goto out;
2478 		va.va_type = VFIFO;
2479 		va.va_mask |= AT_TYPE;
2480 		break;
2481 	default:
2482 		resp->status = NFS3ERR_BADTYPE;
2483 		goto out1;
2484 	}
2485 
2486 	/*
2487 	 * Must specify the mode.
2488 	 */
2489 	if (!(va.va_mask & AT_MODE)) {
2490 		resp->status = NFS3ERR_INVAL;
2491 		goto out1;
2492 	}
2493 
2494 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2495 	name = nfscmd_convname(ca, exi, args->where.name,
2496 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2497 
2498 	if (name == NULL) {
2499 		resp->status = NFS3ERR_INVAL;
2500 		goto out1;
2501 	}
2502 
2503 	excl = EXCL;
2504 
2505 	mode = 0;
2506 
2507 	error = VOP_CREATE(dvp, name, &va, excl, mode,
2508 	    &vp, cr, 0, NULL, NULL);
2509 
2510 	if (name != args->where.name)
2511 		kmem_free(name, MAXPATHLEN + 1);
2512 
2513 #ifdef DEBUG
2514 	if (rfs3_do_post_op_attr) {
2515 		dava.va_mask = AT_ALL;
2516 		davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2517 	} else
2518 		davap = NULL;
2519 #else
2520 	dava.va_mask = AT_ALL;
2521 	davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2522 #endif
2523 
2524 	/*
2525 	 * Force modified data and metadata out to stable storage.
2526 	 */
2527 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2528 
2529 	if (error)
2530 		goto out;
2531 
2532 	resp->status = NFS3_OK;
2533 
2534 #ifdef DEBUG
2535 	if (!rfs3_do_post_op_fh3)
2536 		resp->resok.obj.handle_follows = FALSE;
2537 	else {
2538 #endif
2539 	error = makefh3(&resp->resok.obj.handle, vp, exi);
2540 	if (error)
2541 		resp->resok.obj.handle_follows = FALSE;
2542 	else
2543 		resp->resok.obj.handle_follows = TRUE;
2544 #ifdef DEBUG
2545 	}
2546 #endif
2547 
2548 #ifdef DEBUG
2549 	if (rfs3_do_post_op_attr) {
2550 		va.va_mask = AT_ALL;
2551 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2552 	} else
2553 		vap = NULL;
2554 #else
2555 	va.va_mask = AT_ALL;
2556 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2557 #endif
2558 
2559 	/*
2560 	 * Force modified metadata out to stable storage.
2561 	 *
2562 	 * if a underlying vp exists, pass it to VOP_FSYNC
2563 	 */
2564 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2565 		(void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2566 	else
2567 		(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2568 
2569 	VN_RELE(vp);
2570 
2571 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2572 	vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2573 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2574 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2575 	VN_RELE(dvp);
2576 	return;
2577 
2578 out:
2579 	if (curthread->t_flag & T_WOULDBLOCK) {
2580 		curthread->t_flag &= ~T_WOULDBLOCK;
2581 		resp->status = NFS3ERR_JUKEBOX;
2582 	} else
2583 		resp->status = puterrno3(error);
2584 out1:
2585 	DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2586 	    cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2587 	if (dvp != NULL)
2588 		VN_RELE(dvp);
2589 	vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2590 }
2591 
2592 void *
2593 rfs3_mknod_getfh(MKNOD3args *args)
2594 {
2595 
2596 	return (&args->where.dir);
2597 }
2598 
2599 void
2600 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2601 	struct svc_req *req, cred_t *cr)
2602 {
2603 	int error = 0;
2604 	vnode_t *vp;
2605 	struct vattr *bvap;
2606 	struct vattr bva;
2607 	struct vattr *avap;
2608 	struct vattr ava;
2609 	vnode_t *targvp = NULL;
2610 	struct sockaddr *ca;
2611 	char *name = NULL;
2612 
2613 	bvap = NULL;
2614 	avap = NULL;
2615 
2616 	vp = nfs3_fhtovp(&args->object.dir, exi);
2617 
2618 	DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2619 	    cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2620 
2621 	if (vp == NULL) {
2622 		error = ESTALE;
2623 		goto err;
2624 	}
2625 
2626 #ifdef DEBUG
2627 	if (rfs3_do_pre_op_attr) {
2628 		bva.va_mask = AT_ALL;
2629 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2630 	} else
2631 		bvap = NULL;
2632 #else
2633 	bva.va_mask = AT_ALL;
2634 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2635 #endif
2636 	avap = bvap;
2637 
2638 	if (vp->v_type != VDIR) {
2639 		resp->status = NFS3ERR_NOTDIR;
2640 		goto err1;
2641 	}
2642 
2643 	if (args->object.name == nfs3nametoolong) {
2644 		resp->status = NFS3ERR_NAMETOOLONG;
2645 		goto err1;
2646 	}
2647 
2648 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2649 		resp->status = NFS3ERR_ACCES;
2650 		goto err1;
2651 	}
2652 
2653 	if (rdonly(exi, req)) {
2654 		resp->status = NFS3ERR_ROFS;
2655 		goto err1;
2656 	}
2657 
2658 	if (is_system_labeled()) {
2659 		bslabel_t *clabel = req->rq_label;
2660 
2661 		ASSERT(clabel != NULL);
2662 		DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2663 		    "got client label from request(1)", struct svc_req *, req);
2664 
2665 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2666 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2667 			    exi)) {
2668 				resp->status = NFS3ERR_ACCES;
2669 				goto err1;
2670 			}
2671 		}
2672 	}
2673 
2674 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2675 	name = nfscmd_convname(ca, exi, args->object.name,
2676 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2677 
2678 	if (name == NULL) {
2679 		resp->status = NFS3ERR_INVAL;
2680 		goto err1;
2681 	}
2682 
2683 	/*
2684 	 * Check for a conflict with a non-blocking mandatory share
2685 	 * reservation and V4 delegations
2686 	 */
2687 	error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2688 	    NULL, cr, NULL, NULL, NULL);
2689 	if (error != 0)
2690 		goto err;
2691 
2692 	if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2693 		resp->status = NFS3ERR_JUKEBOX;
2694 		goto err1;
2695 	}
2696 
2697 	if (!nbl_need_check(targvp)) {
2698 		error = VOP_REMOVE(vp, name, cr, NULL, 0);
2699 	} else {
2700 		nbl_start_crit(targvp, RW_READER);
2701 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2702 			error = EACCES;
2703 		} else {
2704 			error = VOP_REMOVE(vp, name, cr, NULL, 0);
2705 		}
2706 		nbl_end_crit(targvp);
2707 	}
2708 	VN_RELE(targvp);
2709 	targvp = NULL;
2710 
2711 #ifdef DEBUG
2712 	if (rfs3_do_post_op_attr) {
2713 		ava.va_mask = AT_ALL;
2714 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2715 	} else
2716 		avap = NULL;
2717 #else
2718 	ava.va_mask = AT_ALL;
2719 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2720 #endif
2721 
2722 	/*
2723 	 * Force modified data and metadata out to stable storage.
2724 	 */
2725 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2726 
2727 	if (error)
2728 		goto err;
2729 
2730 	resp->status = NFS3_OK;
2731 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2732 	goto out;
2733 
2734 err:
2735 	if (curthread->t_flag & T_WOULDBLOCK) {
2736 		curthread->t_flag &= ~T_WOULDBLOCK;
2737 		resp->status = NFS3ERR_JUKEBOX;
2738 	} else
2739 		resp->status = puterrno3(error);
2740 err1:
2741 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2742 out:
2743 	DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2744 	    cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2745 
2746 	if (name != NULL && name != args->object.name)
2747 		kmem_free(name, MAXPATHLEN + 1);
2748 
2749 	if (vp != NULL)
2750 		VN_RELE(vp);
2751 }
2752 
2753 void *
2754 rfs3_remove_getfh(REMOVE3args *args)
2755 {
2756 
2757 	return (&args->object.dir);
2758 }
2759 
2760 void
2761 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2762 	struct svc_req *req, cred_t *cr)
2763 {
2764 	int error;
2765 	vnode_t *vp;
2766 	struct vattr *bvap;
2767 	struct vattr bva;
2768 	struct vattr *avap;
2769 	struct vattr ava;
2770 	struct sockaddr *ca;
2771 	char *name = NULL;
2772 
2773 	bvap = NULL;
2774 	avap = NULL;
2775 
2776 	vp = nfs3_fhtovp(&args->object.dir, exi);
2777 
2778 	DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2779 	    cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2780 
2781 	if (vp == NULL) {
2782 		error = ESTALE;
2783 		goto err;
2784 	}
2785 
2786 #ifdef DEBUG
2787 	if (rfs3_do_pre_op_attr) {
2788 		bva.va_mask = AT_ALL;
2789 		bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2790 	} else
2791 		bvap = NULL;
2792 #else
2793 	bva.va_mask = AT_ALL;
2794 	bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2795 #endif
2796 	avap = bvap;
2797 
2798 	if (vp->v_type != VDIR) {
2799 		resp->status = NFS3ERR_NOTDIR;
2800 		goto err1;
2801 	}
2802 
2803 	if (args->object.name == nfs3nametoolong) {
2804 		resp->status = NFS3ERR_NAMETOOLONG;
2805 		goto err1;
2806 	}
2807 
2808 	if (args->object.name == NULL || *(args->object.name) == '\0') {
2809 		resp->status = NFS3ERR_ACCES;
2810 		goto err1;
2811 	}
2812 
2813 	if (rdonly(exi, req)) {
2814 		resp->status = NFS3ERR_ROFS;
2815 		goto err1;
2816 	}
2817 
2818 	if (is_system_labeled()) {
2819 		bslabel_t *clabel = req->rq_label;
2820 
2821 		ASSERT(clabel != NULL);
2822 		DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2823 		    "got client label from request(1)", struct svc_req *, req);
2824 
2825 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2826 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2827 			    exi)) {
2828 				resp->status = NFS3ERR_ACCES;
2829 				goto err1;
2830 			}
2831 		}
2832 	}
2833 
2834 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2835 	name = nfscmd_convname(ca, exi, args->object.name,
2836 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2837 
2838 	if (name == NULL) {
2839 		resp->status = NFS3ERR_INVAL;
2840 		goto err1;
2841 	}
2842 
2843 	error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
2844 
2845 	if (name != args->object.name)
2846 		kmem_free(name, MAXPATHLEN + 1);
2847 
2848 #ifdef DEBUG
2849 	if (rfs3_do_post_op_attr) {
2850 		ava.va_mask = AT_ALL;
2851 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2852 	} else
2853 		avap = NULL;
2854 #else
2855 	ava.va_mask = AT_ALL;
2856 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2857 #endif
2858 
2859 	/*
2860 	 * Force modified data and metadata out to stable storage.
2861 	 */
2862 	(void) VOP_FSYNC(vp, 0, cr, NULL);
2863 
2864 	if (error) {
2865 		/*
2866 		 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2867 		 * if the directory is not empty.  A System V NFS server
2868 		 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2869 		 * over the wire.
2870 		 */
2871 		if (error == EEXIST)
2872 			error = ENOTEMPTY;
2873 		goto err;
2874 	}
2875 
2876 	resp->status = NFS3_OK;
2877 	vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2878 	goto out;
2879 
2880 err:
2881 	if (curthread->t_flag & T_WOULDBLOCK) {
2882 		curthread->t_flag &= ~T_WOULDBLOCK;
2883 		resp->status = NFS3ERR_JUKEBOX;
2884 	} else
2885 		resp->status = puterrno3(error);
2886 err1:
2887 	vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2888 out:
2889 	DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2890 	    cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2891 	if (vp != NULL)
2892 		VN_RELE(vp);
2893 
2894 }
2895 
2896 void *
2897 rfs3_rmdir_getfh(RMDIR3args *args)
2898 {
2899 
2900 	return (&args->object.dir);
2901 }
2902 
2903 void
2904 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2905 	struct svc_req *req, cred_t *cr)
2906 {
2907 	int error = 0;
2908 	vnode_t *fvp;
2909 	vnode_t *tvp;
2910 	vnode_t *targvp;
2911 	struct vattr *fbvap;
2912 	struct vattr fbva;
2913 	struct vattr *favap;
2914 	struct vattr fava;
2915 	struct vattr *tbvap;
2916 	struct vattr tbva;
2917 	struct vattr *tavap;
2918 	struct vattr tava;
2919 	nfs_fh3 *fh3;
2920 	struct exportinfo *to_exi;
2921 	vnode_t *srcvp = NULL;
2922 	bslabel_t *clabel;
2923 	struct sockaddr *ca;
2924 	char *name = NULL;
2925 	char *toname = NULL;
2926 
2927 	fbvap = NULL;
2928 	favap = NULL;
2929 	tbvap = NULL;
2930 	tavap = NULL;
2931 	tvp = NULL;
2932 
2933 	fvp = nfs3_fhtovp(&args->from.dir, exi);
2934 
2935 	DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2936 	    cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2937 
2938 	if (fvp == NULL) {
2939 		error = ESTALE;
2940 		goto err;
2941 	}
2942 
2943 	if (is_system_labeled()) {
2944 		clabel = req->rq_label;
2945 		ASSERT(clabel != NULL);
2946 		DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2947 		    "got client label from request(1)", struct svc_req *, req);
2948 
2949 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2950 			if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2951 			    exi)) {
2952 				resp->status = NFS3ERR_ACCES;
2953 				goto err1;
2954 			}
2955 		}
2956 	}
2957 
2958 #ifdef DEBUG
2959 	if (rfs3_do_pre_op_attr) {
2960 		fbva.va_mask = AT_ALL;
2961 		fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2962 	} else
2963 		fbvap = NULL;
2964 #else
2965 	fbva.va_mask = AT_ALL;
2966 	fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2967 #endif
2968 	favap = fbvap;
2969 
2970 	fh3 = &args->to.dir;
2971 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2972 	if (to_exi == NULL) {
2973 		resp->status = NFS3ERR_ACCES;
2974 		goto err1;
2975 	}
2976 	exi_rele(to_exi);
2977 
2978 	if (to_exi != exi) {
2979 		resp->status = NFS3ERR_XDEV;
2980 		goto err1;
2981 	}
2982 
2983 	tvp = nfs3_fhtovp(&args->to.dir, exi);
2984 	if (tvp == NULL) {
2985 		error = ESTALE;
2986 		goto err;
2987 	}
2988 
2989 #ifdef DEBUG
2990 	if (rfs3_do_pre_op_attr) {
2991 		tbva.va_mask = AT_ALL;
2992 		tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2993 	} else
2994 		tbvap = NULL;
2995 #else
2996 	tbva.va_mask = AT_ALL;
2997 	tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2998 #endif
2999 	tavap = tbvap;
3000 
3001 	if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
3002 		resp->status = NFS3ERR_NOTDIR;
3003 		goto err1;
3004 	}
3005 
3006 	if (args->from.name == nfs3nametoolong ||
3007 	    args->to.name == nfs3nametoolong) {
3008 		resp->status = NFS3ERR_NAMETOOLONG;
3009 		goto err1;
3010 	}
3011 	if (args->from.name == NULL || *(args->from.name) == '\0' ||
3012 	    args->to.name == NULL || *(args->to.name) == '\0') {
3013 		resp->status = NFS3ERR_ACCES;
3014 		goto err1;
3015 	}
3016 
3017 	if (rdonly(exi, req)) {
3018 		resp->status = NFS3ERR_ROFS;
3019 		goto err1;
3020 	}
3021 
3022 	if (is_system_labeled()) {
3023 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3024 			if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
3025 			    exi)) {
3026 				resp->status = NFS3ERR_ACCES;
3027 				goto err1;
3028 			}
3029 		}
3030 	}
3031 
3032 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3033 	name = nfscmd_convname(ca, exi, args->from.name,
3034 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3035 
3036 	if (name == NULL) {
3037 		resp->status = NFS3ERR_INVAL;
3038 		goto err1;
3039 	}
3040 
3041 	toname = nfscmd_convname(ca, exi, args->to.name,
3042 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3043 
3044 	if (toname == NULL) {
3045 		resp->status = NFS3ERR_INVAL;
3046 		goto err1;
3047 	}
3048 
3049 	/*
3050 	 * Check for a conflict with a non-blocking mandatory share
3051 	 * reservation or V4 delegations.
3052 	 */
3053 	error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
3054 	    NULL, cr, NULL, NULL, NULL);
3055 	if (error != 0)
3056 		goto err;
3057 
3058 	/*
3059 	 * If we rename a delegated file we should recall the
3060 	 * delegation, since future opens should fail or would
3061 	 * refer to a new file.
3062 	 */
3063 	if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
3064 		resp->status = NFS3ERR_JUKEBOX;
3065 		goto err1;
3066 	}
3067 
3068 	/*
3069 	 * Check for renaming over a delegated file.  Check rfs4_deleg_policy
3070 	 * first to avoid VOP_LOOKUP if possible.
3071 	 */
3072 	if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
3073 	    VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
3074 	    NULL, NULL, NULL) == 0) {
3075 
3076 		if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
3077 			VN_RELE(targvp);
3078 			resp->status = NFS3ERR_JUKEBOX;
3079 			goto err1;
3080 		}
3081 		VN_RELE(targvp);
3082 	}
3083 
3084 	if (!nbl_need_check(srcvp)) {
3085 		error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
3086 	} else {
3087 		nbl_start_crit(srcvp, RW_READER);
3088 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
3089 			error = EACCES;
3090 		else
3091 			error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
3092 		nbl_end_crit(srcvp);
3093 	}
3094 	if (error == 0)
3095 		vn_renamepath(tvp, srcvp, args->to.name,
3096 		    strlen(args->to.name));
3097 	VN_RELE(srcvp);
3098 	srcvp = NULL;
3099 
3100 #ifdef DEBUG
3101 	if (rfs3_do_post_op_attr) {
3102 		fava.va_mask = AT_ALL;
3103 		favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
3104 		tava.va_mask = AT_ALL;
3105 		tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
3106 	} else {
3107 		favap = NULL;
3108 		tavap = NULL;
3109 	}
3110 #else
3111 	fava.va_mask = AT_ALL;
3112 	favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
3113 	tava.va_mask = AT_ALL;
3114 	tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
3115 #endif
3116 
3117 	/*
3118 	 * Force modified data and metadata out to stable storage.
3119 	 */
3120 	(void) VOP_FSYNC(fvp, 0, cr, NULL);
3121 	(void) VOP_FSYNC(tvp, 0, cr, NULL);
3122 
3123 	if (error)
3124 		goto err;
3125 
3126 	resp->status = NFS3_OK;
3127 	vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
3128 	vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
3129 	goto out;
3130 
3131 err:
3132 	if (curthread->t_flag & T_WOULDBLOCK) {
3133 		curthread->t_flag &= ~T_WOULDBLOCK;
3134 		resp->status = NFS3ERR_JUKEBOX;
3135 	} else {
3136 		resp->status = puterrno3(error);
3137 	}
3138 err1:
3139 	vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
3140 	vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
3141 
3142 out:
3143 	if (name != NULL && name != args->from.name)
3144 		kmem_free(name, MAXPATHLEN + 1);
3145 	if (toname != NULL && toname != args->to.name)
3146 		kmem_free(toname, MAXPATHLEN + 1);
3147 
3148 	DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
3149 	    cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
3150 	if (fvp != NULL)
3151 		VN_RELE(fvp);
3152 	if (tvp != NULL)
3153 		VN_RELE(tvp);
3154 }
3155 
3156 void *
3157 rfs3_rename_getfh(RENAME3args *args)
3158 {
3159 
3160 	return (&args->from.dir);
3161 }
3162 
3163 void
3164 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
3165 	struct svc_req *req, cred_t *cr)
3166 {
3167 	int error;
3168 	vnode_t *vp;
3169 	vnode_t *dvp;
3170 	struct vattr *vap;
3171 	struct vattr va;
3172 	struct vattr *bvap;
3173 	struct vattr bva;
3174 	struct vattr *avap;
3175 	struct vattr ava;
3176 	nfs_fh3	*fh3;
3177 	struct exportinfo *to_exi;
3178 	bslabel_t *clabel;
3179 	struct sockaddr *ca;
3180 	char *name = NULL;
3181 
3182 	vap = NULL;
3183 	bvap = NULL;
3184 	avap = NULL;
3185 	dvp = NULL;
3186 
3187 	vp = nfs3_fhtovp(&args->file, exi);
3188 
3189 	DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
3190 	    cred_t *, cr, vnode_t *, vp, LINK3args *, args);
3191 
3192 	if (vp == NULL) {
3193 		error = ESTALE;
3194 		goto out;
3195 	}
3196 
3197 #ifdef DEBUG
3198 	if (rfs3_do_pre_op_attr) {
3199 		va.va_mask = AT_ALL;
3200 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3201 	} else
3202 		vap = NULL;
3203 #else
3204 	va.va_mask = AT_ALL;
3205 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3206 #endif
3207 
3208 	fh3 = &args->link.dir;
3209 	to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
3210 	if (to_exi == NULL) {
3211 		resp->status = NFS3ERR_ACCES;
3212 		goto out1;
3213 	}
3214 	exi_rele(to_exi);
3215 
3216 	if (to_exi != exi) {
3217 		resp->status = NFS3ERR_XDEV;
3218 		goto out1;
3219 	}
3220 
3221 	if (is_system_labeled()) {
3222 		clabel = req->rq_label;
3223 
3224 		ASSERT(clabel != NULL);
3225 		DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
3226 		    "got client label from request(1)", struct svc_req *, req);
3227 
3228 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3229 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3230 			    exi)) {
3231 				resp->status = NFS3ERR_ACCES;
3232 				goto out1;
3233 			}
3234 		}
3235 	}
3236 
3237 	dvp = nfs3_fhtovp(&args->link.dir, exi);
3238 	if (dvp == NULL) {
3239 		error = ESTALE;
3240 		goto out;
3241 	}
3242 
3243 #ifdef DEBUG
3244 	if (rfs3_do_pre_op_attr) {
3245 		bva.va_mask = AT_ALL;
3246 		bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3247 	} else
3248 		bvap = NULL;
3249 #else
3250 	bva.va_mask = AT_ALL;
3251 	bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
3252 #endif
3253 
3254 	if (dvp->v_type != VDIR) {
3255 		resp->status = NFS3ERR_NOTDIR;
3256 		goto out1;
3257 	}
3258 
3259 	if (args->link.name == nfs3nametoolong) {
3260 		resp->status = NFS3ERR_NAMETOOLONG;
3261 		goto out1;
3262 	}
3263 
3264 	if (args->link.name == NULL || *(args->link.name) == '\0') {
3265 		resp->status = NFS3ERR_ACCES;
3266 		goto out1;
3267 	}
3268 
3269 	if (rdonly(exi, req)) {
3270 		resp->status = NFS3ERR_ROFS;
3271 		goto out1;
3272 	}
3273 
3274 	if (is_system_labeled()) {
3275 		DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
3276 		    "got client label from request(1)", struct svc_req *, req);
3277 
3278 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3279 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
3280 			    exi)) {
3281 				resp->status = NFS3ERR_ACCES;
3282 				goto out1;
3283 			}
3284 		}
3285 	}
3286 
3287 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3288 	name = nfscmd_convname(ca, exi, args->link.name,
3289 	    NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3290 
3291 	if (name == NULL) {
3292 		resp->status = NFS3ERR_SERVERFAULT;
3293 		goto out1;
3294 	}
3295 
3296 	error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3297 
3298 #ifdef DEBUG
3299 	if (rfs3_do_post_op_attr) {
3300 		va.va_mask = AT_ALL;
3301 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3302 		ava.va_mask = AT_ALL;
3303 		avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3304 	} else {
3305 		vap = NULL;
3306 		avap = NULL;
3307 	}
3308 #else
3309 	va.va_mask = AT_ALL;
3310 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3311 	ava.va_mask = AT_ALL;
3312 	avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3313 #endif
3314 
3315 	/*
3316 	 * Force modified data and metadata out to stable storage.
3317 	 */
3318 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3319 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
3320 
3321 	if (error)
3322 		goto out;
3323 
3324 	VN_RELE(dvp);
3325 
3326 	resp->status = NFS3_OK;
3327 	vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3328 	vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3329 
3330 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3331 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3332 
3333 	VN_RELE(vp);
3334 
3335 	return;
3336 
3337 out:
3338 	if (curthread->t_flag & T_WOULDBLOCK) {
3339 		curthread->t_flag &= ~T_WOULDBLOCK;
3340 		resp->status = NFS3ERR_JUKEBOX;
3341 	} else
3342 		resp->status = puterrno3(error);
3343 out1:
3344 	if (name != NULL && name != args->link.name)
3345 		kmem_free(name, MAXPATHLEN + 1);
3346 
3347 	DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3348 	    cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3349 
3350 	if (vp != NULL)
3351 		VN_RELE(vp);
3352 	if (dvp != NULL)
3353 		VN_RELE(dvp);
3354 	vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3355 	vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3356 }
3357 
3358 void *
3359 rfs3_link_getfh(LINK3args *args)
3360 {
3361 
3362 	return (&args->file);
3363 }
3364 
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose name length is specified
 * by the macro parameter).  If the count in the incoming request is
 * larger than this, then we are guaranteed to be able to return at
 * least one directory entry if one exists.  Therefore, we do not need
 * to check for NFS3ERR_TOOSMALL if the requested size is larger than
 * this.  If it is not, then we need to check to make sure that this
 * error does not need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is composed of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes  - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Name length of the directory entry, rounded up to a multiple of
 * BYTES_PER_XDR_UNIT
 */
3389 
/* XDR-padded name length plus the fixed per-reply overhead, in bytes. */
#define	NFS3_READDIR_MIN_COUNT(length)	\
	(roundup((length), BYTES_PER_XDR_UNIT) + BYTES_PER_XDR_UNIT * \
	    (1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3393 
3394 /* ARGSUSED */
3395 void
3396 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3397 	struct svc_req *req, cred_t *cr)
3398 {
3399 	int error;
3400 	vnode_t *vp;
3401 	struct vattr *vap;
3402 	struct vattr va;
3403 	struct iovec iov;
3404 	struct uio uio;
3405 	char *data;
3406 	int iseof;
3407 	int bufsize;
3408 	int namlen;
3409 	uint_t count;
3410 	struct sockaddr *ca;
3411 
3412 	vap = NULL;
3413 
3414 	vp = nfs3_fhtovp(&args->dir, exi);
3415 
3416 	DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3417 	    cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3418 
3419 	if (vp == NULL) {
3420 		error = ESTALE;
3421 		goto out;
3422 	}
3423 
3424 	if (is_system_labeled()) {
3425 		bslabel_t *clabel = req->rq_label;
3426 
3427 		ASSERT(clabel != NULL);
3428 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3429 		    "got client label from request(1)", struct svc_req *, req);
3430 
3431 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3432 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3433 			    exi)) {
3434 				resp->status = NFS3ERR_ACCES;
3435 				goto out1;
3436 			}
3437 		}
3438 	}
3439 
3440 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3441 
3442 #ifdef DEBUG
3443 	if (rfs3_do_pre_op_attr) {
3444 		va.va_mask = AT_ALL;
3445 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3446 	} else
3447 		vap = NULL;
3448 #else
3449 	va.va_mask = AT_ALL;
3450 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3451 #endif
3452 
3453 	if (vp->v_type != VDIR) {
3454 		resp->status = NFS3ERR_NOTDIR;
3455 		goto out1;
3456 	}
3457 
3458 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3459 	if (error)
3460 		goto out;
3461 
3462 	/*
3463 	 * Now don't allow arbitrary count to alloc;
3464 	 * allow the maximum not to exceed rfs3_tsize()
3465 	 */
3466 	if (args->count > rfs3_tsize(req))
3467 		args->count = rfs3_tsize(req);
3468 
3469 	/*
3470 	 * Make sure that there is room to read at least one entry
3471 	 * if any are available.
3472 	 */
3473 	if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3474 		count = DIRENT64_RECLEN(MAXNAMELEN);
3475 	else
3476 		count = args->count;
3477 
3478 	data = kmem_alloc(count, KM_SLEEP);
3479 
3480 	iov.iov_base = data;
3481 	iov.iov_len = count;
3482 	uio.uio_iov = &iov;
3483 	uio.uio_iovcnt = 1;
3484 	uio.uio_segflg = UIO_SYSSPACE;
3485 	uio.uio_extflg = UIO_COPY_CACHED;
3486 	uio.uio_loffset = (offset_t)args->cookie;
3487 	uio.uio_resid = count;
3488 
3489 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3490 
3491 #ifdef DEBUG
3492 	if (rfs3_do_post_op_attr) {
3493 		va.va_mask = AT_ALL;
3494 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3495 	} else
3496 		vap = NULL;
3497 #else
3498 	va.va_mask = AT_ALL;
3499 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3500 #endif
3501 
3502 	if (error) {
3503 		kmem_free(data, count);
3504 		goto out;
3505 	}
3506 
3507 	/*
3508 	 * If the count was not large enough to be able to guarantee
3509 	 * to be able to return at least one entry, then need to
3510 	 * check to see if NFS3ERR_TOOSMALL should be returned.
3511 	 */
3512 	if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3513 		/*
3514 		 * bufsize is used to keep track of the size of the response.
3515 		 * It is primed with:
3516 		 *	1 for the status +
3517 		 *	1 for the dir_attributes.attributes boolean +
3518 		 *	2 for the cookie verifier
3519 		 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3520 		 * to bytes.  If there are directory attributes to be
3521 		 * returned, then:
3522 		 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3523 		 * time BYTES_PER_XDR_UNIT is added to account for them.
3524 		 */
3525 		bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3526 		if (vap != NULL)
3527 			bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3528 		/*
3529 		 * An entry is composed of:
3530 		 *	1 for the true/false list indicator +
3531 		 *	2 for the fileid +
3532 		 *	1 for the length of the name +
3533 		 *	2 for the cookie +
3534 		 * all times BYTES_PER_XDR_UNIT to convert from
3535 		 * XDR units to bytes, plus the length of the name
3536 		 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3537 		 */
3538 		if (count != uio.uio_resid) {
3539 			namlen = strlen(((struct dirent64 *)data)->d_name);
3540 			bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3541 			    roundup(namlen, BYTES_PER_XDR_UNIT);
3542 		}
3543 		/*
3544 		 * We need to check to see if the number of bytes left
3545 		 * to go into the buffer will actually fit into the
3546 		 * buffer.  This is calculated as the size of this
3547 		 * entry plus:
3548 		 *	1 for the true/false list indicator +
3549 		 *	1 for the eof indicator
		 * times BYTES_PER_XDR_UNIT to convert from
3551 		 * XDR units to bytes.
3552 		 */
3553 		bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3554 		if (bufsize > args->count) {
3555 			kmem_free(data, count);
3556 			resp->status = NFS3ERR_TOOSMALL;
3557 			goto out1;
3558 		}
3559 	}
3560 
	/*
	 * Have a valid readdir buffer for the native character
	 * set. Need to check if a conversion is necessary and
	 * potentially rewrite the whole buffer. Note that if the
	 * conversion expands names enough, the structure may not
	 * fit. In this case, we need to drop entries until it fits
	 * and patch the counts in order that the next readdir will
	 * get the correct entries.
	 */
3570 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3571 	data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3572 
3573 
3574 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3575 
3576 #if 0 /* notyet */
3577 	/*
3578 	 * Don't do this.  It causes local disk writes when just
3579 	 * reading the file and the overhead is deemed larger
3580 	 * than the benefit.
3581 	 */
3582 	/*
3583 	 * Force modified metadata out to stable storage.
3584 	 */
3585 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3586 #endif
3587 
3588 	resp->status = NFS3_OK;
3589 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3590 	resp->resok.cookieverf = 0;
3591 	resp->resok.reply.entries = (entry3 *)data;
3592 	resp->resok.reply.eof = iseof;
3593 	resp->resok.size = count - uio.uio_resid;
3594 	resp->resok.count = args->count;
3595 	resp->resok.freecount = count;
3596 
3597 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3598 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3599 
3600 	VN_RELE(vp);
3601 
3602 	return;
3603 
3604 out:
3605 	if (curthread->t_flag & T_WOULDBLOCK) {
3606 		curthread->t_flag &= ~T_WOULDBLOCK;
3607 		resp->status = NFS3ERR_JUKEBOX;
3608 	} else
3609 		resp->status = puterrno3(error);
3610 out1:
3611 	DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3612 	    cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3613 
3614 	if (vp != NULL) {
3615 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3616 		VN_RELE(vp);
3617 	}
3618 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3619 }
3620 
3621 void *
3622 rfs3_readdir_getfh(READDIR3args *args)
3623 {
3624 
3625 	return (&args->dir);
3626 }
3627 
3628 void
3629 rfs3_readdir_free(READDIR3res *resp)
3630 {
3631 
3632 	if (resp->status == NFS3_OK)
3633 		kmem_free(resp->resok.reply.entries, resp->resok.freecount);
3634 }
3635 
/*
 * nextdp() steps from one dirent64 record to the record that follows it in
 * the same buffer, using the record's own d_reclen.  Any definition that
 * was already in effect is discarded first.
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))

/*
 * NFS3_READDIRPLUS_ENTRY(namelen) is the number of bytes that a single
 * READDIRPLUS entry, complete with attributes and file handle, occupies in
 * the response.  A request with at least this much room left is guaranteed
 * to be able to hold one more directory entry if one exists.
 *
 * The fixed part, counted in XDR units and scaled by BYTES_PER_XDR_UNIT:
 *
 *	1			list-continues boolean
 *	2			file id
 *	1			length of the name
 *	2			cookie
 *	1			attribute flag
 *	NFS3_SIZEOF_FATTR3	attributes
 *	1			file handle status
 *	1			file handle length
 *
 * The variable part, already in bytes:
 *
 *	NFS3_MAXFHSIZE		maximum length of a file handle
 *	namelen			rounded up to a whole XDR unit
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)	\
	(NFS3_MAXFHSIZE + roundup(namelen, BYTES_PER_XDR_UNIT) + \
	(1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) * BYTES_PER_XDR_UNIT)
3664 
3665 static int rfs3_readdir_unit = MAXBSIZE;
3666 
3667 /* ARGSUSED */
3668 void
3669 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3670 	struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3671 {
3672 	int error;
3673 	vnode_t *vp;
3674 	struct vattr *vap;
3675 	struct vattr va;
3676 	struct iovec iov;
3677 	struct uio uio;
3678 	char *data;
3679 	int iseof;
3680 	struct dirent64 *dp;
3681 	vnode_t *nvp;
3682 	struct vattr *nvap;
3683 	struct vattr nva;
3684 	entryplus3_info *infop = NULL;
3685 	int size = 0;
3686 	int nents = 0;
3687 	int bufsize = 0;
3688 	int entrysize = 0;
3689 	int tofit = 0;
3690 	int rd_unit = rfs3_readdir_unit;
3691 	int prev_len;
3692 	int space_left;
3693 	int i;
3694 	uint_t *namlen = NULL;
3695 	char *ndata = NULL;
3696 	struct sockaddr *ca;
3697 	size_t ret;
3698 
3699 	vap = NULL;
3700 
3701 	vp = nfs3_fhtovp(&args->dir, exi);
3702 
3703 	DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3704 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3705 
3706 	if (vp == NULL) {
3707 		error = ESTALE;
3708 		goto out;
3709 	}
3710 
3711 	if (is_system_labeled()) {
3712 		bslabel_t *clabel = req->rq_label;
3713 
3714 		ASSERT(clabel != NULL);
3715 		DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3716 		    char *, "got client label from request(1)",
3717 		    struct svc_req *, req);
3718 
3719 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3720 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3721 			    exi)) {
3722 				resp->status = NFS3ERR_ACCES;
3723 				goto out1;
3724 			}
3725 		}
3726 	}
3727 
3728 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3729 
3730 #ifdef DEBUG
3731 	if (rfs3_do_pre_op_attr) {
3732 		va.va_mask = AT_ALL;
3733 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3734 	} else
3735 		vap = NULL;
3736 #else
3737 	va.va_mask = AT_ALL;
3738 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3739 #endif
3740 
3741 	if (vp->v_type != VDIR) {
3742 		error = ENOTDIR;
3743 		goto out;
3744 	}
3745 
3746 	error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3747 	if (error)
3748 		goto out;
3749 
3750 	/*
3751 	 * Don't allow arbitrary counts for allocation
3752 	 */
3753 	if (args->maxcount > rfs3_tsize(req))
3754 		args->maxcount = rfs3_tsize(req);
3755 
3756 	/*
3757 	 * Make sure that there is room to read at least one entry
3758 	 * if any are available
3759 	 */
3760 	args->dircount = MIN(args->dircount, args->maxcount);
3761 
3762 	if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3763 		args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3764 
3765 	/*
3766 	 * This allocation relies on a minimum directory entry
3767 	 * being roughly 24 bytes.  Therefore, the namlen array
3768 	 * will have enough space based on the maximum number of
3769 	 * entries to read.
3770 	 */
3771 	namlen = kmem_alloc(args->dircount, KM_SLEEP);
3772 
3773 	space_left = args->dircount;
3774 	data = kmem_alloc(args->dircount, KM_SLEEP);
3775 	dp = (struct dirent64 *)data;
3776 	uio.uio_iov = &iov;
3777 	uio.uio_iovcnt = 1;
3778 	uio.uio_segflg = UIO_SYSSPACE;
3779 	uio.uio_extflg = UIO_COPY_CACHED;
3780 	uio.uio_loffset = (offset_t)args->cookie;
3781 
3782 	/*
3783 	 * bufsize is used to keep track of the size of the response as we
3784 	 * get post op attributes and filehandles for each entry.  This is
3785 	 * an optimization as the server may have read more entries than will
3786 	 * fit in the buffer specified by maxcount.  We stop calculating
3787 	 * post op attributes and filehandles once we have exceeded maxcount.
3788 	 * This will minimize the effect of truncation.
3789 	 *
3790 	 * It is primed with:
3791 	 *	1 for the status +
3792 	 *	1 for the dir_attributes.attributes boolean +
3793 	 *	2 for the cookie verifier
3794 	 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3795 	 * to bytes.  If there are directory attributes to be
3796 	 * returned, then:
3797 	 *	NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3798 	 * time BYTES_PER_XDR_UNIT is added to account for them.
3799 	 */
3800 	bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3801 	if (vap != NULL)
3802 		bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3803 
3804 getmoredents:
3805 	/*
3806 	 * Here we make a check so that our read unit is not larger than
3807 	 * the space left in the buffer.
3808 	 */
3809 	rd_unit = MIN(rd_unit, space_left);
3810 	iov.iov_base = (char *)dp;
3811 	iov.iov_len = rd_unit;
3812 	uio.uio_resid = rd_unit;
3813 	prev_len = rd_unit;
3814 
3815 	error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3816 
3817 	if (error) {
3818 		kmem_free(data, args->dircount);
3819 		goto out;
3820 	}
3821 
3822 	if (uio.uio_resid == prev_len && !iseof) {
3823 		if (nents == 0) {
3824 			kmem_free(data, args->dircount);
3825 			resp->status = NFS3ERR_TOOSMALL;
3826 			goto out1;
3827 		}
3828 
3829 		/*
3830 		 * We could not get any more entries, so get the attributes
3831 		 * and filehandle for the entries already obtained.
3832 		 */
3833 		goto good;
3834 	}
3835 
3836 	/*
3837 	 * We estimate the size of the response by assuming the
3838 	 * entry exists and attributes and filehandle are also valid
3839 	 */
3840 	for (size = prev_len - uio.uio_resid;
3841 	    size > 0;
3842 	    size -= dp->d_reclen, dp = nextdp(dp)) {
3843 
3844 		if (dp->d_ino == 0) {
3845 			nents++;
3846 			continue;
3847 		}
3848 
3849 		namlen[nents] = strlen(dp->d_name);
3850 		entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3851 
3852 		/*
3853 		 * We need to check to see if the number of bytes left
3854 		 * to go into the buffer will actually fit into the
3855 		 * buffer.  This is calculated as the size of this
3856 		 * entry plus:
3857 		 *	1 for the true/false list indicator +
3858 		 *	1 for the eof indicator
3859 		 * times BYTES_PER_XDR_UNIT to convert from XDR units
3860 		 * to bytes.
3861 		 *
3862 		 * Also check the dircount limit against the first entry read
3863 		 *
3864 		 */
3865 		tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3866 		if (bufsize + tofit > args->maxcount) {
3867 			/*
3868 			 * We make a check here to see if this was the
3869 			 * first entry being measured.  If so, then maxcount
3870 			 * was too small to begin with and so we need to
3871 			 * return with NFS3ERR_TOOSMALL.
3872 			 */
3873 			if (nents == 0) {
3874 				kmem_free(data, args->dircount);
3875 				resp->status = NFS3ERR_TOOSMALL;
3876 				goto out1;
3877 			}
3878 			iseof = FALSE;
3879 			goto good;
3880 		}
3881 		bufsize += entrysize;
3882 		nents++;
3883 	}
3884 
3885 	/*
3886 	 * If there is enough room to fit at least 1 more entry including
3887 	 * post op attributes and filehandle in the buffer AND that we haven't
3888 	 * exceeded dircount then go back and get some more.
3889 	 */
3890 	if (!iseof &&
3891 	    (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3892 		space_left -= (prev_len - uio.uio_resid);
3893 		if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3894 			goto getmoredents;
3895 
3896 		/* else, fall through */
3897 	}
3898 
3899 good:
3900 
3901 #ifdef DEBUG
3902 	if (rfs3_do_post_op_attr) {
3903 		va.va_mask = AT_ALL;
3904 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3905 	} else
3906 		vap = NULL;
3907 #else
3908 	va.va_mask = AT_ALL;
3909 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3910 #endif
3911 
3912 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3913 
3914 	infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3915 	resp->resok.infop = infop;
3916 
3917 	dp = (struct dirent64 *)data;
3918 	for (i = 0; i < nents; i++) {
3919 
3920 		if (dp->d_ino == 0) {
3921 			infop[i].attr.attributes = FALSE;
3922 			infop[i].fh.handle_follows = FALSE;
3923 			dp = nextdp(dp);
3924 			continue;
3925 		}
3926 
3927 		infop[i].namelen = namlen[i];
3928 
3929 		error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3930 		    NULL, NULL, NULL);
3931 		if (error) {
3932 			infop[i].attr.attributes = FALSE;
3933 			infop[i].fh.handle_follows = FALSE;
3934 			dp = nextdp(dp);
3935 			continue;
3936 		}
3937 
3938 #ifdef DEBUG
3939 		if (rfs3_do_post_op_attr) {
3940 			nva.va_mask = AT_ALL;
3941 			nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ?
3942 			    NULL : &nva;
3943 		} else
3944 			nvap = NULL;
3945 #else
3946 		nva.va_mask = AT_ALL;
3947 		nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3948 #endif
3949 		/* Lie about the object type for a referral */
3950 		if (vn_is_nfs_reparse(nvp, cr))
3951 			nvap->va_type = VLNK;
3952 
3953 		vattr_to_post_op_attr(nvap, &infop[i].attr);
3954 
3955 #ifdef DEBUG
3956 		if (!rfs3_do_post_op_fh3)
3957 			infop[i].fh.handle_follows = FALSE;
3958 		else {
3959 #endif
3960 		error = makefh3(&infop[i].fh.handle, nvp, exi);
3961 		if (!error)
3962 			infop[i].fh.handle_follows = TRUE;
3963 		else
3964 			infop[i].fh.handle_follows = FALSE;
3965 #ifdef DEBUG
3966 		}
3967 #endif
3968 
3969 		VN_RELE(nvp);
3970 		dp = nextdp(dp);
3971 	}
3972 
3973 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3974 	ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3975 	if (ndata == NULL)
3976 		ndata = data;
3977 
3978 	if (ret > 0) {
3979 		/*
3980 		 * We had to drop one or more entries in order to fit
3981 		 * during the character conversion.  We need to patch
3982 		 * up the size and eof info.
3983 		 */
3984 		if (iseof)
3985 			iseof = FALSE;
3986 
3987 		ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3988 		    nents, ret);
3989 	}
3990 
3991 
3992 #if 0 /* notyet */
3993 	/*
3994 	 * Don't do this.  It causes local disk writes when just
3995 	 * reading the file and the overhead is deemed larger
3996 	 * than the benefit.
3997 	 */
3998 	/*
3999 	 * Force modified metadata out to stable storage.
4000 	 */
4001 	(void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4002 #endif
4003 
4004 	kmem_free(namlen, args->dircount);
4005 
4006 	resp->status = NFS3_OK;
4007 	vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
4008 	resp->resok.cookieverf = 0;
4009 	resp->resok.reply.entries = (entryplus3 *)ndata;
4010 	resp->resok.reply.eof = iseof;
4011 	resp->resok.size = nents;
4012 	resp->resok.count = args->dircount - ret;
4013 	resp->resok.maxcount = args->maxcount;
4014 
4015 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
4016 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
4017 	if (ndata != data)
4018 		kmem_free(data, args->dircount);
4019 
4020 
4021 	VN_RELE(vp);
4022 
4023 	return;
4024 
4025 out:
4026 	if (curthread->t_flag & T_WOULDBLOCK) {
4027 		curthread->t_flag &= ~T_WOULDBLOCK;
4028 		resp->status = NFS3ERR_JUKEBOX;
4029 	} else {
4030 		resp->status = puterrno3(error);
4031 	}
4032 out1:
4033 	DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
4034 	    cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
4035 
4036 	if (vp != NULL) {
4037 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
4038 		VN_RELE(vp);
4039 	}
4040 
4041 	if (namlen != NULL)
4042 		kmem_free(namlen, args->dircount);
4043 
4044 	vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
4045 }
4046 
4047 void *
4048 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
4049 {
4050 
4051 	return (&args->dir);
4052 }
4053 
4054 void
4055 rfs3_readdirplus_free(READDIRPLUS3res *resp)
4056 {
4057 
4058 	if (resp->status == NFS3_OK) {
4059 		kmem_free(resp->resok.reply.entries, resp->resok.count);
4060 		kmem_free(resp->resok.infop,
4061 		    resp->resok.size * sizeof (struct entryplus3_info));
4062 	}
4063 }
4064 
4065 /* ARGSUSED */
4066 void
4067 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
4068 	struct svc_req *req, cred_t *cr)
4069 {
4070 	int error;
4071 	vnode_t *vp;
4072 	struct vattr *vap;
4073 	struct vattr va;
4074 	struct statvfs64 sb;
4075 
4076 	vap = NULL;
4077 
4078 	vp = nfs3_fhtovp(&args->fsroot, exi);
4079 
4080 	DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
4081 	    cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
4082 
4083 	if (vp == NULL) {
4084 		error = ESTALE;
4085 		goto out;
4086 	}
4087 
4088 	if (is_system_labeled()) {
4089 		bslabel_t *clabel = req->rq_label;
4090 
4091 		ASSERT(clabel != NULL);
4092 		DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
4093 		    "got client label from request(1)", struct svc_req *, req);
4094 
4095 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4096 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4097 			    exi)) {
4098 				resp->status = NFS3ERR_ACCES;
4099 				goto out1;
4100 			}
4101 		}
4102 	}
4103 
4104 	error = VFS_STATVFS(vp->v_vfsp, &sb);
4105 
4106 #ifdef DEBUG
4107 	if (rfs3_do_post_op_attr) {
4108 		va.va_mask = AT_ALL;
4109 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4110 	} else
4111 		vap = NULL;
4112 #else
4113 	va.va_mask = AT_ALL;
4114 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4115 #endif
4116 
4117 	if (error)
4118 		goto out;
4119 
4120 	resp->status = NFS3_OK;
4121 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4122 	if (sb.f_blocks != (fsblkcnt64_t)-1)
4123 		resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
4124 	else
4125 		resp->resok.tbytes = (size3)sb.f_blocks;
4126 	if (sb.f_bfree != (fsblkcnt64_t)-1)
4127 		resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
4128 	else
4129 		resp->resok.fbytes = (size3)sb.f_bfree;
4130 	if (sb.f_bavail != (fsblkcnt64_t)-1)
4131 		resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
4132 	else
4133 		resp->resok.abytes = (size3)sb.f_bavail;
4134 	resp->resok.tfiles = (size3)sb.f_files;
4135 	resp->resok.ffiles = (size3)sb.f_ffree;
4136 	resp->resok.afiles = (size3)sb.f_favail;
4137 	resp->resok.invarsec = 0;
4138 
4139 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
4140 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
4141 	VN_RELE(vp);
4142 
4143 	return;
4144 
4145 out:
4146 	if (curthread->t_flag & T_WOULDBLOCK) {
4147 		curthread->t_flag &= ~T_WOULDBLOCK;
4148 		resp->status = NFS3ERR_JUKEBOX;
4149 	} else
4150 		resp->status = puterrno3(error);
4151 out1:
4152 	DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
4153 	    cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
4154 
4155 	if (vp != NULL)
4156 		VN_RELE(vp);
4157 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4158 }
4159 
4160 void *
4161 rfs3_fsstat_getfh(FSSTAT3args *args)
4162 {
4163 
4164 	return (&args->fsroot);
4165 }
4166 
4167 void
4168 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
4169 	struct svc_req *req, cred_t *cr)
4170 {
4171 	vnode_t *vp;
4172 	struct vattr *vap;
4173 	struct vattr va;
4174 	uint32_t xfer_size;
4175 	ulong_t l = 0;
4176 	int error;
4177 
4178 	vp = nfs3_fhtovp(&args->fsroot, exi);
4179 
4180 	DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
4181 	    cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
4182 
4183 	if (vp == NULL) {
4184 		if (curthread->t_flag & T_WOULDBLOCK) {
4185 			curthread->t_flag &= ~T_WOULDBLOCK;
4186 			resp->status = NFS3ERR_JUKEBOX;
4187 		} else
4188 			resp->status = NFS3ERR_STALE;
4189 		vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
4190 		goto out;
4191 	}
4192 
4193 	if (is_system_labeled()) {
4194 		bslabel_t *clabel = req->rq_label;
4195 
4196 		ASSERT(clabel != NULL);
4197 		DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
4198 		    "got client label from request(1)", struct svc_req *, req);
4199 
4200 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4201 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4202 			    exi)) {
4203 				resp->status = NFS3ERR_STALE;
4204 				vattr_to_post_op_attr(NULL,
4205 				    &resp->resfail.obj_attributes);
4206 				goto out;
4207 			}
4208 		}
4209 	}
4210 
4211 #ifdef DEBUG
4212 	if (rfs3_do_post_op_attr) {
4213 		va.va_mask = AT_ALL;
4214 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4215 	} else
4216 		vap = NULL;
4217 #else
4218 	va.va_mask = AT_ALL;
4219 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4220 #endif
4221 
4222 	resp->status = NFS3_OK;
4223 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4224 	xfer_size = rfs3_tsize(req);
4225 	resp->resok.rtmax = xfer_size;
4226 	resp->resok.rtpref = xfer_size;
4227 	resp->resok.rtmult = DEV_BSIZE;
4228 	resp->resok.wtmax = xfer_size;
4229 	resp->resok.wtpref = xfer_size;
4230 	resp->resok.wtmult = DEV_BSIZE;
4231 	resp->resok.dtpref = MAXBSIZE;
4232 
4233 	/*
4234 	 * Large file spec: want maxfilesize based on limit of
4235 	 * underlying filesystem.  We can guess 2^31-1 if need be.
4236 	 */
4237 	error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
4238 	if (error) {
4239 		resp->status = puterrno3(error);
4240 		goto out;
4241 	}
4242 
4243 	/*
4244 	 * If the underlying file system does not support _PC_FILESIZEBITS,
4245 	 * return a reasonable default. Note that error code on VOP_PATHCONF
4246 	 * will be 0, even if the underlying file system does not support
4247 	 * _PC_FILESIZEBITS.
4248 	 */
4249 	if (l == (ulong_t)-1) {
4250 		resp->resok.maxfilesize = MAXOFF32_T;
4251 	} else {
4252 		if (l >= (sizeof (uint64_t) * 8))
4253 			resp->resok.maxfilesize = INT64_MAX;
4254 		else
4255 			resp->resok.maxfilesize = (1LL << (l-1)) - 1;
4256 	}
4257 
4258 	resp->resok.time_delta.seconds = 0;
4259 	resp->resok.time_delta.nseconds = 1000;
4260 	resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
4261 	    FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
4262 
4263 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
4264 	    cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
4265 
4266 	VN_RELE(vp);
4267 
4268 	return;
4269 
4270 out:
4271 	DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
4272 	    cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
4273 	if (vp != NULL)
4274 		VN_RELE(vp);
4275 }
4276 
4277 void *
4278 rfs3_fsinfo_getfh(FSINFO3args *args)
4279 {
4280 
4281 	return (&args->fsroot);
4282 }
4283 
4284 /* ARGSUSED */
4285 void
4286 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
4287 	struct svc_req *req, cred_t *cr)
4288 {
4289 	int error;
4290 	vnode_t *vp;
4291 	struct vattr *vap;
4292 	struct vattr va;
4293 	ulong_t val;
4294 
4295 	vap = NULL;
4296 
4297 	vp = nfs3_fhtovp(&args->object, exi);
4298 
4299 	DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
4300 	    cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
4301 
4302 	if (vp == NULL) {
4303 		error = ESTALE;
4304 		goto out;
4305 	}
4306 
4307 	if (is_system_labeled()) {
4308 		bslabel_t *clabel = req->rq_label;
4309 
4310 		ASSERT(clabel != NULL);
4311 		DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
4312 		    "got client label from request(1)", struct svc_req *, req);
4313 
4314 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4315 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
4316 			    exi)) {
4317 				resp->status = NFS3ERR_ACCES;
4318 				goto out1;
4319 			}
4320 		}
4321 	}
4322 
4323 #ifdef DEBUG
4324 	if (rfs3_do_post_op_attr) {
4325 		va.va_mask = AT_ALL;
4326 		vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4327 	} else
4328 		vap = NULL;
4329 #else
4330 	va.va_mask = AT_ALL;
4331 	vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
4332 #endif
4333 
4334 	error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
4335 	if (error)
4336 		goto out;
4337 	resp->resok.info.link_max = (uint32)val;
4338 
4339 	error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
4340 	if (error)
4341 		goto out;
4342 	resp->resok.info.name_max = (uint32)val;
4343 
4344 	error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
4345 	if (error)
4346 		goto out;
4347 	if (val == 1)
4348 		resp->resok.info.no_trunc = TRUE;
4349 	else
4350 		resp->resok.info.no_trunc = FALSE;
4351 
4352 	error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
4353 	if (error)
4354 		goto out;
4355 	if (val == 1)
4356 		resp->resok.info.chown_restricted = TRUE;
4357 	else
4358 		resp->resok.info.chown_restricted = FALSE;
4359 
4360 	resp->status = NFS3_OK;
4361 	vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
4362 	resp->resok.info.case_insensitive = FALSE;
4363 	resp->resok.info.case_preserving = TRUE;
4364 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4365 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4366 	VN_RELE(vp);
4367 	return;
4368 
4369 out:
4370 	if (curthread->t_flag & T_WOULDBLOCK) {
4371 		curthread->t_flag &= ~T_WOULDBLOCK;
4372 		resp->status = NFS3ERR_JUKEBOX;
4373 	} else
4374 		resp->status = puterrno3(error);
4375 out1:
4376 	DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4377 	    cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4378 	if (vp != NULL)
4379 		VN_RELE(vp);
4380 	vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4381 }
4382 
4383 void *
4384 rfs3_pathconf_getfh(PATHCONF3args *args)
4385 {
4386 
4387 	return (&args->object);
4388 }
4389 
4390 void
4391 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4392 	struct svc_req *req, cred_t *cr)
4393 {
4394 	int error;
4395 	vnode_t *vp;
4396 	struct vattr *bvap;
4397 	struct vattr bva;
4398 	struct vattr *avap;
4399 	struct vattr ava;
4400 
4401 	bvap = NULL;
4402 	avap = NULL;
4403 
4404 	vp = nfs3_fhtovp(&args->file, exi);
4405 
4406 	DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4407 	    cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4408 
4409 	if (vp == NULL) {
4410 		error = ESTALE;
4411 		goto out;
4412 	}
4413 
4414 	bva.va_mask = AT_ALL;
4415 	error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4416 
4417 	/*
4418 	 * If we can't get the attributes, then we can't do the
4419 	 * right access checking.  So, we'll fail the request.
4420 	 */
4421 	if (error)
4422 		goto out;
4423 
4424 #ifdef DEBUG
4425 	if (rfs3_do_pre_op_attr)
4426 		bvap = &bva;
4427 	else
4428 		bvap = NULL;
4429 #else
4430 	bvap = &bva;
4431 #endif
4432 
4433 	if (rdonly(exi, req)) {
4434 		resp->status = NFS3ERR_ROFS;
4435 		goto out1;
4436 	}
4437 
4438 	if (vp->v_type != VREG) {
4439 		resp->status = NFS3ERR_INVAL;
4440 		goto out1;
4441 	}
4442 
4443 	if (is_system_labeled()) {
4444 		bslabel_t *clabel = req->rq_label;
4445 
4446 		ASSERT(clabel != NULL);
4447 		DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4448 		    "got client label from request(1)", struct svc_req *, req);
4449 
4450 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4451 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4452 			    exi)) {
4453 				resp->status = NFS3ERR_ACCES;
4454 				goto out1;
4455 			}
4456 		}
4457 	}
4458 
4459 	if (crgetuid(cr) != bva.va_uid &&
4460 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4461 		goto out;
4462 
4463 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr, NULL);
4464 	if (!error)
4465 		error = VOP_FSYNC(vp, FNODSYNC, cr, NULL);
4466 
4467 #ifdef DEBUG
4468 	if (rfs3_do_post_op_attr) {
4469 		ava.va_mask = AT_ALL;
4470 		avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4471 	} else
4472 		avap = NULL;
4473 #else
4474 	ava.va_mask = AT_ALL;
4475 	avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4476 #endif
4477 
4478 	if (error)
4479 		goto out;
4480 
4481 	resp->status = NFS3_OK;
4482 	vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4483 	resp->resok.verf = write3verf;
4484 
4485 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4486 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4487 
4488 	VN_RELE(vp);
4489 
4490 	return;
4491 
4492 out:
4493 	if (curthread->t_flag & T_WOULDBLOCK) {
4494 		curthread->t_flag &= ~T_WOULDBLOCK;
4495 		resp->status = NFS3ERR_JUKEBOX;
4496 	} else
4497 		resp->status = puterrno3(error);
4498 out1:
4499 	DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4500 	    cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4501 
4502 	if (vp != NULL)
4503 		VN_RELE(vp);
4504 	vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4505 }
4506 
4507 void *
4508 rfs3_commit_getfh(COMMIT3args *args)
4509 {
4510 
4511 	return (&args->file);
4512 }
4513 
4514 static int
4515 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4516 {
4517 
4518 	vap->va_mask = 0;
4519 
4520 	if (sap->mode.set_it) {
4521 		vap->va_mode = (mode_t)sap->mode.mode;
4522 		vap->va_mask |= AT_MODE;
4523 	}
4524 	if (sap->uid.set_it) {
4525 		vap->va_uid = (uid_t)sap->uid.uid;
4526 		vap->va_mask |= AT_UID;
4527 	}
4528 	if (sap->gid.set_it) {
4529 		vap->va_gid = (gid_t)sap->gid.gid;
4530 		vap->va_mask |= AT_GID;
4531 	}
4532 	if (sap->size.set_it) {
4533 		if (sap->size.size > (size3)((u_longlong_t)-1))
4534 			return (EINVAL);
4535 		vap->va_size = sap->size.size;
4536 		vap->va_mask |= AT_SIZE;
4537 	}
4538 	if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4539 #ifndef _LP64
4540 		/* check time validity */
4541 		if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4542 			return (EOVERFLOW);
4543 #endif
4544 		/*
4545 		 * nfs protocol defines times as unsigned so don't extend sign,
4546 		 * unless sysadmin set nfs_allow_preepoch_time.
4547 		 */
4548 		NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4549 		    sap->atime.atime.seconds);
4550 		vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4551 		vap->va_mask |= AT_ATIME;
4552 	} else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4553 		gethrestime(&vap->va_atime);
4554 		vap->va_mask |= AT_ATIME;
4555 	}
4556 	if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4557 #ifndef _LP64
4558 		/* check time validity */
4559 		if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4560 			return (EOVERFLOW);
4561 #endif
4562 		/*
4563 		 * nfs protocol defines times as unsigned so don't extend sign,
4564 		 * unless sysadmin set nfs_allow_preepoch_time.
4565 		 */
4566 		NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4567 		    sap->mtime.mtime.seconds);
4568 		vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4569 		vap->va_mask |= AT_MTIME;
4570 	} else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4571 		gethrestime(&vap->va_mtime);
4572 		vap->va_mask |= AT_MTIME;
4573 	}
4574 
4575 	return (0);
4576 }
4577 
4578 static ftype3 vt_to_nf3[] = {
4579 	0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4580 };
4581 
4582 static int
4583 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4584 {
4585 
4586 	ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4587 	/* Return error if time or size overflow */
4588 	if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4589 		return (EOVERFLOW);
4590 	}
4591 	fap->type = vt_to_nf3[vap->va_type];
4592 	fap->mode = (mode3)(vap->va_mode & MODEMASK);
4593 	fap->nlink = (uint32)vap->va_nlink;
4594 	if (vap->va_uid == UID_NOBODY)
4595 		fap->uid = (uid3)NFS_UID_NOBODY;
4596 	else
4597 		fap->uid = (uid3)vap->va_uid;
4598 	if (vap->va_gid == GID_NOBODY)
4599 		fap->gid = (gid3)NFS_GID_NOBODY;
4600 	else
4601 		fap->gid = (gid3)vap->va_gid;
4602 	fap->size = (size3)vap->va_size;
4603 	fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4604 	fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4605 	fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4606 	fap->fsid = (uint64)vap->va_fsid;
4607 	fap->fileid = (fileid3)vap->va_nodeid;
4608 	fap->atime.seconds = vap->va_atime.tv_sec;
4609 	fap->atime.nseconds = vap->va_atime.tv_nsec;
4610 	fap->mtime.seconds = vap->va_mtime.tv_sec;
4611 	fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4612 	fap->ctime.seconds = vap->va_ctime.tv_sec;
4613 	fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4614 	return (0);
4615 }
4616 
4617 static int
4618 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4619 {
4620 
4621 	/* Return error if time or size overflow */
4622 	if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4623 	    NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4624 	    NFS3_SIZE_OK(vap->va_size))) {
4625 		return (EOVERFLOW);
4626 	}
4627 	wccap->size = (size3)vap->va_size;
4628 	wccap->mtime.seconds = vap->va_mtime.tv_sec;
4629 	wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4630 	wccap->ctime.seconds = vap->va_ctime.tv_sec;
4631 	wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4632 	return (0);
4633 }
4634 
4635 static void
4636 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4637 {
4638 
4639 	/* don't return attrs if time overflow */
4640 	if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4641 		poap->attributes = TRUE;
4642 	} else
4643 		poap->attributes = FALSE;
4644 }
4645 
4646 void
4647 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4648 {
4649 
4650 	/* don't return attrs if time overflow */
4651 	if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4652 		poap->attributes = TRUE;
4653 	} else
4654 		poap->attributes = FALSE;
4655 }
4656 
4657 static void
4658 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4659 {
4660 
4661 	vattr_to_pre_op_attr(bvap, &wccp->before);
4662 	vattr_to_post_op_attr(avap, &wccp->after);
4663 }
4664 
4665 void
4666 rfs3_srvrinit(void)
4667 {
4668 	struct rfs3_verf_overlay {
4669 		uint_t id; /* a "unique" identifier */
4670 		int ts; /* a unique timestamp */
4671 	} *verfp;
4672 	timestruc_t now;
4673 
4674 	/*
4675 	 * The following algorithm attempts to find a unique verifier
4676 	 * to be used as the write verifier returned from the server
4677 	 * to the client.  It is important that this verifier change
4678 	 * whenever the server reboots.  Of secondary importance, it
4679 	 * is important for the verifier to be unique between two
4680 	 * different servers.
4681 	 *
4682 	 * Thus, an attempt is made to use the system hostid and the
4683 	 * current time in seconds when the nfssrv kernel module is
4684 	 * loaded.  It is assumed that an NFS server will not be able
4685 	 * to boot and then to reboot in less than a second.  If the
4686 	 * hostid has not been set, then the current high resolution
4687 	 * time is used.  This will ensure different verifiers each
4688 	 * time the server reboots and minimize the chances that two
4689 	 * different servers will have the same verifier.
4690 	 */
4691 
4692 #ifndef	lint
4693 	/*
4694 	 * We ASSERT that this constant logic expression is
4695 	 * always true because in the past, it wasn't.
4696 	 */
4697 	ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4698 #endif
4699 
4700 	gethrestime(&now);
4701 	verfp = (struct rfs3_verf_overlay *)&write3verf;
4702 	verfp->ts = (int)now.tv_sec;
4703 	verfp->id = zone_get_hostid(NULL);
4704 
4705 	if (verfp->id == 0)
4706 		verfp->id = (uint_t)now.tv_nsec;
4707 
4708 	nfs3_srv_caller_id = fs_new_caller_id();
4709 
4710 }
4711 
4712 static int
4713 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4714 {
4715 	struct clist	*wcl;
4716 	int		wlist_len;
4717 	count3		count = rok->count;
4718 
4719 	wcl = args->wlist;
4720 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4721 		return (FALSE);
4722 	}
4723 
4724 	wcl = args->wlist;
4725 	rok->wlist_len = wlist_len;
4726 	rok->wlist = wcl;
4727 	return (TRUE);
4728 }
4729 
/*
 * Teardown counterpart of rfs3_srvrinit().  It is currently an empty
 * function: it takes no arguments, returns nothing and does no work.
 */
void
rfs3_srvrfini(void)
{
	/* intentionally empty */
}
4735