xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 54d82594cac34899a52710db0b8235a171e83e31)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All Rights Reserved
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/rpcsec_gss.h>
61 #include <rpc/svc.h>
62 
63 #include <nfs/nfs.h>
64 #include <nfs/export.h>
65 #include <nfs/lm.h>
66 #include <nfs/nfs4.h>
67 
68 #include <sys/strsubr.h>
69 #include <sys/strsun.h>
70 
71 #include <inet/common.h>
72 #include <inet/ip.h>
73 #include <inet/ip6.h>
74 
/*
 * Tunables
 *
 * Each compile-time default below seeds a file-local variable of the
 * matching lower-case name.
 * NOTE(review): the code that reads rfs4_maxlock_tries and rfs4_lock_delay
 * is not in this part of the file -- confirm how they are consumed.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;

/* End of Tunables */
81 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 * Pre-increments (sp)->bits.chgseq in place; the expression evaluates to
 * the incremented value.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
86 
/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 *	This is used to return NFS4ERR_TOOSMALL when clients specify
 *	maxcount that isn't large enough to hold the smallest possible
 *	XDR encoded dirent.
 *
 *	    sizeof cookie (8 bytes) +
 *	    sizeof name_len (4 bytes) +
 *	    sizeof smallest (padded) name (4 bytes) +
 *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *	    sizeof attrlist4_len (4 bytes) +
 *	    sizeof next boolean (4 bytes)
 *	    = 36 bytes
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested).
 *	sizeof nfsstat4 (4 bytes) +
 *	sizeof verifier4 (8 bytes) +
 *	sizeof entry4list bool (4 bytes) +
 *	sizeof entry4	(36 bytes) +
 *	sizeof eof bool  (4 bytes)
 *	= 56 bytes, given that NFS4_VERIFIER_SIZE is the 8 listed above
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 *	VOP_READDIR.  Its value is the size of the maximum possible dirent
 *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
 *	required for a given name length.  MAXNAMELEN is the maximum
 *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 *	macros are to allow for . and .. entries -- just a minor tweak to try
 *	and guarantee that buffer we give to VOP_READDIR will be large enough
 *	to hold ., .., and the largest possible solaris dirent64.
 */
#define	RFS4_MINLEN_ENTRY4 36
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
121 
/*
 * Estimate how many bytes of the client's dircount one dirent64 consumes.
 *
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 * The cookie and the name length account for the three XDR units; the
 * name portion is whatever DIRENT64_NAMELEN() yields for the record length.
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
137 
/*
 * NOTE(review): the original comment says this is initialized in
 * rfs4_srvrinit, but rfs4_srvrinit as it appears in this file does not
 * assign it -- confirm where it is actually set.
 */
time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */

/*
 * Allocated with lm_alloc_sysidt() in rfs4_srvrinit and released in
 * rfs4_srvrfini.
 */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Assigned from fs_new_caller_id() in rfs4_srvrinit. */
u_longlong_t nfs4_srv_caller_id;

/*
 * Write4verf is computed once in rfs4_srvrinit from the host id and the
 * current time.  NOTE(review): the assignment of Readdir4verf is not in
 * this part of the file.
 */
verifier4	Write4verf;
verifier4	Readdir4verf;
146 
/*
 * Forward declarations.
 *
 * The rfs4_op_* handlers all share one signature: the operation's
 * arguments, the slot for its results, the RPC request, and the state of
 * the enclosing COMPOUND.  The *_free routines (and lock_denied_free)
 * take only the result slot; those named in rfsv4disptab are installed
 * there as the per-op result destructors.
 */
void		rfs4_init_compound_state(struct compound_state *);

static void	nullfree(caddr_t);
static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_create_free(nfs_resop4 *resop);
static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
				    struct svc_req *, struct compound_state *);
static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_getattr_free(nfs_resop4 *);
static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_getfh_free(nfs_resop4 *);
static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	lock_denied_free(nfs_resop4 *);
static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
				struct svc_req *req, struct compound_state *cs);
static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_read_free(nfs_resop4 *);
static void	rfs4_op_readdir_free(nfs_resop4 *resop);
static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_readlink_free(nfs_resop4 *);
static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *req, struct compound_state *);
static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers for access checks, client sysids and share reservations. */
static nfsstat4 check_open_access(uint32_t,
				struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
static int 	rfs4_shrlock(rfs4_state_t *, int);
static int	rfs4_share(rfs4_state_t *);
void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
240 
/*
 * translation table for attrs
 *
 * NOTE(review): the code that fills and reads this table is outside this
 * part of the file; the field descriptions below are inferred from the
 * declarations only and should be confirmed against nfs4_ntov_table_init()
 * and its users.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably an array of attr values */
	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* one slot per possible attr */
	int attrcnt;			/* presumably entries in use */
	bool_t vfsstat;			/* presumably: vfs stats were needed */
};
250 
/* Set up and tear down of a struct nfs4_ntov_table. */
static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
				    struct nfs4_svgetit_arg *sargp);

static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM monitors for read and write delegations.  Created in rfs4_srvrinit
 * from nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl and freed in rfs4_srvrfini.
 */
fem_t	*deleg_rdops;
fem_t	*deleg_wrops;

/*
 * Server instances are chained through their prev/next pointers;
 * rfs4_cur_servinst is the most recently created one.
 */
rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
kmutex_t	rfs4_servinst_lock;		/* protects linked list */
int		rfs4_seen_first_compound;	/* set first time we see one */

#ifdef DEBUG
/* Gates the NFS4_DEBUG messages issued by the rfs4_servinst/grace code. */
int	rfs4_servinst_debug = 0;
#endif
269 
/*
 * NFS4 op dispatch table
 *
 * One entry per operation.  Both function pointers are declared without a
 * parameter list, so handlers with differing argument types (for example
 * nullfree(caddr_t) and the nfs_resop4 * free routines) can share a slot.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
279 
/*
 * Dispatch table, one entry per NFSv4 operation; each entry's position
 * matches the operation number given in the comment above it.  Slots 0-2
 * are not real operations and route to rfs4_op_illegal.  OP_DELEGPURGE
 * routes to rfs4_op_inval.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_inval, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
405 
/* Number of entries in rfsv4disptab. */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index used for an illegal operation.  It equals the entry count, i.e. it
 * is one past the last valid rfsv4disptab slot, so it must not be used to
 * index that table.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
409 
#ifdef DEBUG

/* Debug knobs; compiled only into DEBUG kernels. */
int rfs4_fillone_debug = 0;
int rfs4_shrlock_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable handler names, in the same order as rfsv4disptab.  There is
 * one extra trailing entry, at the position OP_ILLEGAL_IDX evaluates to,
 * naming the illegal-op handler.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; now matches the handler's name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
461 
void rfs4_ss_chkclid(rfs4_client_t *);

/*
 * Step to the directory entry that follows dp in a buffer of packed
 * dirent64 records, using dp's own d_reclen as the stride.  Any earlier
 * definition of nextdp is replaced.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
468 
/*
 * Vnode operations intercepted by the read-delegation monitor; passed to
 * fem_create() as "deleg_rdops" in rfs4_srvrinit.  The list is terminated
 * by a NULL, NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN, deleg_rdopen,
	VOPNAME_WRITE, deleg_write,
	VOPNAME_SETATTR, deleg_setattr,
	VOPNAME_RWLOCK, deleg_rd_rwlock,
	VOPNAME_SPACE, deleg_space,
	VOPNAME_SETSECATTR, deleg_setsecattr,
	VOPNAME_VNEVENT, deleg_vnevent,
	NULL, NULL
};
/*
 * Vnode operations intercepted by the write-delegation monitor
 * ("deleg_wrops").  Same set as the read template plus VOPNAME_READ, with
 * write-specific open and rwlock handlers.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN, deleg_wropen,
	VOPNAME_READ, deleg_read,
	VOPNAME_WRITE, deleg_write,
	VOPNAME_SETATTR, deleg_setattr,
	VOPNAME_RWLOCK, deleg_wr_rwlock,
	VOPNAME_SPACE, deleg_space,
	VOPNAME_SETSECATTR, deleg_setsecattr,
	VOPNAME_VNEVENT, deleg_vnevent,
	NULL, NULL
};
490 
491 int
492 rfs4_srvrinit(void)
493 {
494 	timespec32_t verf;
495 	int error;
496 	extern void rfs4_attr_init();
497 	extern krwlock_t rfs4_deleg_policy_lock;
498 
499 	/*
500 	 * The following algorithm attempts to find a unique verifier
501 	 * to be used as the write verifier returned from the server
502 	 * to the client.  It is important that this verifier change
503 	 * whenever the server reboots.  Of secondary importance, it
504 	 * is important for the verifier to be unique between two
505 	 * different servers.
506 	 *
507 	 * Thus, an attempt is made to use the system hostid and the
508 	 * current time in seconds when the nfssrv kernel module is
509 	 * loaded.  It is assumed that an NFS server will not be able
510 	 * to boot and then to reboot in less than a second.  If the
511 	 * hostid has not been set, then the current high resolution
512 	 * time is used.  This will ensure different verifiers each
513 	 * time the server reboots and minimize the chances that two
514 	 * different servers will have the same verifier.
515 	 * XXX - this is broken on LP64 kernels.
516 	 */
517 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
518 	if (verf.tv_sec != 0) {
519 		verf.tv_nsec = gethrestime_sec();
520 	} else {
521 		timespec_t tverf;
522 
523 		gethrestime(&tverf);
524 		verf.tv_sec = (time_t)tverf.tv_sec;
525 		verf.tv_nsec = tverf.tv_nsec;
526 	}
527 
528 	Write4verf = *(uint64_t *)&verf;
529 
530 	rfs4_attr_init();
531 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
532 
533 	/* Used to manage create/destroy of server state */
534 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
535 
536 	/* Used to manage access to server instance linked list */
537 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
538 
539 	/* Used to manage access to rfs4_deleg_policy */
540 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
541 
542 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
543 	if (error != 0) {
544 		rfs4_disable_delegation();
545 	} else {
546 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
547 				&deleg_wrops);
548 		if (error != 0) {
549 			rfs4_disable_delegation();
550 			fem_free(deleg_rdops);
551 		}
552 	}
553 
554 	nfs4_srv_caller_id = fs_new_caller_id();
555 
556 	lockt_sysid = lm_alloc_sysidt();
557 
558 	return (0);
559 }
560 
561 void
562 rfs4_srvrfini(void)
563 {
564 	extern krwlock_t rfs4_deleg_policy_lock;
565 
566 	if (lockt_sysid != LM_NOSYSID) {
567 		lm_free_sysidt(lockt_sysid);
568 		lockt_sysid = LM_NOSYSID;
569 	}
570 
571 	mutex_destroy(&rfs4_deleg_lock);
572 	mutex_destroy(&rfs4_state_lock);
573 	rw_destroy(&rfs4_deleg_policy_lock);
574 
575 	fem_free(deleg_rdops);
576 	fem_free(deleg_wrops);
577 }
578 
579 void
580 rfs4_init_compound_state(struct compound_state *cs)
581 {
582 	bzero(cs, sizeof (*cs));
583 	cs->cont = TRUE;
584 	cs->access = CS_ACCESS_DENIED;
585 	cs->deleg = FALSE;
586 	cs->mandlock = FALSE;
587 	cs->fh.nfs_fh4_val = cs->fhbuf;
588 }
589 
590 void
591 rfs4_grace_start(rfs4_servinst_t *sip)
592 {
593 	time_t now = gethrestime_sec();
594 
595 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
596 	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
597 
598 	rw_enter(&sip->rwlock, RW_WRITER);
599 	sip->start_time = now;
600 	sip->grace_period = rfs4_grace_period;
601 	rw_exit(&sip->rwlock);
602 }
603 
604 /*
605  * returns true if the instance's grace period has never been started
606  */
607 int
608 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
609 {
610 	time_t start_time;
611 
612 	rw_enter(&sip->rwlock, RW_READER);
613 	start_time = sip->start_time;
614 	rw_exit(&sip->rwlock);
615 
616 	return (start_time == 0);
617 }
618 
619 /*
620  * Indicates if server instance is within the
621  * grace period.
622  */
623 int
624 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
625 {
626 	time_t grace_expiry;
627 
628 	rw_enter(&sip->rwlock, RW_READER);
629 	grace_expiry = sip->start_time + sip->grace_period;
630 	rw_exit(&sip->rwlock);
631 
632 	return (gethrestime_sec() < grace_expiry);
633 }
634 
635 int
636 rfs4_clnt_in_grace(rfs4_client_t *cp)
637 {
638 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
639 
640 	return (rfs4_servinst_in_grace(cp->server_instance));
641 }
642 
643 /*
644  * reset all currently active grace periods
645  */
646 void
647 rfs4_grace_reset_all(void)
648 {
649 #ifdef DEBUG
650 	int n = 0;
651 #endif
652 	rfs4_servinst_t *sip;
653 
654 	mutex_enter(&rfs4_servinst_lock);
655 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
656 		if (rfs4_servinst_in_grace(sip)) {
657 			rfs4_grace_start(sip);
658 #ifdef DEBUG
659 			n++;
660 #endif
661 		}
662 	}
663 	mutex_exit(&rfs4_servinst_lock);
664 
665 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
666 	    "rfs4_grace_reset_all: reset %d instances", n));
667 }
668 
669 /*
670  * start any new instances' grace periods
671  */
672 void
673 rfs4_grace_start_new(void)
674 {
675 #ifdef DEBUG
676 	int n = 0;
677 #endif
678 	rfs4_servinst_t *sip;
679 
680 	mutex_enter(&rfs4_servinst_lock);
681 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
682 		if (rfs4_servinst_grace_new(sip))
683 			rfs4_grace_start(sip);
684 #ifdef DEBUG
685 		n++;
686 #endif
687 	}
688 	mutex_exit(&rfs4_servinst_lock);
689 
690 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
691 	    "rfs4_grace_start_new: started %d new instances", n));
692 }
693 
694 /*
695  * Create a new server instance, and make it the currently active instance.
696  * Note that starting the grace period too early will reduce the clients'
697  * recovery window.
698  */
699 void
700 rfs4_servinst_create(int start_grace)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
705 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
706 
707 	sip->start_time = (time_t)0;
708 	sip->grace_period = (time_t)0;
709 	sip->next = NULL;
710 	sip->prev = NULL;
711 
712 	mutex_enter(&rfs4_servinst_lock);
713 	if (rfs4_cur_servinst == NULL) {
714 		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
715 		    "rfs4_servinst_create: creating first instance"));
716 	} else {
717 		/* add to linked list */
718 		sip->prev = rfs4_cur_servinst;
719 		rfs4_cur_servinst->next = sip;
720 	}
721 	if (start_grace)
722 		rfs4_grace_start(sip);
723 	/* make the new instance "current" */
724 	rfs4_cur_servinst = sip;
725 	mutex_exit(&rfs4_servinst_lock);
726 
727 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
728 	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
729 	    (void *)sip, start_grace));
730 }
731 
732 /*
733  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
734  * all instances directly.
735  */
736 void
737 rfs4_servinst_destroy_all(void)
738 {
739 	rfs4_servinst_t *sip, *prev, *current;
740 #ifdef DEBUG
741 	int n = 0;
742 #endif
743 
744 	mutex_enter(&rfs4_servinst_lock);
745 	ASSERT(rfs4_cur_servinst != NULL);
746 	current = rfs4_cur_servinst;
747 	rfs4_cur_servinst = NULL;
748 	for (sip = current; sip != NULL; sip = prev) {
749 		prev = sip->prev;
750 		rw_destroy(&sip->rwlock);
751 		kmem_free(sip, sizeof (rfs4_servinst_t));
752 #ifdef DEBUG
753 		n++;
754 #endif
755 	}
756 	mutex_exit(&rfs4_servinst_lock);
757 
758 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
759 	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
760 }
761 
762 /*
763  * Assign the current server instance to a client_t.
764  * Should be called with cp->dbe held.
765  */
766 void
767 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
768 {
769 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
770 
771 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
772 	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
773 	    (void *)cp->server_instance, (void *)sip));
774 
775 	/*
776 	 * The lock ensures that if the current instance is in the process
777 	 * of changing, we will see the new one.
778 	 */
779 	mutex_enter(&rfs4_servinst_lock);
780 	cp->server_instance = sip;
781 	mutex_exit(&rfs4_servinst_lock);
782 }
783 
784 rfs4_servinst_t *
785 rfs4_servinst(rfs4_client_t *cp)
786 {
787 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
788 
789 	return (cp->server_instance);
790 }
791 
/*
 * Result-free routine for operations whose results own no extra memory:
 * deliberately does nothing.  Installed in rfsv4disptab for every op that
 * has no dedicated free routine.
 */
/* ARGSUSED */
static void
nullfree(caddr_t resop)
{
}
797 
798 /*
799  * This is a fall-through for invalid or not implemented (yet) ops
800  */
801 /* ARGSUSED */
802 static void
803 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
804 	struct compound_state *cs)
805 {
806 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
807 }
808 
809 /*
810  * Check if the security flavor, nfsnum, is in the flavor_list.
811  */
812 bool_t
813 in_flavor_list(int nfsnum, int *flavor_list, int count)
814 {
815 	int i;
816 
817 	for (i = 0; i < count; i++) {
818 		if (nfsnum == flavor_list[i])
819 			return (TRUE);
820 	}
821 	return (FALSE);
822 }
823 
824 /*
825  * Used by rfs4_op_secinfo to get the security information from the
826  * export structure associated with the component.
827  */
828 /* ARGSUSED */
829 static nfsstat4
830 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
831 {
832 	int error, different_export = 0;
833 	vnode_t *dvp, *vp, *tvp;
834 	struct exportinfo *exi = NULL;
835 	fid_t fid;
836 	uint_t count, i;
837 	secinfo4 *resok_val;
838 	struct secinfo *secp;
839 	bool_t did_traverse;
840 	int dotdot, walk;
841 
842 	dvp = cs->vp;
843 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
844 
845 	/*
846 	 * If dotdotting, then need to check whether it's above the
847 	 * root of a filesystem, or above an export point.
848 	 */
849 	if (dotdot) {
850 
851 		/*
852 		 * If dotdotting at the root of a filesystem, then
853 		 * need to traverse back to the mounted-on filesystem
854 		 * and do the dotdot lookup there.
855 		 */
856 		if (cs->vp->v_flag & VROOT) {
857 
858 			/*
859 			 * If at the system root, then can
860 			 * go up no further.
861 			 */
862 			if (VN_CMP(dvp, rootdir))
863 				return (puterrno4(ENOENT));
864 
865 			/*
866 			 * Traverse back to the mounted-on filesystem
867 			 */
868 			dvp = untraverse(cs->vp);
869 
870 			/*
871 			 * Set the different_export flag so we remember
872 			 * to pick up a new exportinfo entry for
873 			 * this new filesystem.
874 			 */
875 			different_export = 1;
876 		} else {
877 
878 			/*
879 			 * If dotdotting above an export point then set
880 			 * the different_export to get new export info.
881 			 */
882 			different_export = nfs_exported(cs->exi, cs->vp);
883 		}
884 	}
885 
886 	/*
887 	 * Get the vnode for the component "nm".
888 	 */
889 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
890 	if (error)
891 		return (puterrno4(error));
892 
893 	/*
894 	 * If the vnode is in a pseudo filesystem, or if the security flavor
895 	 * used in the request is valid but not an explicitly shared flavor,
896 	 * or the access bit indicates that this is a limited access,
897 	 * check whether this vnode is visible.
898 	 */
899 	if (!different_export &&
900 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
901 	    cs->access & CS_ACCESS_LIMITED)) {
902 		if (! nfs_visible(cs->exi, vp, &different_export)) {
903 			VN_RELE(vp);
904 			return (puterrno4(ENOENT));
905 		}
906 	}
907 
908 	/*
909 	 * If it's a mountpoint, then traverse it.
910 	 */
911 	if (vn_ismntpt(vp)) {
912 		tvp = vp;
913 		if ((error = traverse(&tvp)) != 0) {
914 			VN_RELE(vp);
915 			return (puterrno4(error));
916 		}
917 		/* remember that we had to traverse mountpoint */
918 		did_traverse = TRUE;
919 		vp = tvp;
920 		different_export = 1;
921 	} else if (vp->v_vfsp != dvp->v_vfsp) {
922 		/*
923 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
924 		 * then vp is probably an LOFS object.  We don't need the
925 		 * realvp, we just need to know that we might have crossed
926 		 * a server fs boundary and need to call checkexport4.
927 		 * (LOFS lookup hides server fs mountpoints, and actually calls
928 		 * traverse)
929 		 */
930 		different_export = 1;
931 		did_traverse = FALSE;
932 	}
933 
934 	/*
935 	 * Get the export information for it.
936 	 */
937 	if (different_export) {
938 
939 		bzero(&fid, sizeof (fid));
940 		fid.fid_len = MAXFIDSZ;
941 		error = vop_fid_pseudo(vp, &fid);
942 		if (error) {
943 			VN_RELE(vp);
944 			return (puterrno4(error));
945 		}
946 
947 		if (dotdot)
948 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
949 		else
950 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
951 
952 		if (exi == NULL) {
953 			if (did_traverse == TRUE) {
954 				/*
955 				 * If this vnode is a mounted-on vnode,
956 				 * but the mounted-on file system is not
957 				 * exported, send back the secinfo for
958 				 * the exported node that the mounted-on
959 				 * vnode lives in.
960 				 */
961 				exi = cs->exi;
962 			} else {
963 				VN_RELE(vp);
964 				return (puterrno4(EACCES));
965 			}
966 		}
967 	} else {
968 		exi = cs->exi;
969 	}
970 	ASSERT(exi != NULL);
971 
972 
973 	/*
974 	 * Create the secinfo result based on the security information
975 	 * from the exportinfo structure (exi).
976 	 *
977 	 * Return all flavors for a pseudo node.
978 	 * For a real export node, return the flavor that the client
979 	 * has access with.
980 	 */
981 	ASSERT(RW_LOCK_HELD(&exported_lock));
982 	if (PSEUDO(exi)) {
983 		count = exi->exi_export.ex_seccnt; /* total sec count */
984 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
985 		secp = exi->exi_export.ex_secinfo;
986 
987 		for (i = 0; i < count; i++) {
988 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
989 		    if (resok_val[i].flavor == RPCSEC_GSS) {
990 			rpcsec_gss_info *info;
991 
992 			info = &resok_val[i].flavor_info;
993 			info->qop = secp[i].s_secinfo.sc_qop;
994 			info->service =
995 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
996 
997 			/* get oid opaque data */
998 			info->oid.sec_oid4_len =
999 				secp[i].s_secinfo.sc_gss_mech_type->length;
1000 			info->oid.sec_oid4_val =
1001 				kmem_alloc(
1002 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1003 				    KM_SLEEP);
1004 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1005 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1006 		    }
1007 		}
1008 		resp->SECINFO4resok_len = count;
1009 		resp->SECINFO4resok_val = resok_val;
1010 	} else {
1011 		int ret_cnt = 0, k = 0;
1012 		int *flavor_list;
1013 
1014 		count = exi->exi_export.ex_seccnt; /* total sec count */
1015 		secp = exi->exi_export.ex_secinfo;
1016 
1017 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1018 		/* find out which flavors to return */
1019 		for (i = 0; i < count; i ++) {
1020 			int access, flavor, perm;
1021 
1022 			flavor = secp[i].s_secinfo.sc_nfsnum;
1023 			perm = secp[i].s_flags;
1024 
1025 			access = nfsauth4_secinfo_access(exi, cs->req,
1026 						flavor, perm);
1027 
1028 			if (! (access & NFSAUTH_DENIED) &&
1029 			    ! (access & NFSAUTH_WRONGSEC)) {
1030 				flavor_list[ret_cnt] = flavor;
1031 				ret_cnt++;
1032 			}
1033 		}
1034 
1035 		/* Create the returning SECINFO value */
1036 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1037 
1038 		for (i = 0; i < count; i++) {
1039 		/* If the flavor is in the flavor list, fill in resok_val. */
1040 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1041 						flavor_list, ret_cnt)) {
1042 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1043 			if (resok_val[k].flavor == RPCSEC_GSS) {
1044 			    rpcsec_gss_info *info;
1045 
1046 			    info = &resok_val[k].flavor_info;
1047 			    info->qop = secp[i].s_secinfo.sc_qop;
1048 			    info->service =
1049 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1050 
1051 			    /* get oid opaque data */
1052 			    info->oid.sec_oid4_len =
1053 				secp[i].s_secinfo.sc_gss_mech_type->length;
1054 			    info->oid.sec_oid4_val =
1055 				kmem_alloc(
1056 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1057 				    KM_SLEEP);
1058 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1059 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1060 			}
1061 			k++;
1062 		    }
1063 		    if (k >= ret_cnt)
1064 			break;
1065 		}
1066 		resp->SECINFO4resok_len = ret_cnt;
1067 		resp->SECINFO4resok_val = resok_val;
1068 		kmem_free(flavor_list, count * sizeof (int));
1069 	}
1070 
1071 	VN_RELE(vp);
1072 	return (NFS4_OK);
1073 }
1074 
1075 /*
1076  * SECINFO (Operation 33): Obtain required security information on
1077  * the component name in the format of (security-mechanism-oid, qop, service)
1078  * triplets.
1079  */
1080 /* ARGSUSED */
1081 static void
1082 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1083 	struct compound_state *cs)
1084 {
1085 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1086 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1087 	uint_t len;
1088 	char *nm;
1089 
1090 	/*
1091 	 * Current file handle (cfh) should have been set before getting
1092 	 * into this function. If not, return error.
1093 	 */
1094 	if (cs->vp == NULL) {
1095 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1096 		return;
1097 	}
1098 
1099 	if (cs->vp->v_type != VDIR) {
1100 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1101 		return;
1102 	}
1103 
1104 	/*
1105 	 * Verify the component name. If failed, error out, but
1106 	 * do not error out if the component name is a "..".
1107 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1108 	 */
1109 	if (!utf8_dir_verify(utfnm)) {
1110 		if (utfnm->utf8string_len != 2 ||
1111 				utfnm->utf8string_val[0] != '.' ||
1112 				utfnm->utf8string_val[1] != '.') {
1113 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1114 			return;
1115 		}
1116 	}
1117 
1118 	nm = utf8_to_str(utfnm, &len, NULL);
1119 	if (nm == NULL) {
1120 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1121 		return;
1122 	}
1123 
1124 	if (len > MAXNAMELEN) {
1125 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1126 		kmem_free(nm, len);
1127 		return;
1128 	}
1129 
1130 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1131 
1132 	kmem_free(nm, len);
1133 }
1134 
1135 /*
1136  * Free SECINFO result.
1137  */
1138 /* ARGSUSED */
1139 static void
1140 rfs4_op_secinfo_free(nfs_resop4 *resop)
1141 {
1142 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1143 	int count, i;
1144 	secinfo4 *resok_val;
1145 
1146 	/* If this is not an Ok result, nothing to free. */
1147 	if (resp->status != NFS4_OK) {
1148 		return;
1149 	}
1150 
1151 	count = resp->SECINFO4resok_len;
1152 	resok_val = resp->SECINFO4resok_val;
1153 
1154 	for (i = 0; i < count; i++) {
1155 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1156 		rpcsec_gss_info *info;
1157 
1158 		info = &resok_val[i].flavor_info;
1159 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1160 	    }
1161 	}
1162 	kmem_free(resok_val, count * sizeof (secinfo4));
1163 	resp->SECINFO4resok_len = 0;
1164 	resp->SECINFO4resok_val = NULL;
1165 }
1166 
1167 /* ARGSUSED */
1168 static void
1169 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1170 	struct compound_state *cs)
1171 {
1172 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1173 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1174 	int error;
1175 	vnode_t *vp;
1176 	struct vattr va;
1177 	int checkwriteperm;
1178 	cred_t *cr = cs->cr;
1179 
1180 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1181 	if (cs->access == CS_ACCESS_DENIED) {
1182 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1183 		return;
1184 	}
1185 #endif
1186 	if (cs->vp == NULL) {
1187 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1188 		return;
1189 	}
1190 
1191 	ASSERT(cr != NULL);
1192 
1193 	vp = cs->vp;
1194 
1195 	/*
1196 	 * If the file system is exported read only, it is not appropriate
1197 	 * to check write permissions for regular files and directories.
1198 	 * Special files are interpreted by the client, so the underlying
1199 	 * permissions are sent back to the client for interpretation.
1200 	 */
1201 	if (rdonly4(cs->exi, cs->vp, req) &&
1202 		(vp->v_type == VREG || vp->v_type == VDIR))
1203 		checkwriteperm = 0;
1204 	else
1205 		checkwriteperm = 1;
1206 
1207 	/*
1208 	 * XXX
1209 	 * We need the mode so that we can correctly determine access
1210 	 * permissions relative to a mandatory lock file.  Access to
1211 	 * mandatory lock files is denied on the server, so it might
1212 	 * as well be reflected to the server during the open.
1213 	 */
1214 	va.va_mask = AT_MODE;
1215 	error = VOP_GETATTR(vp, &va, 0, cr);
1216 	if (error) {
1217 		*cs->statusp = resp->status = puterrno4(error);
1218 		return;
1219 	}
1220 
1221 	resp->access = 0;
1222 	resp->supported = 0;
1223 
1224 	if (args->access & ACCESS4_READ) {
1225 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1226 		if (!error && !MANDLOCK(vp, va.va_mode))
1227 			resp->access |= ACCESS4_READ;
1228 		resp->supported |= ACCESS4_READ;
1229 	}
1230 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1231 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1232 		if (!error)
1233 			resp->access |= ACCESS4_LOOKUP;
1234 		resp->supported |= ACCESS4_LOOKUP;
1235 	}
1236 	if (checkwriteperm &&
1237 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1238 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1239 		if (!error && !MANDLOCK(vp, va.va_mode))
1240 			resp->access |=
1241 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1242 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1243 	}
1244 
1245 	if (checkwriteperm &&
1246 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1247 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1248 		if (!error)
1249 			resp->access |= ACCESS4_DELETE;
1250 		resp->supported |= ACCESS4_DELETE;
1251 	}
1252 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1253 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1254 		if (!error && !MANDLOCK(vp, va.va_mode))
1255 			resp->access |= ACCESS4_EXECUTE;
1256 		resp->supported |= ACCESS4_EXECUTE;
1257 	}
1258 
1259 	*cs->statusp = resp->status = NFS4_OK;
1260 }
1261 
1262 /* ARGSUSED */
1263 static void
1264 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1265 	struct compound_state *cs)
1266 {
1267 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1268 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1269 	int error;
1270 	vnode_t *vp = cs->vp;
1271 	cred_t *cr = cs->cr;
1272 	vattr_t va;
1273 
1274 	if (vp == NULL) {
1275 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1276 		return;
1277 	}
1278 	if (cs->access == CS_ACCESS_DENIED) {
1279 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1280 		return;
1281 	}
1282 
1283 	if (args->offset + args->count < args->offset) {
1284 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1285 		return;
1286 	}
1287 
1288 	va.va_mask = AT_UID;
1289 	error = VOP_GETATTR(vp, &va, 0, cr);
1290 
1291 	/*
1292 	 * If we can't get the attributes, then we can't do the
1293 	 * right access checking.  So, we'll fail the request.
1294 	 */
1295 	if (error) {
1296 		*cs->statusp = resp->status = puterrno4(error);
1297 		return;
1298 	}
1299 	if (rdonly4(cs->exi, cs->vp, req)) {
1300 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1301 		return;
1302 	}
1303 
1304 	if (vp->v_type != VREG) {
1305 		if (vp->v_type == VDIR)
1306 			resp->status = NFS4ERR_ISDIR;
1307 		else
1308 			resp->status = NFS4ERR_INVAL;
1309 		*cs->statusp = resp->status;
1310 		return;
1311 	}
1312 
1313 	if (crgetuid(cr) != va.va_uid &&
1314 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1315 		*cs->statusp = resp->status = puterrno4(error);
1316 		return;
1317 	}
1318 
1319 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1320 	if (!error)
1321 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1322 
1323 	if (error) {
1324 		*cs->statusp = resp->status = puterrno4(error);
1325 		return;
1326 	}
1327 
1328 	*cs->statusp = resp->status = NFS4_OK;
1329 	resp->writeverf = Write4verf;
1330 }
1331 
1332 /*
1333  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1334  * was completed. It does the nfsv4 create for special files.
1335  */
1336 /* ARGSUSED */
1337 static vnode_t *
1338 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1339 	struct compound_state *cs, vattr_t *vap, char *nm)
1340 {
1341 	int error;
1342 	cred_t *cr = cs->cr;
1343 	vnode_t *dvp = cs->vp;
1344 	vnode_t *vp = NULL;
1345 	int mode;
1346 	enum vcexcl excl;
1347 
1348 	switch (args->type) {
1349 	case NF4CHR:
1350 	case NF4BLK:
1351 		if (secpolicy_sys_devices(cr) != 0) {
1352 			*cs->statusp = resp->status = NFS4ERR_PERM;
1353 			return (NULL);
1354 		}
1355 		if (args->type == NF4CHR)
1356 			vap->va_type = VCHR;
1357 		else
1358 			vap->va_type = VBLK;
1359 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1360 					args->ftype4_u.devdata.specdata2);
1361 		vap->va_mask |= AT_RDEV;
1362 		break;
1363 	case NF4SOCK:
1364 		vap->va_type = VSOCK;
1365 		break;
1366 	case NF4FIFO:
1367 		vap->va_type = VFIFO;
1368 		break;
1369 	default:
1370 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1371 		return (NULL);
1372 	}
1373 
1374 	/*
1375 	 * Must specify the mode.
1376 	 */
1377 	if (!(vap->va_mask & AT_MODE)) {
1378 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1379 		return (NULL);
1380 	}
1381 
1382 	excl = EXCL;
1383 
1384 	mode = 0;
1385 
1386 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1387 	if (error) {
1388 		*cs->statusp = resp->status = puterrno4(error);
1389 		return (NULL);
1390 	}
1391 	return (vp);
1392 }
1393 
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * The object named args->objname is created in the directory given by
 * the current filehandle (cs->vp).  Directories go through VOP_MKDIR,
 * symbolic links through VOP_SYMLINK followed by VOP_LOOKUP, and all
 * other accepted types through do_rfs4_op_mknod().
 *
 * On success cs->fh is rebuilt by makefh4() for the new object, the hold
 * on the old cs->vp is released and cs->vp is replaced by the new vnode;
 * resp->cinfo carries the directory change info and resp->attrset the
 * createattrs bits that were applied.  On failure resp->attrset is 0.
 * The outcome is stored in both resp->status and *cs->statusp.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * Directory attributes sampled at different points:
	 *	bva  - before the create
	 *	iva  - immediately after the create
	 *	iva2 - after the VOP_LOOKUP of a new symlink
	 *	ava  - after the directory has been synced
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		return;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}
	if (!utf8_dir_verify(&args->objname)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		return;
	}

	/*
	 * Name of newly created object
	 *
	 * From here on every exit path must free nm with its length len.
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		return;
	}

	resp->attrset = 0;

	/*
	 * Decode createattrs into sarg.vap.  Once the ntov table has been
	 * initialized, every exit path must call nfs4_ntov_table_free().
	 */
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
					&args->createattrs, cs, &sarg,
					&ntov, NFS4ATTR_SETIT);

	/* A create whose createattrs yields no vattr bits is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bva, 0, cr);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set default initial values for attributes when not specified
	 * in createattrs.
	 */
	if ((vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if ((vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
		/* lnm cannot be NULL here; that case returned above. */
		if (lnm != NULL)
			kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		/* VOP_SYMLINK returns no vnode, so look the new link up. */
		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
						iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);

		/* On failure do_rfs4_op_mknod() has already set the status. */
		if (vp == NULL) {
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		break;
	}
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr);

	/*
	 * NOTE(review): on the success path resp->status has not been
	 * assigned by this function before this test; this appears to
	 * rely on the caller handing in a zeroed result -- confirm.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle of the new object into cs->fh. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
			iva.va_seq == (bva.va_seq + 1) &&
			iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/* syncval is FNODSYNC for special files and 0 otherwise. */
	(void) VOP_FSYNC(vp, syncval, cr);

	/* makefh4() failed: drop the new vnode, leave cs->vp untouched. */
	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		return;
	}
	if (cs->vp)
		VN_RELE(cs->vp);

	/* The new object becomes the current filehandle's vnode. */
	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
}
1719 
1720 
1721 /*ARGSUSED*/
1722 static void
1723 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1724 	struct compound_state *cs)
1725 {
1726 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1727 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1728 	rfs4_deleg_state_t *dsp;
1729 	nfsstat4 status;
1730 
1731 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1732 	resp->status = *cs->statusp = status;
1733 	if (status != NFS4_OK)
1734 		return;
1735 
1736 	/* Ensure specified filehandle matches */
1737 	if (cs->vp != dsp->finfo->vp) {
1738 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1739 	} else
1740 		rfs4_return_deleg(dsp, FALSE);
1741 
1742 	rfs4_update_lease(dsp->client);
1743 
1744 	rfs4_deleg_state_rele(dsp);
1745 }
1746 
1747 /*
1748  * Check to see if a given "flavor" is an explicitly shared flavor.
1749  * The assumption of this routine is the "flavor" is already a valid
1750  * flavor in the secinfo list of "exi".
1751  *
1752  *	e.g.
1753  *		# share -o sec=flavor1 /export
1754  *		# share -o sec=flavor2 /export/home
1755  *
1756  *		flavor2 is not an explicitly shared flavor for /export,
1757  *		however it is in the secinfo list for /export thru the
1758  *		server namespace setup.
1759  */
1760 int
1761 is_exported_sec(int flavor, struct exportinfo *exi)
1762 {
1763 	int	i;
1764 	struct secinfo *sp;
1765 
1766 	sp = exi->exi_export.ex_secinfo;
1767 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1768 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1769 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1770 			return (SEC_REF_EXPORTED(&sp[i]));
1771 		}
1772 	}
1773 
1774 	/* Should not reach this point based on the assumption */
1775 	return (0);
1776 }
1777 
1778 /*
1779  * Check if the security flavor used in the request matches what is
1780  * required at the export point or at the root pseudo node (exi_root).
1781  *
1782  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1783  *
1784  */
1785 static int
1786 secinfo_match_or_authnone(struct compound_state *cs)
1787 {
1788 	int	i;
1789 	struct secinfo *sp;
1790 
1791 	/*
1792 	 * Check cs->nfsflavor (from the request) against
1793 	 * the current export data in cs->exi.
1794 	 */
1795 	sp = cs->exi->exi_export.ex_secinfo;
1796 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1797 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1798 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1799 			return (1);
1800 	}
1801 
1802 	return (0);
1803 }
1804 
1805 /*
1806  * Check the access authority for the client and return the correct error.
1807  */
1808 nfsstat4
1809 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1810 {
1811 	int	authres;
1812 
1813 	/*
1814 	 * First, check if the security flavor used in the request
1815 	 * are among the flavors set in the server namespace.
1816 	 */
1817 	if (!secinfo_match_or_authnone(cs)) {
1818 		*cs->statusp = NFS4ERR_WRONGSEC;
1819 		return (*cs->statusp);
1820 	}
1821 
1822 	authres = checkauth4(cs, req);
1823 
1824 	if (authres > 0) {
1825 		*cs->statusp = NFS4_OK;
1826 		if (! (cs->access & CS_ACCESS_LIMITED))
1827 			cs->access = CS_ACCESS_OK;
1828 	} else if (authres == 0) {
1829 		*cs->statusp = NFS4ERR_ACCESS;
1830 	} else if (authres == -2) {
1831 		*cs->statusp = NFS4ERR_WRONGSEC;
1832 	} else {
1833 		*cs->statusp = NFS4ERR_DELAY;
1834 	}
1835 	return (*cs->statusp);
1836 }
1837 
1838 /*
1839  * bitmap4_to_attrmask is called by getattr and readdir.
1840  * It sets up the vattr mask and determines whether vfsstat call is needed
1841  * based on the input bitmap.
1842  * Returns nfsv4 status.
1843  */
1844 static nfsstat4
1845 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1846 {
1847 	int i;
1848 	uint_t	va_mask;
1849 	struct statvfs64 *sbp = sargp->sbp;
1850 
1851 	sargp->sbp = NULL;
1852 	sargp->flag = 0;
1853 	sargp->rdattr_error = NFS4_OK;
1854 	sargp->mntdfid_set = FALSE;
1855 	if (sargp->cs->vp)
1856 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1857 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1858 	else
1859 		sargp->xattr = 0;
1860 
1861 	/*
1862 	 * Set rdattr_error_req to true if return error per
1863 	 * failed entry rather than fail the readdir.
1864 	 */
1865 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1866 		sargp->rdattr_error_req = 1;
1867 	else
1868 		sargp->rdattr_error_req = 0;
1869 
1870 	/*
1871 	 * generate the va_mask
1872 	 * Handle the easy cases first
1873 	 */
1874 	switch (breq) {
1875 	case NFS4_NTOV_ATTR_MASK:
1876 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1877 		return (NFS4_OK);
1878 
1879 	case NFS4_FS_ATTR_MASK:
1880 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1881 		sargp->sbp = sbp;
1882 		return (NFS4_OK);
1883 
1884 	case NFS4_NTOV_ATTR_CACHE_MASK:
1885 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1886 		return (NFS4_OK);
1887 
1888 	case FATTR4_LEASE_TIME_MASK:
1889 		sargp->vap->va_mask = 0;
1890 		return (NFS4_OK);
1891 
1892 	default:
1893 		va_mask = 0;
1894 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1895 			if ((breq & nfs4_ntov_map[i].fbit) &&
1896 							nfs4_ntov_map[i].vbit)
1897 				va_mask |= nfs4_ntov_map[i].vbit;
1898 		}
1899 
1900 		/*
1901 		 * Check is vfsstat is needed
1902 		 */
1903 		if (breq & NFS4_FS_ATTR_MASK)
1904 			sargp->sbp = sbp;
1905 
1906 		sargp->vap->va_mask = va_mask;
1907 		return (NFS4_OK);
1908 	}
1909 	/* NOTREACHED */
1910 }
1911 
1912 /*
1913  * bitmap4_get_sysattrs is called by getattr and readdir.
1914  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
1915  * Returns nfsv4 status.
1916  */
1917 static nfsstat4
1918 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
1919 {
1920 	int error;
1921 	struct compound_state *cs = sargp->cs;
1922 	vnode_t *vp = cs->vp;
1923 
1924 	if (sargp->sbp != NULL) {
1925 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
1926 			sargp->sbp = NULL;	/* to identify error */
1927 			return (puterrno4(error));
1928 		}
1929 	}
1930 
1931 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
1932 }
1933 
1934 static void
1935 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
1936 {
1937 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
1938 			KM_SLEEP);
1939 	ntovp->attrcnt = 0;
1940 	ntovp->vfsstat = FALSE;
1941 }
1942 
1943 static void
1944 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
1945 	struct nfs4_svgetit_arg *sargp)
1946 {
1947 	int i;
1948 	union nfs4_attr_u *na;
1949 	uint8_t *amap;
1950 
1951 	/*
1952 	 * XXX Should do the same checks for whether the bit is set
1953 	 */
1954 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
1955 		i < ntovp->attrcnt; i++, na++, amap++) {
1956 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
1957 			NFS4ATTR_FREEIT, sargp, na);
1958 	}
1959 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
1960 		/*
1961 		 * xdr_free for getattr will be done later
1962 		 */
1963 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
1964 			i < ntovp->attrcnt; i++, na++, amap++) {
1965 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
1966 		}
1967 	}
1968 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
1969 }
1970 
1971 /*
1972  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
1973  */
1974 static nfsstat4
1975 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
1976 	struct nfs4_svgetit_arg *sargp)
1977 {
1978 	int error = 0;
1979 	int i, k;
1980 	struct nfs4_ntov_table ntov;
1981 	XDR xdr;
1982 	ulong_t xdr_size;
1983 	char *xdr_attrs;
1984 	nfsstat4 status = NFS4_OK;
1985 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
1986 	union nfs4_attr_u *na;
1987 	uint8_t *amap;
1988 
1989 	sargp->op = NFS4ATTR_GETIT;
1990 	sargp->flag = 0;
1991 
1992 	fattrp->attrmask = 0;
1993 	/* if no bits requested, then return empty fattr4 */
1994 	if (breq == 0) {
1995 		fattrp->attrlist4_len = 0;
1996 		fattrp->attrlist4 = NULL;
1997 		return (NFS4_OK);
1998 	}
1999 
2000 	/*
2001 	 * return NFS4ERR_INVAL when client requests write-only attrs
2002 	 */
2003 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2004 		return (NFS4ERR_INVAL);
2005 
2006 	nfs4_ntov_table_init(&ntov);
2007 	na = ntov.na;
2008 	amap = ntov.amap;
2009 
2010 	/*
2011 	 * Now loop to get or verify the attrs
2012 	 */
2013 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2014 		if (breq & nfs4_ntov_map[i].fbit) {
2015 			if ((*nfs4_ntov_map[i].sv_getit)(
2016 				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2017 
2018 				error = (*nfs4_ntov_map[i].sv_getit)(
2019 						NFS4ATTR_GETIT, sargp, na);
2020 
2021 				/*
2022 				 * Possible error values:
2023 				 * >0 if sv_getit failed to
2024 				 * get the attr; 0 if succeeded;
2025 				 * <0 if rdattr_error and the
2026 				 * attribute cannot be returned.
2027 				 */
2028 				if (error && !(sargp->rdattr_error_req))
2029 					goto done;
2030 				/*
2031 				 * If error then just for entry
2032 				 */
2033 				if (error == 0) {
2034 					fattrp->attrmask |=
2035 						nfs4_ntov_map[i].fbit;
2036 					*amap++ =
2037 						(uint8_t)nfs4_ntov_map[i].nval;
2038 					na++;
2039 					(ntov.attrcnt)++;
2040 				} else if ((error > 0) &&
2041 					(sargp->rdattr_error == NFS4_OK)) {
2042 					sargp->rdattr_error = puterrno4(error);
2043 				}
2044 				error = 0;
2045 			}
2046 		}
2047 	}
2048 
2049 	/*
2050 	 * If rdattr_error was set after the return value for it was assigned,
2051 	 * update it.
2052 	 */
2053 	if (prev_rdattr_error != sargp->rdattr_error) {
2054 		na = ntov.na;
2055 		amap = ntov.amap;
2056 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2057 			k = *amap;
2058 			if (k < FATTR4_RDATTR_ERROR) {
2059 				continue;
2060 			}
2061 			if ((k == FATTR4_RDATTR_ERROR) &&
2062 			    ((*nfs4_ntov_map[k].sv_getit)(
2063 				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2064 
2065 				(void) (*nfs4_ntov_map[k].sv_getit)(
2066 						NFS4ATTR_GETIT, sargp, na);
2067 			}
2068 			break;
2069 		}
2070 	}
2071 
2072 	xdr_size = 0;
2073 	na = ntov.na;
2074 	amap = ntov.amap;
2075 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2076 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2077 	}
2078 
2079 	fattrp->attrlist4_len = xdr_size;
2080 	if (xdr_size) {
2081 		/* freed by rfs4_op_getattr_free() */
2082 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2083 
2084 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2085 
2086 		na = ntov.na;
2087 		amap = ntov.amap;
2088 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2089 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2090 				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
2091 					"encode of attribute %d failed\n",
2092 					*amap);
2093 				status = NFS4ERR_SERVERFAULT;
2094 				break;
2095 			}
2096 		}
2097 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2098 	} else {
2099 		fattrp->attrlist4 = NULL;
2100 	}
2101 done:
2102 
2103 	nfs4_ntov_table_free(&ntov, sargp);
2104 
2105 	if (error != 0)
2106 		status = puterrno4(error);
2107 
2108 	return (status);
2109 }
2110 
2111 /* ARGSUSED */
2112 static void
2113 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2114 	struct compound_state *cs)
2115 {
2116 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2117 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2118 	struct nfs4_svgetit_arg sarg;
2119 	struct statvfs64 sb;
2120 	nfsstat4 status;
2121 
2122 	if (cs->vp == NULL) {
2123 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2124 		return;
2125 	}
2126 
2127 	if (cs->access == CS_ACCESS_DENIED) {
2128 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2129 		return;
2130 	}
2131 
2132 	sarg.sbp = &sb;
2133 	sarg.cs = cs;
2134 
2135 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2136 	if (status == NFS4_OK) {
2137 		status = bitmap4_get_sysattrs(&sarg);
2138 		if (status == NFS4_OK)
2139 			status = do_rfs4_op_getattr(args->attr_request,
2140 				&resp->obj_attributes, &sarg);
2141 	}
2142 	*cs->statusp = resp->status = status;
2143 }
2144 
2145 static void
2146 rfs4_op_getattr_free(nfs_resop4 *resop)
2147 {
2148 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2149 
2150 	nfs4_fattr4_free(&resp->obj_attributes);
2151 }
2152 
2153 /* ARGSUSED */
2154 static void
2155 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156 	struct compound_state *cs)
2157 {
2158 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2159 
2160 	if (cs->vp == NULL) {
2161 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2162 		return;
2163 	}
2164 	if (cs->access == CS_ACCESS_DENIED) {
2165 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2166 		return;
2167 	}
2168 
2169 	resp->object.nfs_fh4_val =
2170 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2171 	nfs_fh4_copy(&cs->fh, &resp->object);
2172 	*cs->statusp = resp->status = NFS4_OK;
2173 }
2174 
2175 static void
2176 rfs4_op_getfh_free(nfs_resop4 *resop)
2177 {
2178 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2179 
2180 	if (resp->status == NFS4_OK &&
2181 	    resp->object.nfs_fh4_val != NULL) {
2182 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2183 		resp->object.nfs_fh4_val = NULL;
2184 		resp->object.nfs_fh4_len = 0;
2185 	}
2186 }
2187 
2188 /*
2189  * illegal: args: void
2190  *	    res : status (NFS4ERR_OP_ILLEGAL)
2191  */
2192 /* ARGSUSED */
2193 static void
2194 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2195 	struct svc_req *req, struct compound_state *cs)
2196 {
2197 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2198 
2199 	resop->resop = OP_ILLEGAL;
2200 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2201 }
2202 
2203 /*
2204  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2205  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2206  */
2207 /* ARGSUSED */
2208 static void
2209 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2210 	struct compound_state *cs)
2211 {
2212 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2213 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2214 	int error;
2215 	vnode_t *vp;
2216 	vnode_t *dvp;
2217 	struct vattr bdva, idva, adva;
2218 	char *nm;
2219 	uint_t  len;
2220 
2221 	/* SAVED_FH: source object */
2222 	vp = cs->saved_vp;
2223 	if (vp == NULL) {
2224 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2225 		return;
2226 	}
2227 
2228 	/* CURRENT_FH: target directory */
2229 	dvp = cs->vp;
2230 	if (dvp == NULL) {
2231 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2232 		return;
2233 	}
2234 
2235 	/*
2236 	 * If there is a non-shared filesystem mounted on this vnode,
2237 	 * do not allow to link any file in this directory.
2238 	 */
2239 	if (vn_ismntpt(dvp)) {
2240 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2241 		return;
2242 	}
2243 
2244 	if (cs->access == CS_ACCESS_DENIED) {
2245 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2246 		return;
2247 	}
2248 
2249 	/* Check source object's type validity */
2250 	if (vp->v_type == VDIR) {
2251 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2252 		return;
2253 	}
2254 
2255 	/* Check target directory's type */
2256 	if (dvp->v_type != VDIR) {
2257 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2258 		return;
2259 	}
2260 
2261 	if (cs->saved_exi != cs->exi) {
2262 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2263 		return;
2264 	}
2265 
2266 	if (!utf8_dir_verify(&args->newname)) {
2267 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2268 		return;
2269 	}
2270 
2271 	nm = utf8_to_fn(&args->newname, &len, NULL);
2272 	if (nm == NULL) {
2273 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2274 		return;
2275 	}
2276 
2277 	if (len > MAXNAMELEN) {
2278 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2279 		kmem_free(nm, len);
2280 		return;
2281 	}
2282 
2283 	if (rdonly4(cs->exi, cs->vp, req)) {
2284 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2285 		kmem_free(nm, len);
2286 		return;
2287 	}
2288 
2289 	/* Get "before" change value */
2290 	bdva.va_mask = AT_CTIME|AT_SEQ;
2291 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2292 	if (error) {
2293 		*cs->statusp = resp->status = puterrno4(error);
2294 		kmem_free(nm, len);
2295 		return;
2296 	}
2297 
2298 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2299 
2300 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2301 
2302 	kmem_free(nm, len);
2303 
2304 	/*
2305 	 * Get the initial "after" sequence number, if it fails, set to zero
2306 	 */
2307 	idva.va_mask = AT_SEQ;
2308 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2309 		idva.va_seq = 0;
2310 
2311 	/*
2312 	 * Force modified data and metadata out to stable storage.
2313 	 */
2314 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2315 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2316 
2317 	if (error) {
2318 		*cs->statusp = resp->status = puterrno4(error);
2319 		return;
2320 	}
2321 
2322 	/*
2323 	 * Get "after" change value, if it fails, simply return the
2324 	 * before value.
2325 	 */
2326 	adva.va_mask = AT_CTIME|AT_SEQ;
2327 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2328 		adva.va_ctime = bdva.va_ctime;
2329 		adva.va_seq = 0;
2330 	}
2331 
2332 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2333 
2334 	/*
2335 	 * The cinfo.atomic = TRUE only if we have
2336 	 * non-zero va_seq's, and it has incremented by exactly one
2337 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2338 	 */
2339 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2340 			idva.va_seq == (bdva.va_seq + 1) &&
2341 			idva.va_seq == adva.va_seq)
2342 		resp->cinfo.atomic = TRUE;
2343 	else
2344 		resp->cinfo.atomic = FALSE;
2345 
2346 	*cs->statusp = resp->status = NFS4_OK;
2347 }
2348 
2349 /*
2350  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2351  */
2352 
2353 /* ARGSUSED */
2354 static nfsstat4
2355 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2356 	struct compound_state *cs)
2357 {
2358 	int error;
2359 	int different_export = 0;
2360 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2361 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2362 	nfsstat4 stat;
2363 	fid_t fid;
2364 	int attrdir, dotdot, walk;
2365 	bool_t is_newvp = FALSE;
2366 
2367 	if (cs->vp->v_flag & V_XATTRDIR) {
2368 		attrdir = 1;
2369 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2370 	} else {
2371 		attrdir = 0;
2372 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2373 	}
2374 
2375 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2376 
2377 	/*
2378 	 * If dotdotting, then need to check whether it's
2379 	 * above the root of a filesystem, or above an
2380 	 * export point.
2381 	 */
2382 	if (dotdot) {
2383 
2384 		/*
2385 		 * If dotdotting at the root of a filesystem, then
2386 		 * need to traverse back to the mounted-on filesystem
2387 		 * and do the dotdot lookup there.
2388 		 */
2389 		if (cs->vp->v_flag & VROOT) {
2390 
2391 			/*
2392 			 * If at the system root, then can
2393 			 * go up no further.
2394 			 */
2395 			if (VN_CMP(cs->vp, rootdir))
2396 				return (puterrno4(ENOENT));
2397 
2398 			/*
2399 			 * Traverse back to the mounted-on filesystem
2400 			 */
2401 			cs->vp = untraverse(cs->vp);
2402 
2403 			/*
2404 			 * Set the different_export flag so we remember
2405 			 * to pick up a new exportinfo entry for
2406 			 * this new filesystem.
2407 			 */
2408 			different_export = 1;
2409 		} else {
2410 
2411 			/*
2412 			 * If dotdotting above an export point then set
2413 			 * the different_export to get new export info.
2414 			 */
2415 			different_export = nfs_exported(cs->exi, cs->vp);
2416 		}
2417 	}
2418 
2419 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2420 	if (error)
2421 		return (puterrno4(error));
2422 
2423 	/*
2424 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2425 	 *
2426 	 * XXX if the vnode is a symlink and it is not visible in
2427 	 * a pseudo filesystem, return ENOENT (not following symlink).
2428 	 * V4 client can not mount such symlink. This is a regression
2429 	 * from V2/V3.
2430 	 *
2431 	 * In the same exported filesystem, if the security flavor used
2432 	 * is not an explicitly shared flavor, limit the view to the visible
2433 	 * list entries only. This is not a WRONGSEC case because it's already
2434 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2435 	 */
2436 	if (!different_export &&
2437 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2438 	    cs->access & CS_ACCESS_LIMITED)) {
2439 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2440 			VN_RELE(vp);
2441 			return (puterrno4(ENOENT));
2442 		}
2443 	}
2444 
2445 	/*
2446 	 * If it's a mountpoint, then traverse it.
2447 	 */
2448 	if (vn_ismntpt(vp)) {
2449 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2450 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2451 
2452 		/*
2453 		 * hold pre_tvp to counteract rele by traverse.  We will
2454 		 * need pre_tvp below if checkexport4 fails
2455 		 */
2456 		VN_HOLD(pre_tvp);
2457 		tvp = vp;
2458 		if ((error = traverse(&tvp)) != 0) {
2459 			VN_RELE(vp);
2460 			VN_RELE(pre_tvp);
2461 			return (puterrno4(error));
2462 		}
2463 		vp = tvp;
2464 		different_export = 1;
2465 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2466 		/*
2467 		 * The vfsp comparison is to handle the case where
2468 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2469 		 * and NFS is unaware of local fs transistions because
2470 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2471 		 * the dir and the obj returned by lookup will have different
2472 		 * vfs ptrs.
2473 		 */
2474 		different_export = 1;
2475 	}
2476 
2477 	if (different_export) {
2478 
2479 		bzero(&fid, sizeof (fid));
2480 		fid.fid_len = MAXFIDSZ;
2481 		error = vop_fid_pseudo(vp, &fid);
2482 		if (error) {
2483 			VN_RELE(vp);
2484 			if (pre_tvp)
2485 				VN_RELE(pre_tvp);
2486 			return (puterrno4(error));
2487 		}
2488 
2489 		if (dotdot)
2490 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2491 		else
2492 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2493 
2494 		if (exi == NULL) {
2495 			if (pre_tvp) {
2496 				/*
2497 				 * If this vnode is a mounted-on vnode,
2498 				 * but the mounted-on file system is not
2499 				 * exported, send back the filehandle for
2500 				 * the mounted-on vnode, not the root of
2501 				 * the mounted-on file system.
2502 				 */
2503 				VN_RELE(vp);
2504 				vp = pre_tvp;
2505 				exi = pre_exi;
2506 			} else {
2507 				VN_RELE(vp);
2508 				return (puterrno4(EACCES));
2509 			}
2510 		} else if (pre_tvp) {
2511 			/* we're done with pre_tvp now. release extra hold */
2512 			VN_RELE(pre_tvp);
2513 		}
2514 
2515 		cs->exi = exi;
2516 
2517 		/*
2518 		 * Now we do a checkauth4. The reason is that
2519 		 * this client/user may not have access to the new
2520 		 * exported file system, and if he does,
2521 		 * the client/user may be mapped to a different uid.
2522 		 *
2523 		 * We start with a new cr, because the checkauth4 done
2524 		 * in the PUT*FH operation over wrote the cred's uid,
2525 		 * gid, etc, and we want the real thing before calling
2526 		 * checkauth4()
2527 		 */
2528 		crfree(cs->cr);
2529 		cs->cr = crdup(cs->basecr);
2530 
2531 		if (cs->vp)
2532 			oldvp = cs->vp;
2533 		cs->vp = vp;
2534 		is_newvp = TRUE;
2535 
2536 		stat = call_checkauth4(cs, req);
2537 		if (stat != NFS4_OK) {
2538 			VN_RELE(cs->vp);
2539 			cs->vp = oldvp;
2540 			return (stat);
2541 		}
2542 	}
2543 
2544 	error = makefh4(&cs->fh, vp, cs->exi);
2545 
2546 	if (error) {
2547 		if (is_newvp) {
2548 			VN_RELE(cs->vp);
2549 			cs->vp = oldvp;
2550 		} else
2551 			VN_RELE(vp);
2552 		return (puterrno4(error));
2553 	}
2554 
2555 	if (!is_newvp) {
2556 		if (cs->vp)
2557 			VN_RELE(cs->vp);
2558 		cs->vp = vp;
2559 	} else if (oldvp)
2560 		VN_RELE(oldvp);
2561 
2562 	/*
2563 	 * if did lookup on attrdir and didn't lookup .., set named
2564 	 * attr fh flag
2565 	 */
2566 	if (attrdir && ! dotdot)
2567 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2568 
2569 	/* Assume false for now, open proc will set this */
2570 	cs->mandlock = FALSE;
2571 
2572 	return (NFS4_OK);
2573 }
2574 
2575 /* ARGSUSED */
2576 static void
2577 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2578 	struct compound_state *cs)
2579 {
2580 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2581 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2582 	char *nm;
2583 	uint_t len;
2584 
2585 	if (cs->vp == NULL) {
2586 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2587 		return;
2588 	}
2589 
2590 	if (cs->vp->v_type == VLNK) {
2591 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2592 		return;
2593 	}
2594 
2595 	if (cs->vp->v_type != VDIR) {
2596 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2597 		return;
2598 	}
2599 
2600 	if (!utf8_dir_verify(&args->objname)) {
2601 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2602 		return;
2603 	}
2604 
2605 	nm = utf8_to_str(&args->objname, &len, NULL);
2606 	if (nm == NULL) {
2607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2608 		return;
2609 	}
2610 
2611 	if (len > MAXNAMELEN) {
2612 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2613 		kmem_free(nm, len);
2614 		return;
2615 	}
2616 
2617 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2618 
2619 	kmem_free(nm, len);
2620 }
2621 
2622 /* ARGSUSED */
2623 static void
2624 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2625 	struct compound_state *cs)
2626 {
2627 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2628 
2629 	if (cs->vp == NULL) {
2630 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2631 		return;
2632 	}
2633 
2634 	if (cs->vp->v_type != VDIR) {
2635 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2636 		return;
2637 	}
2638 
2639 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2640 
2641 	/*
2642 	 * From NFSV4 Specification, LOOKUPP should not check for
2643 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2644 	 */
2645 	if (resp->status == NFS4ERR_WRONGSEC) {
2646 		*cs->statusp = resp->status = NFS4_OK;
2647 	}
2648 }
2649 
2650 
2651 /*ARGSUSED2*/
2652 static void
2653 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2654 	struct compound_state *cs)
2655 {
2656 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2657 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2658 	vnode_t		*avp = NULL;
2659 	int		lookup_flags = LOOKUP_XATTR, error;
2660 	int		exp_ro = 0;
2661 
2662 	if (cs->vp == NULL) {
2663 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2664 		return;
2665 	}
2666 
2667 	/*
2668 	 * Make a couple of checks made by copen()
2669 	 *
2670 	 * Check to make sure underlying fs supports xattrs.  This
2671 	 * is required because solaris filesystem implementations
2672 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2673 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2674 	 * pathconf cmd or if fs supports cmd but doesn't claim
2675 	 * support for xattr, return NOTSUPP.  It would be better
2676 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2677 	 * that cmd is not available to VOP_PATHCONF interface
2678 	 * (it's only implemented inside pathconf syscall)...
2679 	 *
2680 	 * Verify permission to put attributes on files (access
2681 	 * checks from copen).
2682 	 */
2683 
2684 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2685 		error = ENOTSUP;
2686 		goto error_out;
2687 	}
2688 
2689 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2690 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2691 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2692 		error = EACCES;
2693 		goto error_out;
2694 	}
2695 
2696 	/*
2697 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2698 	 * the file system is exported read-only -- regardless of
2699 	 * createdir flag.  Otherwise the attrdir would be created
2700 	 * (assuming server fs isn't mounted readonly locally).  If
2701 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2702 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2703 	 * because specfs has no VOP_LOOKUP op, so the macro would
2704 	 * return ENOSYS.  EINVAL is returned by all (current)
2705 	 * Solaris file system implementations when any of their
2706 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2707 	 * Returning NOTSUPP is more appropriate in this case
2708 	 * because the object will never be able to have an attrdir.
2709 	 */
2710 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2711 		lookup_flags |= CREATE_XATTR_DIR;
2712 
2713 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2714 
2715 	if (error) {
2716 		if (error == ENOENT && args->createdir && exp_ro)
2717 			error = EROFS;
2718 		else if (error == EINVAL || error == ENOSYS)
2719 			error = ENOTSUP;
2720 		goto error_out;
2721 	}
2722 
2723 	ASSERT(avp->v_flag & V_XATTRDIR);
2724 
2725 	error = makefh4(&cs->fh, avp, cs->exi);
2726 
2727 	if (error) {
2728 		VN_RELE(avp);
2729 		goto error_out;
2730 	}
2731 
2732 	VN_RELE(cs->vp);
2733 	cs->vp = avp;
2734 
2735 	/*
2736 	 * There is no requirement for an attrdir fh flag
2737 	 * because the attrdir has a vnode flag to distinguish
2738 	 * it from regular (non-xattr) directories.  The
2739 	 * FH4_ATTRDIR flag is set for future sanity checks.
2740 	 */
2741 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2742 	*cs->statusp = resp->status = NFS4_OK;
2743 	return;
2744 
2745 error_out:
2746 
2747 	*cs->statusp = resp->status = puterrno4(error);
2748 }
2749 
2750 static int
2751 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2752 {
2753 	int error;
2754 	int i;
2755 	clock_t delaytime;
2756 	caller_context_t ct;
2757 
2758 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2759 
2760 	/*
2761 	 * Don't block on mandatory locks. If this routine returns
2762 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2763 	 */
2764 	uio->uio_fmode = FNONBLOCK;
2765 
2766 	ct.cc_sysid = 0;
2767 	ct.cc_pid = 0;
2768 	ct.cc_caller_id = nfs4_srv_caller_id;
2769 
2770 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2771 
2772 
2773 		if (direction == FREAD) {
2774 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2775 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2776 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2777 		} else {
2778 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2779 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2780 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2781 		}
2782 
2783 		if (error != EAGAIN)
2784 			break;
2785 
2786 		if (i < rfs4_maxlock_tries - 1) {
2787 			delay(delaytime);
2788 			delaytime *= 2;
2789 		}
2790 	}
2791 
2792 	return (error);
2793 }
2794 
2795 /* ARGSUSED */
2796 static void
2797 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2798 	struct compound_state *cs)
2799 {
2800 	READ4args *args = &argop->nfs_argop4_u.opread;
2801 	READ4res *resp = &resop->nfs_resop4_u.opread;
2802 	int error;
2803 	int verror;
2804 	vnode_t *vp;
2805 	struct vattr va;
2806 	struct iovec iov;
2807 	struct uio uio;
2808 	u_offset_t offset;
2809 	bool_t *deleg = &cs->deleg;
2810 	nfsstat4 stat;
2811 	int in_crit = 0;
2812 	mblk_t *mp;
2813 	int alloc_err = 0;
2814 
2815 	vp = cs->vp;
2816 	if (vp == NULL) {
2817 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2818 		return;
2819 	}
2820 	if (cs->access == CS_ACCESS_DENIED) {
2821 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2822 		return;
2823 	}
2824 
2825 	/*
2826 	 * Enter the critical region before calling VOP_RWLOCK
2827 	 * to avoid a deadlock with write requests.
2828 	 */
2829 	if (nbl_need_check(vp)) {
2830 		nbl_start_crit(vp, RW_READER);
2831 		in_crit = 1;
2832 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2833 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2834 			goto out;
2835 		}
2836 	}
2837 
2838 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2839 					deleg, TRUE)) != NFS4_OK) {
2840 		*cs->statusp = resp->status = stat;
2841 		goto out;
2842 	}
2843 
2844 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2845 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2846 
2847 	/*
2848 	 * If we can't get the attributes, then we can't do the
2849 	 * right access checking.  So, we'll fail the request.
2850 	 */
2851 	if (verror) {
2852 		*cs->statusp = resp->status = puterrno4(verror);
2853 		goto out;
2854 	}
2855 
2856 	if (vp->v_type != VREG) {
2857 		*cs->statusp = resp->status =
2858 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
2859 		goto out;
2860 	}
2861 
2862 	if (crgetuid(cs->cr) != va.va_uid &&
2863 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
2864 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
2865 		*cs->statusp = resp->status = puterrno4(error);
2866 		goto out;
2867 	}
2868 
2869 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
2870 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2871 		goto out;
2872 	}
2873 
2874 	offset = args->offset;
2875 	if (offset >= va.va_size) {
2876 		*cs->statusp = resp->status = NFS4_OK;
2877 		resp->eof = TRUE;
2878 		resp->data_len = 0;
2879 		resp->data_val = NULL;
2880 		resp->mblk = NULL;
2881 		*cs->statusp = resp->status = NFS4_OK;
2882 		goto out;
2883 	}
2884 
2885 	if (args->count == 0) {
2886 		*cs->statusp = resp->status = NFS4_OK;
2887 		resp->eof = FALSE;
2888 		resp->data_len = 0;
2889 		resp->data_val = NULL;
2890 		resp->mblk = NULL;
2891 		goto out;
2892 	}
2893 
2894 	/*
2895 	 * Do not allocate memory more than maximum allowed
2896 	 * transfer size
2897 	 */
2898 	if (args->count > rfs4_tsize(req))
2899 		args->count = rfs4_tsize(req);
2900 
2901 	/*
2902 	 * mp will contain the data to be sent out in the read reply.
2903 	 * It will be freed after the reply has been sent.
2904 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
2905 	 * so that the call to xdrmblk_putmblk() never fails.
2906 	 * If the first alloc of the requested size fails, then
2907 	 * decrease the size to something more reasonable and wait
2908 	 * for the allocation to occur.
2909 	 */
2910 	mp = allocb(RNDUP(args->count), BPRI_MED);
2911 	if (mp == NULL) {
2912 		if (args->count > MAXBSIZE)
2913 			args->count = MAXBSIZE;
2914 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
2915 				STR_NOSIG, &alloc_err);
2916 	}
2917 	ASSERT(mp != NULL);
2918 	ASSERT(alloc_err == 0);
2919 
2920 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
2921 	iov.iov_len = args->count;
2922 	uio.uio_iov = &iov;
2923 	uio.uio_iovcnt = 1;
2924 	uio.uio_segflg = UIO_SYSSPACE;
2925 	uio.uio_extflg = UIO_COPY_CACHED;
2926 	uio.uio_loffset = args->offset;
2927 	uio.uio_resid = args->count;
2928 
2929 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
2930 
2931 	va.va_mask = AT_SIZE;
2932 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2933 
2934 	if (error) {
2935 		freeb(mp);
2936 		*cs->statusp = resp->status = puterrno4(error);
2937 		goto out;
2938 	}
2939 
2940 	*cs->statusp = resp->status = NFS4_OK;
2941 
2942 	ASSERT(uio.uio_resid >= 0);
2943 	resp->data_len = args->count - uio.uio_resid;
2944 	resp->data_val = (char *)mp->b_datap->db_base;
2945 	resp->mblk = mp;
2946 
2947 	if (!verror && offset + resp->data_len == va.va_size)
2948 		resp->eof = TRUE;
2949 	else
2950 		resp->eof = FALSE;
2951 
2952 out:
2953 	if (in_crit)
2954 		nbl_end_crit(vp);
2955 }
2956 
2957 static void
2958 rfs4_op_read_free(nfs_resop4 *resop)
2959 {
2960 	READ4res *resp = &resop->nfs_resop4_u.opread;
2961 
2962 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2963 		freeb(resp->mblk);
2964 		resp->mblk = NULL;
2965 		resp->data_val = NULL;
2966 		resp->data_len = 0;
2967 	}
2968 }
2969 
2970 static void
2971 rfs4_op_readdir_free(nfs_resop4 *resop)
2972 {
2973 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
2974 
2975 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2976 		freeb(resp->mblk);
2977 		resp->mblk = NULL;
2978 		resp->data_len = 0;
2979 	}
2980 }
2981 
2982 
2983 /* ARGSUSED */
2984 static void
2985 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2986 	struct compound_state *cs)
2987 {
2988 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
2989 	int error;
2990 	vnode_t *vp;
2991 	struct exportinfo *exi, *sav_exi;
2992 	nfs_fh4_fmt_t *fh_fmtp;
2993 
2994 	if (cs->vp) {
2995 		VN_RELE(cs->vp);
2996 		cs->vp = NULL;
2997 	}
2998 
2999 	if (cs->cr)
3000 		crfree(cs->cr);
3001 
3002 	cs->cr = crdup(cs->basecr);
3003 
3004 	vp = exi_public->exi_vp;
3005 	if (vp == NULL) {
3006 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3007 		return;
3008 	}
3009 
3010 	error = makefh4(&cs->fh, vp, exi_public);
3011 	if (error != 0) {
3012 		*cs->statusp = resp->status = puterrno4(error);
3013 		return;
3014 	}
3015 	sav_exi = cs->exi;
3016 	if (exi_public == exi_root) {
3017 		/*
3018 		 * No filesystem is actually shared public, so we default
3019 		 * to exi_root. In this case, we must check whether root
3020 		 * is exported.
3021 		 */
3022 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3023 
3024 		/*
3025 		 * if root filesystem is exported, the exportinfo struct that we
3026 		 * should use is what checkexport4 returns, because root_exi is
3027 		 * actually a mostly empty struct.
3028 		 */
3029 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3030 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3031 		cs->exi = ((exi != NULL) ? exi : exi_public);
3032 	} else {
3033 		/*
3034 		 * it's a properly shared filesystem
3035 		 */
3036 		cs->exi = exi_public;
3037 	}
3038 
3039 	VN_HOLD(vp);
3040 	cs->vp = vp;
3041 
3042 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3043 		VN_RELE(cs->vp);
3044 		cs->vp = NULL;
3045 		cs->exi = sav_exi;
3046 		return;
3047 	}
3048 
3049 	*cs->statusp = resp->status = NFS4_OK;
3050 }
3051 
3052 /*
3053  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3054  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3055  * or joe have restrictive search permissions, then we shouldn't let
3056  * the client get a file handle. This is easy to enforce. However, we
3057  * don't know what security flavor should be used until we resolve the
3058  * path name. Another complication is uid mapping. If root is
3059  * the user, then it will be mapped to the anonymous user by default,
3060  * but we won't know that till we've resolved the path name. And we won't
3061  * know what the anonymous user is.
3062  * Luckily, SECINFO is specified to take a full filename.
3063  * So what we will have to in rfs4_op_lookup is check that flavor of
3064  * the target object matches that of the request, and if root was the
3065  * caller, check for the root= and anon= options, and if necessary,
3066  * repeat the lookup using the right cred_t. But that's not done yet.
3067  */
3068 /* ARGSUSED */
3069 static void
3070 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3071 	struct compound_state *cs)
3072 {
3073 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3074 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3075 	nfs_fh4_fmt_t *fh_fmtp;
3076 
3077 	if (cs->vp) {
3078 		VN_RELE(cs->vp);
3079 		cs->vp = NULL;
3080 	}
3081 
3082 	if (cs->cr) {
3083 		crfree(cs->cr);
3084 		cs->cr = NULL;
3085 	}
3086 
3087 
3088 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3089 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3090 		return;
3091 	}
3092 
3093 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3094 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3095 				NULL);
3096 
3097 	if (cs->exi == NULL) {
3098 		*cs->statusp = resp->status = NFS4ERR_STALE;
3099 		return;
3100 	}
3101 
3102 	cs->cr = crdup(cs->basecr);
3103 
3104 	ASSERT(cs->cr != NULL);
3105 
3106 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3107 		*cs->statusp = resp->status;
3108 		return;
3109 	}
3110 
3111 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3112 		VN_RELE(cs->vp);
3113 		cs->vp = NULL;
3114 		return;
3115 	}
3116 
3117 	nfs_fh4_copy(&args->object, &cs->fh);
3118 	*cs->statusp = resp->status = NFS4_OK;
3119 	cs->deleg = FALSE;
3120 }
3121 
3122 /* ARGSUSED */
3123 static void
3124 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125 	struct compound_state *cs)
3126 
3127 {
3128 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3129 	int error;
3130 	fid_t fid;
3131 	struct exportinfo *exi, *sav_exi;
3132 
3133 	if (cs->vp) {
3134 		VN_RELE(cs->vp);
3135 		cs->vp = NULL;
3136 	}
3137 
3138 	if (cs->cr)
3139 		crfree(cs->cr);
3140 
3141 	cs->cr = crdup(cs->basecr);
3142 
3143 	/*
3144 	 * Using rootdir, the system root vnode,
3145 	 * get its fid.
3146 	 */
3147 	bzero(&fid, sizeof (fid));
3148 	fid.fid_len = MAXFIDSZ;
3149 	error = vop_fid_pseudo(rootdir, &fid);
3150 	if (error != 0) {
3151 		*cs->statusp = resp->status = puterrno4(error);
3152 		return;
3153 	}
3154 
3155 	/*
3156 	 * Then use the root fsid & fid it to find out if it's exported
3157 	 *
3158 	 * If the server root isn't exported directly, then
3159 	 * it should at least be a pseudo export based on
3160 	 * one or more exports further down in the server's
3161 	 * file tree.
3162 	 */
3163 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3164 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3165 		NFS4_DEBUG(rfs4_debug,
3166 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3167 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3168 		return;
3169 	}
3170 
3171 	/*
3172 	 * Now make a filehandle based on the root
3173 	 * export and root vnode.
3174 	 */
3175 	error = makefh4(&cs->fh, rootdir, exi);
3176 	if (error != 0) {
3177 		*cs->statusp = resp->status = puterrno4(error);
3178 		return;
3179 	}
3180 
3181 	sav_exi = cs->exi;
3182 	cs->exi = exi;
3183 
3184 	VN_HOLD(rootdir);
3185 	cs->vp = rootdir;
3186 
3187 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3188 		VN_RELE(rootdir);
3189 		cs->vp = NULL;
3190 		cs->exi = sav_exi;
3191 		return;
3192 	}
3193 
3194 	*cs->statusp = resp->status = NFS4_OK;
3195 	cs->deleg = FALSE;
3196 }
3197 
3198 /*
3199  * A directory entry is a valid nfsv4 entry if
3200  * - it has a non-zero ino
3201  * - it is not a dot or dotdot name
3202  * - it is visible in a pseudo export or in a real export that can
3203  *   only have a limited view.
3204  */
3205 static bool_t
3206 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3207 		int *expseudo, int check_visible)
3208 {
3209 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3210 		*expseudo = 0;
3211 		return (FALSE);
3212 	}
3213 
3214 	if (! check_visible) {
3215 		*expseudo = 0;
3216 		return (TRUE);
3217 	}
3218 
3219 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3220 }
3221 
3222 /*
3223  * set_rdattr_params sets up the variables used to manage what information
3224  * to get for each directory entry.
3225  */
3226 static nfsstat4
3227 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3228 		bitmap4 attrs, bool_t *need_to_lookup)
3229 {
3230 	uint_t	va_mask;
3231 	nfsstat4 status;
3232 	bitmap4 objbits;
3233 
3234 	status = bitmap4_to_attrmask(attrs, sargp);
3235 	if (status != NFS4_OK) {
3236 		/*
3237 		 * could not even figure attr mask
3238 		 */
3239 		return (status);
3240 	}
3241 	va_mask = sargp->vap->va_mask;
3242 
3243 	/*
3244 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3245 	 * mntdfid_set is set once here, but mounted_on_fileid is
3246 	 * set in main dirent processing loop for each dirent.
3247 	 * The mntdfid_set is a simple optimization that lets the
3248 	 * server attr code avoid work when caller is readdir.
3249 	 */
3250 	sargp->mntdfid_set = TRUE;
3251 
3252 	/*
3253 	 * Lookup entry only if client asked for any of the following:
3254 	 * a) vattr attrs
3255 	 * b) vfs attrs
3256 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3257 	 *    other than mounted_on_fileid (which we can take from dirent)
3258 	 */
3259 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3260 
3261 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3262 		*need_to_lookup = TRUE;
3263 	else
3264 		*need_to_lookup = FALSE;
3265 
3266 	if (sargp->sbp == NULL)
3267 		return (NFS4_OK);
3268 
3269 	/*
3270 	 * If filesystem attrs are requested, get them now from the
3271 	 * directory vp, as most entries will have same filesystem. The only
3272 	 * exception are mounted over entries but we handle
3273 	 * those as we go (XXX mounted over detection not yet implemented).
3274 	 */
3275 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3276 	status = bitmap4_get_sysattrs(sargp);
3277 	sargp->vap->va_mask = va_mask;
3278 
3279 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3280 		/*
3281 		 * Failed to get filesystem attributes.
3282 		 * Return a rdattr_error for each entry, but don't fail.
3283 		 * However, don't get any obj-dependent attrs.
3284 		 */
3285 		sargp->rdattr_error = status;	/* for rdattr_error */
3286 		*need_to_lookup = FALSE;
3287 		/*
3288 		 * At least get fileid for regular readdir output
3289 		 */
3290 		sargp->vap->va_mask &= AT_NODEID;
3291 		status = NFS4_OK;
3292 	}
3293 
3294 	return (status);
3295 }
3296 
3297 /*
3298  * readlink: args: CURRENT_FH.
3299  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3300  */
3301 
3302 /* ARGSUSED */
3303 static void
3304 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3305 	struct compound_state *cs)
3306 {
3307 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3308 	int error;
3309 	vnode_t *vp;
3310 	struct iovec iov;
3311 	struct vattr va;
3312 	struct uio uio;
3313 	char *data;
3314 
3315 	/* CURRENT_FH: directory */
3316 	vp = cs->vp;
3317 	if (vp == NULL) {
3318 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3319 		return;
3320 	}
3321 
3322 	if (cs->access == CS_ACCESS_DENIED) {
3323 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3324 		return;
3325 	}
3326 
3327 	if (vp->v_type == VDIR) {
3328 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3329 		return;
3330 	}
3331 
3332 	if (vp->v_type != VLNK) {
3333 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3334 		return;
3335 	}
3336 
3337 	va.va_mask = AT_MODE;
3338 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3339 	if (error) {
3340 		*cs->statusp = resp->status = puterrno4(error);
3341 		return;
3342 	}
3343 
3344 	if (MANDLOCK(vp, va.va_mode)) {
3345 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3346 		return;
3347 	}
3348 
3349 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3350 
3351 	iov.iov_base = data;
3352 	iov.iov_len = MAXPATHLEN;
3353 	uio.uio_iov = &iov;
3354 	uio.uio_iovcnt = 1;
3355 	uio.uio_segflg = UIO_SYSSPACE;
3356 	uio.uio_extflg = UIO_COPY_CACHED;
3357 	uio.uio_loffset = 0;
3358 	uio.uio_resid = MAXPATHLEN;
3359 
3360 	error = VOP_READLINK(vp, &uio, cs->cr);
3361 
3362 	if (error) {
3363 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3364 		*cs->statusp = resp->status = puterrno4(error);
3365 		return;
3366 	}
3367 
3368 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3369 
3370 	/*
3371 	 * treat link name as data
3372 	 */
3373 	(void) str_to_utf8(data, &resp->link);
3374 
3375 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3376 	*cs->statusp = resp->status = NFS4_OK;
3377 }
3378 
3379 static void
3380 rfs4_op_readlink_free(nfs_resop4 *resop)
3381 {
3382 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3383 	utf8string *symlink = &resp->link;
3384 
3385 	if (symlink->utf8string_val) {
3386 		UTF8STRING_FREE(*symlink)
3387 	}
3388 }
3389 
3390 /*
3391  * release_lockowner:
3392  *	Release any state associated with the supplied
3393  *	lockowner. Note if any lo_state is holding locks we will not
3394  *	rele that lo_state and thus the lockowner will not be destroyed.
3395  *	A client using lock after the lock owner stateid has been released
3396  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3397  *	to reissue the lock with new_lock_owner set to TRUE.
3398  *	args: lock_owner
3399  *	res:  status
3400  */
3401 /* ARGSUSED */
3402 static void
3403 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3404 	struct svc_req *req, struct compound_state *cs)
3405 {
3406 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3407 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3408 	rfs4_lockowner_t *lo;
3409 	rfs4_openowner_t *oop;
3410 	rfs4_state_t *sp;
3411 	rfs4_lo_state_t *lsp;
3412 	rfs4_client_t *cp;
3413 	bool_t create = FALSE;
3414 	locklist_t *llist;
3415 	sysid_t sysid;
3416 
3417 	/* Make sure there is a clientid around for this request */
3418 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3419 
3420 	if (cp == NULL) {
3421 		*cs->statusp = resp->status =
3422 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3423 		return;
3424 	}
3425 	rfs4_client_rele(cp);
3426 
3427 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3428 	if (lo == NULL) {
3429 		*cs->statusp = resp->status = NFS4_OK;
3430 		return;
3431 	}
3432 	ASSERT(lo->client != NULL);
3433 
3434 	/*
3435 	 * Check for EXPIRED client. If so will reap state with in a lease
3436 	 * period or on next set_clientid_confirm step
3437 	 */
3438 	if (rfs4_lease_expired(lo->client)) {
3439 		rfs4_lockowner_rele(lo);
3440 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3441 		return;
3442 	}
3443 
3444 	/*
3445 	 * If no sysid has been assigned, then no locks exist; just return.
3446 	 */
3447 	rfs4_dbe_lock(lo->client->dbe);
3448 	if (lo->client->sysidt == LM_NOSYSID) {
3449 		rfs4_lockowner_rele(lo);
3450 		rfs4_dbe_unlock(lo->client->dbe);
3451 		return;
3452 	}
3453 
3454 	sysid = lo->client->sysidt;
3455 	rfs4_dbe_unlock(lo->client->dbe);
3456 
3457 	/*
3458 	 * Mark the lockowner invalid.
3459 	 */
3460 	rfs4_dbe_hide(lo->dbe);
3461 
3462 	/*
3463 	 * sysid-pid pair should now not be used since the lockowner is
3464 	 * invalid. If the client were to instantiate the lockowner again
3465 	 * it would be assigned a new pid. Thus we can get the list of
3466 	 * current locks.
3467 	 */
3468 
3469 	llist = flk_get_active_locks(sysid, lo->pid);
3470 	/* If we are still holding locks fail */
3471 	if (llist != NULL) {
3472 
3473 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3474 
3475 		flk_free_locklist(llist);
3476 		/*
3477 		 * We need to unhide the lockowner so the client can
3478 		 * try it again. The bad thing here is if the client
3479 		 * has a logic error that took it here in the first place
3480 		 * he probably has lost accounting of the locks that it
3481 		 * is holding. So we may have dangling state until the
3482 		 * open owner state is reaped via close. One scenario
3483 		 * that could possibly occur is that the client has
3484 		 * sent the unlock request(s) in separate threads
3485 		 * and has not waited for the replies before sending the
3486 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3487 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3488 		 * reissuing the request.
3489 		 */
3490 		rfs4_dbe_unhide(lo->dbe);
3491 		rfs4_lockowner_rele(lo);
3492 		return;
3493 	}
3494 
3495 	/*
3496 	 * For the corresponding client we need to check each open
3497 	 * owner for any opens that have lockowner state associated
3498 	 * with this lockowner.
3499 	 */
3500 
3501 	rfs4_dbe_lock(lo->client->dbe);
3502 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3503 	    oop = oop->openownerlist.next->oop) {
3504 
3505 		rfs4_dbe_lock(oop->dbe);
3506 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3507 		    sp = sp->ownerstateids.next->sp) {
3508 
3509 			rfs4_dbe_lock(sp->dbe);
3510 			for (lsp = sp->lockownerlist.next->lsp;
3511 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3512 				if (lsp->locker == lo) {
3513 					rfs4_dbe_lock(lsp->dbe);
3514 					rfs4_dbe_invalidate(lsp->dbe);
3515 					rfs4_dbe_unlock(lsp->dbe);
3516 				}
3517 			}
3518 			rfs4_dbe_unlock(sp->dbe);
3519 		}
3520 		rfs4_dbe_unlock(oop->dbe);
3521 	}
3522 	rfs4_dbe_unlock(lo->client->dbe);
3523 
3524 	rfs4_lockowner_rele(lo);
3525 
3526 	*cs->statusp = resp->status = NFS4_OK;
3527 }
3528 
3529 /*
3530  * short utility function to lookup a file and recall the delegation
3531  */
3532 static rfs4_file_t *
3533 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3534 	int *lkup_error, cred_t *cr)
3535 {
3536 	vnode_t *vp;
3537 	rfs4_file_t *fp = NULL;
3538 	bool_t fcreate = FALSE;
3539 	int error;
3540 
3541 	if (vpp)
3542 		*vpp = NULL;
3543 
3544 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3545 		if (vp->v_type == VREG)
3546 			fp = rfs4_findfile(vp, NULL, &fcreate);
3547 		if (vpp)
3548 			*vpp = vp;
3549 		else
3550 			VN_RELE(vp);
3551 	}
3552 
3553 	if (lkup_error)
3554 		*lkup_error = error;
3555 
3556 	return (fp);
3557 }
3558 
3559 /*
3560  * remove: args: CURRENT_FH: directory; name.
3561  *	res: status. If success - CURRENT_FH unchanged, return change_info
3562  *		for directory.
3563  */
3564 /* ARGSUSED */
3565 static void
3566 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3567 	struct compound_state *cs)
3568 {
3569 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3570 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3571 	int error;
3572 	vnode_t *dvp, *vp;
3573 	struct vattr bdva, idva, adva;
3574 	char *nm;
3575 	uint_t len;
3576 	rfs4_file_t *fp;
3577 	int in_crit = 0;
3578 
3579 	/* CURRENT_FH: directory */
3580 	dvp = cs->vp;
3581 	if (dvp == NULL) {
3582 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3583 		return;
3584 	}
3585 
3586 	if (cs->access == CS_ACCESS_DENIED) {
3587 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3588 		return;
3589 	}
3590 
3591 	/*
3592 	 * If there is an unshared filesystem mounted on this vnode,
3593 	 * Do not allow to remove anything in this directory.
3594 	 */
3595 	if (vn_ismntpt(dvp)) {
3596 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3597 		return;
3598 	}
3599 
3600 	if (dvp->v_type != VDIR) {
3601 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3602 		return;
3603 	}
3604 
3605 	if (!utf8_dir_verify(&args->target)) {
3606 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3607 		return;
3608 	}
3609 
3610 	/*
3611 	 * Lookup the file so that we can check if it's a directory
3612 	 */
3613 	nm = utf8_to_fn(&args->target, &len, NULL);
3614 	if (nm == NULL) {
3615 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3616 		return;
3617 	}
3618 
3619 	if (len > MAXNAMELEN) {
3620 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3621 		kmem_free(nm, len);
3622 		return;
3623 	}
3624 
3625 	if (rdonly4(cs->exi, cs->vp, req)) {
3626 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3627 		kmem_free(nm, len);
3628 		return;
3629 	}
3630 
3631 	/*
3632 	 * Lookup the file to determine type and while we are see if
3633 	 * there is a file struct around and check for delegation.
3634 	 * We don't need to acquire va_seq before this lookup, if
3635 	 * it causes an update, cinfo.before will not match, which will
3636 	 * trigger a cache flush even if atomic is TRUE.
3637 	 */
3638 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3639 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3640 						NULL)) {
3641 			VN_RELE(vp);
3642 			rfs4_file_rele(fp);
3643 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3644 			kmem_free(nm, len);
3645 			return;
3646 		}
3647 	}
3648 
3649 	/* Didn't find anything to remove */
3650 	if (vp == NULL) {
3651 		*cs->statusp = resp->status = error;
3652 		kmem_free(nm, len);
3653 		return;
3654 	}
3655 
3656 	if (nbl_need_check(vp)) {
3657 		nbl_start_crit(vp, RW_READER);
3658 		in_crit = 1;
3659 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3660 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3661 			kmem_free(nm, len);
3662 			nbl_end_crit(vp);
3663 			VN_RELE(vp);
3664 			if (fp) {
3665 				rfs4_clear_dont_grant(fp);
3666 				rfs4_file_rele(fp);
3667 			}
3668 			return;
3669 		}
3670 	}
3671 
3672 	/* Get dir "before" change value */
3673 	bdva.va_mask = AT_CTIME|AT_SEQ;
3674 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3675 	if (error) {
3676 		*cs->statusp = resp->status = puterrno4(error);
3677 		kmem_free(nm, len);
3678 		return;
3679 	}
3680 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3681 
3682 	/* Actually do the REMOVE operation */
3683 	if (vp->v_type == VDIR) {
3684 		/*
3685 		 * Can't remove a directory that has a mounted-on filesystem.
3686 		 */
3687 		if (vn_ismntpt(vp)) {
3688 			error = EACCES;
3689 		} else {
3690 			/*
3691 			 * System V defines rmdir to return EEXIST,
3692 			 * not * ENOTEMPTY, if the directory is not
3693 			 * empty.  A System V NFS server needs to map
3694 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3695 			 * transmit over the wire.
3696 			 */
3697 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3698 				== EEXIST)
3699 				error = ENOTEMPTY;
3700 		}
3701 	} else {
3702 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3703 			fp != NULL) {
3704 			struct vattr va;
3705 			vnode_t *tvp;
3706 
3707 			rfs4_dbe_lock(fp->dbe);
3708 			tvp = fp->vp;
3709 			if (tvp)
3710 				VN_HOLD(tvp);
3711 			rfs4_dbe_unlock(fp->dbe);
3712 
3713 			if (tvp) {
3714 				/*
3715 				 * This is va_seq safe because we are not
3716 				 * manipulating dvp.
3717 				 */
3718 				va.va_mask = AT_NLINK;
3719 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3720 					va.va_nlink == 0) {
3721 					/* Remove state on file remove */
3722 					if (in_crit) {
3723 						nbl_end_crit(vp);
3724 						in_crit = 0;
3725 					}
3726 					rfs4_close_all_state(fp);
3727 				}
3728 				VN_RELE(tvp);
3729 			}
3730 		}
3731 	}
3732 
3733 	if (in_crit)
3734 		nbl_end_crit(vp);
3735 	VN_RELE(vp);
3736 
3737 	if (fp) {
3738 		rfs4_clear_dont_grant(fp);
3739 		rfs4_file_rele(fp);
3740 	}
3741 	kmem_free(nm, len);
3742 
3743 	if (error) {
3744 		*cs->statusp = resp->status = puterrno4(error);
3745 		return;
3746 	}
3747 
3748 	/*
3749 	 * Get the initial "after" sequence number, if it fails, set to zero
3750 	 */
3751 	idva.va_mask = AT_SEQ;
3752 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3753 		idva.va_seq = 0;
3754 
3755 	/*
3756 	 * Force modified data and metadata out to stable storage.
3757 	 */
3758 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3759 
3760 	/*
3761 	 * Get "after" change value, if it fails, simply return the
3762 	 * before value.
3763 	 */
3764 	adva.va_mask = AT_CTIME|AT_SEQ;
3765 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3766 		adva.va_ctime = bdva.va_ctime;
3767 		adva.va_seq = 0;
3768 	}
3769 
3770 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3771 
3772 	/*
3773 	 * The cinfo.atomic = TRUE only if we have
3774 	 * non-zero va_seq's, and it has incremented by exactly one
3775 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3776 	 * the VOP_FSYNC.
3777 	 */
3778 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3779 			idva.va_seq == (bdva.va_seq + 1) &&
3780 			idva.va_seq == adva.va_seq)
3781 		resp->cinfo.atomic = TRUE;
3782 	else
3783 		resp->cinfo.atomic = FALSE;
3784 
3785 	*cs->statusp = resp->status = NFS4_OK;
3786 }
3787 
3788 /*
3789  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3790  *		oldname and newname.
3791  *	res: status. If success - CURRENT_FH unchanged, return change_info
3792  *		for both from and target directories.
3793  */
3794 /* ARGSUSED */
3795 static void
3796 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3797 	struct compound_state *cs)
3798 {
3799 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3800 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3801 	int error;
3802 	vnode_t *odvp;
3803 	vnode_t *ndvp;
3804 	vnode_t *srcvp, *targvp;
3805 	struct vattr obdva, oidva, oadva;
3806 	struct vattr nbdva, nidva, nadva;
3807 	char *onm, *nnm;
3808 	uint_t olen, nlen;
3809 	rfs4_file_t *fp, *sfp;
3810 	int in_crit_src, in_crit_targ;
3811 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3812 
3813 	fp = sfp = NULL;
3814 	srcvp = targvp = NULL;
3815 	in_crit_src = in_crit_targ = 0;
3816 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3817 
3818 	/* CURRENT_FH: target directory */
3819 	ndvp = cs->vp;
3820 	if (ndvp == NULL) {
3821 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3822 		return;
3823 	}
3824 
3825 	/* SAVED_FH: from directory */
3826 	odvp = cs->saved_vp;
3827 	if (odvp == NULL) {
3828 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3829 		return;
3830 	}
3831 
3832 	if (cs->access == CS_ACCESS_DENIED) {
3833 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3834 		return;
3835 	}
3836 
3837 	/*
3838 	 * If there is an unshared filesystem mounted on this vnode,
3839 	 * do not allow to rename objects in this directory.
3840 	 */
3841 	if (vn_ismntpt(odvp)) {
3842 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3843 		return;
3844 	}
3845 
3846 	/*
3847 	 * If there is an unshared filesystem mounted on this vnode,
3848 	 * do not allow to rename to this directory.
3849 	 */
3850 	if (vn_ismntpt(ndvp)) {
3851 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3852 		return;
3853 	}
3854 
3855 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
3856 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3857 		return;
3858 	}
3859 
3860 	if (cs->saved_exi != cs->exi) {
3861 		*cs->statusp = resp->status = NFS4ERR_XDEV;
3862 		return;
3863 	}
3864 
3865 	if (!utf8_dir_verify(&args->oldname)) {
3866 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3867 		return;
3868 	}
3869 
3870 	if (!utf8_dir_verify(&args->newname)) {
3871 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3872 		return;
3873 	}
3874 
3875 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
3876 	if (onm == NULL) {
3877 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3878 		return;
3879 	}
3880 
3881 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
3882 	if (nnm == NULL) {
3883 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3884 		kmem_free(onm, olen);
3885 		return;
3886 	}
3887 
3888 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
3889 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3890 		kmem_free(onm, olen);
3891 		kmem_free(nnm, nlen);
3892 		return;
3893 	}
3894 
3895 
3896 	if (rdonly4(cs->exi, cs->vp, req)) {
3897 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3898 		kmem_free(onm, olen);
3899 		kmem_free(nnm, nlen);
3900 		return;
3901 	}
3902 
3903 	/*
3904 	 * Is the source a file and have a delegation?
3905 	 * We don't need to acquire va_seq before these lookups, if
3906 	 * it causes an update, cinfo.before will not match, which will
3907 	 * trigger a cache flush even if atomic is TRUE.
3908 	 */
3909 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
3910 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
3911 						NULL)) {
3912 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3913 			goto err_out;
3914 		}
3915 	}
3916 
3917 	if (srcvp == NULL) {
3918 		*cs->statusp = resp->status = puterrno4(error);
3919 		kmem_free(onm, olen);
3920 		kmem_free(nnm, nlen);
3921 		return;
3922 	}
3923 
3924 	sfp_rele_grant_hold = 1;
3925 
3926 	/* Does the destination exist and a file and have a delegation? */
3927 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
3928 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3929 						NULL)) {
3930 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3931 			goto err_out;
3932 		}
3933 	}
3934 	fp_rele_grant_hold = 1;
3935 
3936 
3937 	/* Check for NBMAND lock on both source and target */
3938 	if (nbl_need_check(srcvp)) {
3939 		nbl_start_crit(srcvp, RW_READER);
3940 		in_crit_src = 1;
3941 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
3942 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3943 			goto err_out;
3944 		}
3945 	}
3946 
3947 	if (targvp && nbl_need_check(targvp)) {
3948 		nbl_start_crit(targvp, RW_READER);
3949 		in_crit_targ = 1;
3950 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
3951 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3952 			goto err_out;
3953 		}
3954 	}
3955 
3956 	/* Get source "before" change value */
3957 	obdva.va_mask = AT_CTIME|AT_SEQ;
3958 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
3959 	if (!error) {
3960 		nbdva.va_mask = AT_CTIME|AT_SEQ;
3961 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
3962 	}
3963 	if (error) {
3964 		*cs->statusp = resp->status = puterrno4(error);
3965 		goto err_out;
3966 	}
3967 
3968 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
3969 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
3970 
3971 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
3972 		fp != NULL) {
3973 		struct vattr va;
3974 		vnode_t *tvp;
3975 
3976 		rfs4_dbe_lock(fp->dbe);
3977 		tvp = fp->vp;
3978 		if (tvp)
3979 			VN_HOLD(tvp);
3980 		rfs4_dbe_unlock(fp->dbe);
3981 
3982 		if (tvp) {
3983 			va.va_mask = AT_NLINK;
3984 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3985 				va.va_nlink == 0) {
3986 				/* The file is gone and so should the state */
3987 				if (in_crit_targ) {
3988 					nbl_end_crit(targvp);
3989 					in_crit_targ = 0;
3990 				}
3991 				rfs4_close_all_state(fp);
3992 			}
3993 			VN_RELE(tvp);
3994 		}
3995 	}
3996 
3997 	if (in_crit_src)
3998 		nbl_end_crit(srcvp);
3999 	if (srcvp)
4000 		VN_RELE(srcvp);
4001 	if (in_crit_targ)
4002 		nbl_end_crit(targvp);
4003 	if (targvp)
4004 		VN_RELE(targvp);
4005 
4006 	if (sfp) {
4007 		rfs4_clear_dont_grant(sfp);
4008 		rfs4_file_rele(sfp);
4009 	}
4010 	if (fp) {
4011 		rfs4_clear_dont_grant(fp);
4012 		rfs4_file_rele(fp);
4013 	}
4014 
4015 	kmem_free(onm, olen);
4016 	kmem_free(nnm, nlen);
4017 
4018 	/*
4019 	 * Get the initial "after" sequence number, if it fails, set to zero
4020 	 */
4021 	oidva.va_mask = AT_SEQ;
4022 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4023 		oidva.va_seq = 0;
4024 
4025 	nidva.va_mask = AT_SEQ;
4026 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4027 		nidva.va_seq = 0;
4028 
4029 	/*
4030 	 * Force modified data and metadata out to stable storage.
4031 	 */
4032 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4033 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4034 
4035 	if (error) {
4036 		*cs->statusp = resp->status = puterrno4(error);
4037 		return;
4038 	}
4039 
4040 	/*
4041 	 * Get "after" change values, if it fails, simply return the
4042 	 * before value.
4043 	 */
4044 	oadva.va_mask = AT_CTIME|AT_SEQ;
4045 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4046 		oadva.va_ctime = obdva.va_ctime;
4047 		oadva.va_seq = 0;
4048 	}
4049 
4050 	nadva.va_mask = AT_CTIME|AT_SEQ;
4051 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4052 		nadva.va_ctime = nbdva.va_ctime;
4053 		nadva.va_seq = 0;
4054 	}
4055 
4056 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4057 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4058 
4059 	/*
4060 	 * The cinfo.atomic = TRUE only if we have
4061 	 * non-zero va_seq's, and it has incremented by exactly one
4062 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4063 	 */
4064 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4065 			oidva.va_seq == (obdva.va_seq + 1) &&
4066 			oidva.va_seq == oadva.va_seq)
4067 		resp->source_cinfo.atomic = TRUE;
4068 	else
4069 		resp->source_cinfo.atomic = FALSE;
4070 
4071 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4072 			nidva.va_seq == (nbdva.va_seq + 1) &&
4073 			nidva.va_seq == nadva.va_seq)
4074 		resp->target_cinfo.atomic = TRUE;
4075 	else
4076 		resp->target_cinfo.atomic = FALSE;
4077 
4078 #ifdef	VOLATILE_FH_TEST
4079 	{
4080 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4081 
4082 	/*
4083 	 * Add the renamed file handle to the volatile rename list
4084 	 */
4085 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4086 		/* file handles may expire on rename */
4087 		vnode_t *vp;
4088 
4089 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4090 		/*
4091 		 * Already know that nnm will be a valid string
4092 		 */
4093 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4094 		kmem_free(nnm, nlen);
4095 		if (!error) {
4096 			add_volrnm_fh(cs->exi, vp);
4097 			VN_RELE(vp);
4098 		}
4099 	}
4100 	}
4101 #endif	/* VOLATILE_FH_TEST */
4102 
4103 	*cs->statusp = resp->status = NFS4_OK;
4104 	return;
4105 
4106 err_out:
4107 	kmem_free(onm, olen);
4108 	kmem_free(nnm, nlen);
4109 
4110 	if (in_crit_src) nbl_end_crit(srcvp);
4111 	if (in_crit_targ) nbl_end_crit(targvp);
4112 	if (targvp) VN_RELE(targvp);
4113 	if (srcvp) VN_RELE(srcvp);
4114 	if (sfp) {
4115 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4116 		rfs4_file_rele(sfp);
4117 	}
4118 	if (fp) {
4119 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4120 		rfs4_file_rele(fp);
4121 	}
4122 }
4123 
4124 /* ARGSUSED */
4125 static void
4126 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4127 	struct compound_state *cs)
4128 {
4129 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4130 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4131 	rfs4_client_t *cp;
4132 
4133 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4134 		*cs->statusp = resp->status =
4135 			rfs4_check_clientid(&args->clientid, 0);
4136 		return;
4137 	}
4138 
4139 	if (rfs4_lease_expired(cp)) {
4140 		rfs4_client_rele(cp);
4141 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4142 		return;
4143 	}
4144 
4145 	rfs4_update_lease(cp);
4146 
4147 	mutex_enter(cp->cbinfo.cb_lock);
4148 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4149 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4150 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4151 	} else {
4152 		*cs->statusp = resp->status = NFS4_OK;
4153 	}
4154 	mutex_exit(cp->cbinfo.cb_lock);
4155 
4156 	rfs4_client_rele(cp);
4157 
4158 }
4159 
4160 /* ARGSUSED */
4161 static void
4162 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4163 	struct compound_state *cs)
4164 {
4165 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4166 
4167 	/* No need to check cs->access - we are not accessing any object */
4168 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4169 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4170 		return;
4171 	}
4172 	if (cs->vp != NULL) {
4173 		VN_RELE(cs->vp);
4174 	}
4175 	cs->vp = cs->saved_vp;
4176 	cs->saved_vp = NULL;
4177 	cs->exi = cs->saved_exi;
4178 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4179 	*cs->statusp = resp->status = NFS4_OK;
4180 	cs->deleg = FALSE;
4181 }
4182 
4183 /* ARGSUSED */
4184 static void
4185 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4186 	struct compound_state *cs)
4187 {
4188 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4189 
4190 	/* No need to check cs->access - we are not accessing any object */
4191 	if (cs->vp == NULL) {
4192 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4193 		return;
4194 	}
4195 	if (cs->saved_vp != NULL) {
4196 		VN_RELE(cs->saved_vp);
4197 	}
4198 	cs->saved_vp = cs->vp;
4199 	VN_HOLD(cs->saved_vp);
4200 	cs->saved_exi = cs->exi;
4201 	/*
4202 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4203 	 * unless necessary.
4204 	 */
4205 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4206 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4207 	}
4208 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4209 	*cs->statusp = resp->status = NFS4_OK;
4210 }
4211 
4212 /*
4213  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4214  * return the bitmap of attrs that were set successfully. It is also
4215  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4216  * always be called only after rfs4_do_set_attrs().
4217  *
4218  * Verify that the attributes are same as the expected ones. sargp->vap
4219  * and sargp->sbp contain the input attributes as translated from fattr4.
4220  *
4221  * This function verifies only the attrs that correspond to a vattr or
4222  * vfsstat struct. That is because of the extra step needed to get the
4223  * corresponding system structs. Other attributes have already been set or
4224  * verified by do_rfs4_set_attrs.
4225  *
4226  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4227  */
4228 static int
4229 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4230 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4231 {
4232 	int error, ret_error = 0;
4233 	int i, k;
4234 	uint_t sva_mask = sargp->vap->va_mask;
4235 	uint_t vbit;
4236 	union nfs4_attr_u *na;
4237 	uint8_t *amap;
4238 	bool_t getsb = ntovp->vfsstat;
4239 
4240 	if (sva_mask != 0) {
4241 		/*
4242 		 * Okay to overwrite sargp->vap because we verify based
4243 		 * on the incoming values.
4244 		 */
4245 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4246 				sargp->cs->cr);
4247 		if (ret_error) {
4248 			if (resp == NULL)
4249 				return (ret_error);
4250 			/*
4251 			 * Must return bitmap of successful attrs
4252 			 */
4253 			sva_mask = 0;	/* to prevent checking vap later */
4254 		} else {
4255 			/*
4256 			 * Some file systems clobber va_mask. it is probably
4257 			 * wrong of them to do so, nonethless we practice
4258 			 * defensive coding.
4259 			 * See bug id 4276830.
4260 			 */
4261 			sargp->vap->va_mask = sva_mask;
4262 		}
4263 	}
4264 
4265 	if (getsb) {
4266 		/*
4267 		 * Now get the superblock and loop on the bitmap, as there is
4268 		 * no simple way of translating from superblock to bitmap4.
4269 		 */
4270 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4271 		if (ret_error) {
4272 			if (resp == NULL)
4273 				goto errout;
4274 			getsb = FALSE;
4275 		}
4276 	}
4277 
4278 	/*
4279 	 * Now loop and verify each attribute which getattr returned
4280 	 * whether it's the same as the input.
4281 	 */
4282 	if (resp == NULL && !getsb && (sva_mask == 0))
4283 		goto errout;
4284 
4285 	na = ntovp->na;
4286 	amap = ntovp->amap;
4287 	k = 0;
4288 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4289 		k = *amap;
4290 		ASSERT(nfs4_ntov_map[k].nval == k);
4291 		vbit = nfs4_ntov_map[k].vbit;
4292 
4293 		/*
4294 		 * If vattr attribute but VOP_GETATTR failed, or it's
4295 		 * superblock attribute but VFS_STATVFS failed, skip
4296 		 */
4297 		if (vbit) {
4298 			if ((vbit & sva_mask) == 0)
4299 				continue;
4300 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4301 			continue;
4302 		}
4303 		error = (*nfs4_ntov_map[k].sv_getit)(
4304 				NFS4ATTR_VERIT, sargp, na);
4305 		if (resp != NULL) {
4306 			if (error)
4307 				ret_error = -1;	/* not all match */
4308 			else	/* update response bitmap */
4309 				*resp |= nfs4_ntov_map[k].fbit;
4310 			continue;
4311 		}
4312 		if (error) {
4313 			ret_error = -1;	/* not all match */
4314 			break;
4315 		}
4316 	}
4317 errout:
4318 	return (ret_error);
4319 }
4320 
4321 /*
4322  * Decode the attribute to be set/verified. If the attr requires a sys op
4323  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4324  * call the sv_getit function for it, because the sys op hasn't yet been done.
4325  * Return 0 for success, error code if failed.
4326  *
4327  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4328  */
4329 static int
4330 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4331 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4332 {
4333 	int error = 0;
4334 	bool_t set_later;
4335 
4336 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4337 
4338 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4339 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4340 		/*
4341 		 * don't verify yet if a vattr or sb dependent attr,
4342 		 * because we don't have their sys values yet.
4343 		 * Will be done later.
4344 		 */
4345 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4346 			/*
4347 			 * ACLs are a special case, since setting the MODE
4348 			 * conflicts with setting the ACL.  We delay setting
4349 			 * the ACL until all other attributes have been set.
4350 			 * The ACL gets set in do_rfs4_op_setattr().
4351 			 */
4352 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4353 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4354 				    sargp, nap);
4355 				if (error) {
4356 					xdr_free(nfs4_ntov_map[k].xfunc,
4357 					    (caddr_t)nap);
4358 				}
4359 			}
4360 		}
4361 	} else {
4362 #ifdef  DEBUG
4363 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4364 			"decoding attribute %d\n", k);
4365 #endif
4366 		error = EINVAL;
4367 	}
4368 	if (!error && resp_bval && !set_later) {
4369 		*resp_bval |= nfs4_ntov_map[k].fbit;
4370 	}
4371 
4372 	return (error);
4373 }
4374 
4375 /*
4376  * Set vattr based on incoming fattr4 attrs - used by setattr.
4377  * Set response mask. Ignore any values that are not writable vattr attrs.
4378  */
4379 static nfsstat4
4380 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4381 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4382 		nfs4_attr_cmd_t cmd)
4383 {
4384 	int error = 0;
4385 	int i;
4386 	char *attrs = fattrp->attrlist4;
4387 	uint32_t attrslen = fattrp->attrlist4_len;
4388 	XDR xdr;
4389 	nfsstat4 status = NFS4_OK;
4390 	vnode_t *vp = cs->vp;
4391 	union nfs4_attr_u *na;
4392 	uint8_t *amap;
4393 
4394 #ifndef lint
4395 	/*
4396 	 * Make sure that maximum attribute number can be expressed as an
4397 	 * 8 bit quantity.
4398 	 */
4399 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4400 #endif
4401 
4402 	if (vp == NULL) {
4403 		if (resp)
4404 			*resp = 0;
4405 		return (NFS4ERR_NOFILEHANDLE);
4406 	}
4407 	if (cs->access == CS_ACCESS_DENIED) {
4408 		if (resp)
4409 			*resp = 0;
4410 		return (NFS4ERR_ACCESS);
4411 	}
4412 
4413 	sargp->op = cmd;
4414 	sargp->cs = cs;
4415 	sargp->flag = 0;	/* may be set later */
4416 	sargp->vap->va_mask = 0;
4417 	sargp->rdattr_error = NFS4_OK;
4418 	sargp->rdattr_error_req = FALSE;
4419 	/* sargp->sbp is set by the caller */
4420 
4421 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4422 
4423 	na = ntovp->na;
4424 	amap = ntovp->amap;
4425 
4426 	/*
4427 	 * The following loop iterates on the nfs4_ntov_map checking
4428 	 * if the fbit is set in the requested bitmap.
4429 	 * If set then we process the arguments using the
4430 	 * rfs4_fattr4 conversion functions to populate the setattr
4431 	 * vattr and va_mask. Any settable attrs that are not using vattr
4432 	 * will be set in this loop.
4433 	 */
4434 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4435 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4436 			continue;
4437 		}
4438 		/*
4439 		 * If setattr, must be a writable attr.
4440 		 * If verify/nverify, must be a readable attr.
4441 		 */
4442 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4443 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4444 			/*
4445 			 * Client tries to set/verify an
4446 			 * unsupported attribute, tries to set
4447 			 * a read only attr or verify a write
4448 			 * only one - error!
4449 			 */
4450 			break;
4451 		}
4452 		/*
4453 		 * Decode the attribute to set/verify
4454 		 */
4455 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4456 					&xdr, resp ? resp : NULL, na);
4457 		if (error)
4458 			break;
4459 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4460 		na++;
4461 		(ntovp->attrcnt)++;
4462 		if (nfs4_ntov_map[i].vfsstat)
4463 			ntovp->vfsstat = TRUE;
4464 	}
4465 
4466 	if (error != 0)
4467 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4468 						puterrno4(error));
4469 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4470 	return (status);
4471 }
4472 
4473 static nfsstat4
4474 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4475 		stateid4 *stateid)
4476 {
4477 	int error = 0;
4478 	struct nfs4_svgetit_arg sarg;
4479 	bool_t trunc;
4480 
4481 	nfsstat4 status = NFS4_OK;
4482 	cred_t *cr = cs->cr;
4483 	vnode_t *vp = cs->vp;
4484 	struct nfs4_ntov_table ntov;
4485 	struct statvfs64 sb;
4486 	struct vattr bva;
4487 	struct flock64 bf;
4488 	int in_crit = 0;
4489 	uint_t saved_mask = 0;
4490 	caller_context_t ct;
4491 
4492 	*resp = 0;
4493 	sarg.sbp = &sb;
4494 	nfs4_ntov_table_init(&ntov);
4495 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4496 			NFS4ATTR_SETIT);
4497 	if (status != NFS4_OK) {
4498 		/*
4499 		 * failed set attrs
4500 		 */
4501 		goto done;
4502 	}
4503 	if ((sarg.vap->va_mask == 0) &&
4504 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4505 		/*
4506 		 * no further work to be done
4507 		 */
4508 		goto done;
4509 	}
4510 
4511 	/*
4512 	 * If we got a request to set the ACL and the MODE, only
4513 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4514 	 * to change any other bits, along with setting an ACL,
4515 	 * gives NFS4ERR_INVAL.
4516 	 */
4517 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4518 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4519 		vattr_t va;
4520 
4521 		va.va_mask = AT_MODE;
4522 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4523 		if (error) {
4524 			status = puterrno4(error);
4525 			goto done;
4526 		}
4527 		if ((sarg.vap->va_mode ^ va.va_mode) &
4528 		    ~(VSUID | VSGID | VSVTX)) {
4529 			status = NFS4ERR_INVAL;
4530 			goto done;
4531 		}
4532 	}
4533 
4534 
4535 	/* Check stateid only if size has been set */
4536 	if (sarg.vap->va_mask & AT_SIZE) {
4537 		trunc = (sarg.vap->va_size == 0);
4538 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4539 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4540 		if (status != NFS4_OK)
4541 			goto done;
4542 	}
4543 
4544 	ct.cc_sysid = 0;
4545 	ct.cc_pid = 0;
4546 	ct.cc_caller_id = nfs4_srv_caller_id;
4547 
4548 	/* XXX start of possible race with delegations */
4549 
4550 	/*
4551 	 * We need to specially handle size changes because it is
4552 	 * possible for the client to create a file with read-only
4553 	 * modes, but with the file opened for writing. If the client
4554 	 * then tries to set the file size, e.g. ftruncate(3C),
4555 	 * fcntl(F_FREESP), the normal access checking done in
4556 	 * VOP_SETATTR would prevent the client from doing it even though
4557 	 * it should be allowed to do so.  To get around this, we do the
4558 	 * access checking for ourselves and use VOP_SPACE which doesn't
4559 	 * do the access checking.
4560 	 * Also the client should not be allowed to change the file
4561 	 * size if there is a conflicting non-blocking mandatory lock in
4562 	 * the region of the change.
4563 	 */
4564 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4565 		u_offset_t offset;
4566 		ssize_t length;
4567 
4568 		/*
4569 		 * Check any possible conflict due to NBMAND locks.
4570 		 * Get into critical region before VOP_GETATTR, so the
4571 		 * size attribute is valid when checking conflicts.
4572 		 */
4573 		if (nbl_need_check(vp)) {
4574 			nbl_start_crit(vp, RW_READER);
4575 			in_crit = 1;
4576 		}
4577 
4578 		bva.va_mask = AT_UID|AT_SIZE;
4579 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4580 			status = puterrno4(error);
4581 			goto done;
4582 		}
4583 
4584 		if (in_crit) {
4585 			if (sarg.vap->va_size < bva.va_size) {
4586 				offset = sarg.vap->va_size;
4587 				length = bva.va_size - sarg.vap->va_size;
4588 			} else {
4589 				offset = bva.va_size;
4590 				length = sarg.vap->va_size - bva.va_size;
4591 			}
4592 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4593 				status = NFS4ERR_LOCKED;
4594 				goto done;
4595 			}
4596 		}
4597 
4598 		if (crgetuid(cr) == bva.va_uid) {
4599 			saved_mask = sarg.vap->va_mask;
4600 			sarg.vap->va_mask &= ~AT_SIZE;
4601 			bf.l_type = F_WRLCK;
4602 			bf.l_whence = 0;
4603 			bf.l_start = (off64_t)sarg.vap->va_size;
4604 			bf.l_len = 0;
4605 			bf.l_sysid = 0;
4606 			bf.l_pid = 0;
4607 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4608 					(offset_t)sarg.vap->va_size, cr, &ct);
4609 		}
4610 	}
4611 
4612 	if (!error && sarg.vap->va_mask != 0)
4613 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4614 
4615 	/* restore AT_SIZE */
4616 	if (saved_mask & AT_SIZE)
4617 		sarg.vap->va_mask |= AT_SIZE;
4618 
4619 	/*
4620 	 * If an ACL was being set, it has been delayed until now,
4621 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4622 	 */
4623 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4624 		int i;
4625 
4626 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4627 			if (ntov.amap[i] == FATTR4_ACL)
4628 				break;
4629 		if (i < NFS4_MAXNUM_ATTRS) {
4630 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4631 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4632 			if (error == 0) {
4633 				*resp |= FATTR4_ACL_MASK;
4634 			} else if (error == ENOTSUP) {
4635 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4636 				status = NFS4ERR_ATTRNOTSUPP;
4637 				goto done;
4638 			}
4639 		} else {
4640 			NFS4_DEBUG(rfs4_debug,
4641 			    (CE_NOTE, "do_rfs4_op_setattr: "
4642 			    "unable to find ACL in fattr4"));
4643 			error = EINVAL;
4644 		}
4645 	}
4646 
4647 	if (error) {
4648 		status = puterrno4(error);
4649 
4650 		/*
4651 		 * Set the response bitmap when setattr failed.
4652 		 * If VOP_SETATTR partially succeeded, test by doing a
4653 		 * VOP_GETATTR on the object and comparing the data
4654 		 * to the setattr arguments.
4655 		 */
4656 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4657 	} else {
4658 		/*
4659 		 * Force modified metadata out to stable storage.
4660 		 */
4661 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4662 		/*
4663 		 * Set response bitmap
4664 		 */
4665 		nfs4_vmask_to_nmask(sarg.vap->va_mask, resp);
4666 	}
4667 
4668 /* Return early and already have a NFSv4 error */
4669 done:
4670 	if (in_crit)
4671 		nbl_end_crit(vp);
4672 
4673 	nfs4_ntov_table_free(&ntov, &sarg);
4674 
4675 	return (status);
4676 }
4677 
4678 /* ARGSUSED */
4679 static void
4680 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4681 	struct compound_state *cs)
4682 {
4683 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4684 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4685 
4686 	if (cs->vp == NULL) {
4687 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4688 		return;
4689 	}
4690 
4691 	/*
4692 	 * If there is an unshared filesystem mounted on this vnode,
4693 	 * do not allow to setattr on this vnode.
4694 	 */
4695 	if (vn_ismntpt(cs->vp)) {
4696 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4697 		return;
4698 	}
4699 
4700 	resp->attrsset = 0;
4701 
4702 	if (rdonly4(cs->exi, cs->vp, req)) {
4703 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4704 		return;
4705 	}
4706 
4707 	*cs->statusp = resp->status =
4708 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4709 			&args->stateid);
4710 }
4711 
4712 /* ARGSUSED */
4713 static void
4714 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4715 	struct compound_state *cs)
4716 {
4717 	/*
4718 	 * verify and nverify are exactly the same, except that nverify
4719 	 * succeeds when some argument changed, and verify succeeds when
4720 	 * when none changed.
4721 	 */
4722 
4723 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4724 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4725 
4726 	int error;
4727 	struct nfs4_svgetit_arg sarg;
4728 	struct statvfs64 sb;
4729 	struct nfs4_ntov_table ntov;
4730 
4731 	if (cs->vp == NULL) {
4732 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4733 		return;
4734 	}
4735 
4736 	sarg.sbp = &sb;
4737 	nfs4_ntov_table_init(&ntov);
4738 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4739 				&sarg, &ntov, NFS4ATTR_VERIT);
4740 	if (resp->status != NFS4_OK) {
4741 		/*
4742 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4743 		 * so could return -1 for "no match".
4744 		 */
4745 		if (resp->status == -1)
4746 			resp->status = NFS4ERR_NOT_SAME;
4747 		goto done;
4748 	}
4749 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4750 	switch (error) {
4751 	case 0:
4752 		resp->status = NFS4_OK;
4753 		break;
4754 	case -1:
4755 		resp->status = NFS4ERR_NOT_SAME;
4756 		break;
4757 	default:
4758 		resp->status = puterrno4(error);
4759 		break;
4760 	}
4761 done:
4762 	*cs->statusp = resp->status;
4763 	nfs4_ntov_table_free(&ntov, &sarg);
4764 }
4765 
4766 /* ARGSUSED */
4767 static void
4768 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4769 	struct compound_state *cs)
4770 {
4771 	/*
4772 	 * verify and nverify are exactly the same, except that nverify
4773 	 * succeeds when some argument changed, and verify succeeds when
4774 	 * when none changed.
4775 	 */
4776 
4777 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
4778 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
4779 
4780 	int error;
4781 	struct nfs4_svgetit_arg sarg;
4782 	struct statvfs64 sb;
4783 	struct nfs4_ntov_table ntov;
4784 
4785 	if (cs->vp == NULL) {
4786 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4787 		return;
4788 	}
4789 	sarg.sbp = &sb;
4790 	nfs4_ntov_table_init(&ntov);
4791 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4792 				&sarg, &ntov, NFS4ATTR_VERIT);
4793 	if (resp->status != NFS4_OK) {
4794 		/*
4795 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4796 		 * so could return -1 for "no match".
4797 		 */
4798 		if (resp->status == -1)
4799 			resp->status = NFS4_OK;
4800 		goto done;
4801 	}
4802 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4803 	switch (error) {
4804 	case 0:
4805 		resp->status = NFS4ERR_SAME;
4806 		break;
4807 	case -1:
4808 		resp->status = NFS4_OK;
4809 		break;
4810 	default:
4811 		resp->status = puterrno4(error);
4812 		break;
4813 	}
4814 done:
4815 	*cs->statusp = resp->status;
4816 	nfs4_ntov_table_free(&ntov, &sarg);
4817 }
4818 
4819 /*
4820  * XXX - This should live in an NFS header file.
4821  */
4822 #define	MAX_IOVECS	12
4823 
4824 /* ARGSUSED */
4825 static void
4826 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4827 	struct compound_state *cs)
4828 {
4829 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
4830 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
4831 	int error;
4832 	vnode_t *vp;
4833 	struct vattr bva;
4834 	u_offset_t rlimit;
4835 	struct uio uio;
4836 	struct iovec iov[MAX_IOVECS];
4837 	struct iovec *iovp;
4838 	int iovcnt;
4839 	int ioflag;
4840 	cred_t *savecred, *cr;
4841 	bool_t *deleg = &cs->deleg;
4842 	nfsstat4 stat;
4843 	int in_crit = 0;
4844 
4845 	vp = cs->vp;
4846 	if (vp == NULL) {
4847 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4848 		return;
4849 	}
4850 	if (cs->access == CS_ACCESS_DENIED) {
4851 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4852 		return;
4853 	}
4854 
4855 	cr = cs->cr;
4856 
4857 	/*
4858 	 * We have to enter the critical region before calling VOP_RWLOCK
4859 	 * to avoid a deadlock with ufs.
4860 	 */
4861 	if (nbl_need_check(vp)) {
4862 		nbl_start_crit(vp, RW_READER);
4863 		in_crit = 1;
4864 		if (nbl_conflict(vp, NBL_WRITE,
4865 				args->offset, args->data_len, 0)) {
4866 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
4867 			goto out;
4868 		}
4869 	}
4870 
4871 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
4872 					deleg, TRUE)) != NFS4_OK) {
4873 		*cs->statusp = resp->status = stat;
4874 		goto out;
4875 	}
4876 
4877 	bva.va_mask = AT_MODE | AT_UID;
4878 	error = VOP_GETATTR(vp, &bva, 0, cr);
4879 
4880 	/*
4881 	 * If we can't get the attributes, then we can't do the
4882 	 * right access checking.  So, we'll fail the request.
4883 	 */
4884 	if (error) {
4885 		*cs->statusp = resp->status = puterrno4(error);
4886 		goto out;
4887 	}
4888 
4889 	if (rdonly4(cs->exi, cs->vp, req)) {
4890 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4891 		goto out;
4892 	}
4893 
4894 	if (vp->v_type != VREG) {
4895 		*cs->statusp = resp->status =
4896 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
4897 		goto out;
4898 	}
4899 
4900 	if (crgetuid(cr) != bva.va_uid &&
4901 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
4902 		*cs->statusp = resp->status = puterrno4(error);
4903 		goto out;
4904 	}
4905 
4906 	if (MANDLOCK(vp, bva.va_mode)) {
4907 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4908 		goto out;
4909 	}
4910 
4911 	if (args->data_len == 0) {
4912 		*cs->statusp = resp->status = NFS4_OK;
4913 		resp->count = 0;
4914 		resp->committed = args->stable;
4915 		resp->writeverf = Write4verf;
4916 		goto out;
4917 	}
4918 
4919 	if (args->mblk != NULL) {
4920 		mblk_t *m;
4921 		uint_t bytes, round_len;
4922 
4923 		iovcnt = 0;
4924 		bytes = 0;
4925 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
4926 		for (m = args->mblk;
4927 		    m != NULL && bytes < round_len;
4928 		    m = m->b_cont) {
4929 			iovcnt++;
4930 			bytes += MBLKL(m);
4931 		}
4932 #ifdef DEBUG
4933 		/* should have ended on an mblk boundary */
4934 		if (bytes != round_len) {
4935 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
4936 			    bytes, round_len, args->data_len);
4937 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
4938 			    (void *)args->mblk, (void *)m);
4939 			ASSERT(bytes == round_len);
4940 		}
4941 #endif
4942 		if (iovcnt <= MAX_IOVECS) {
4943 			iovp = iov;
4944 		} else {
4945 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
4946 		}
4947 		mblk_to_iov(args->mblk, iovcnt, iovp);
4948 	} else {
4949 		iovcnt = 1;
4950 		iovp = iov;
4951 		iovp->iov_base = args->data_val;
4952 		iovp->iov_len = args->data_len;
4953 	}
4954 
4955 	uio.uio_iov = iovp;
4956 	uio.uio_iovcnt = iovcnt;
4957 
4958 	uio.uio_segflg = UIO_SYSSPACE;
4959 	uio.uio_extflg = UIO_COPY_DEFAULT;
4960 	uio.uio_loffset = args->offset;
4961 	uio.uio_resid = args->data_len;
4962 	uio.uio_llimit = curproc->p_fsz_ctl;
4963 	rlimit = uio.uio_llimit - args->offset;
4964 	if (rlimit < (u_offset_t)uio.uio_resid)
4965 		uio.uio_resid = (int)rlimit;
4966 
4967 	if (args->stable == UNSTABLE4)
4968 		ioflag = 0;
4969 	else if (args->stable == FILE_SYNC4)
4970 		ioflag = FSYNC;
4971 	else if (args->stable == DATA_SYNC4)
4972 		ioflag = FDSYNC;
4973 	else {
4974 		if (iovp != iov)
4975 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
4976 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4977 		goto out;
4978 	}
4979 
4980 	/*
4981 	 * We're changing creds because VM may fault and we need
4982 	 * the cred of the current thread to be used if quota
4983 	 * checking is enabled.
4984 	 */
4985 	savecred = curthread->t_cred;
4986 	curthread->t_cred = cr;
4987 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
4988 	curthread->t_cred = savecred;
4989 
4990 	if (iovp != iov)
4991 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
4992 
4993 	if (error) {
4994 		*cs->statusp = resp->status = puterrno4(error);
4995 		goto out;
4996 	}
4997 
4998 	*cs->statusp = resp->status = NFS4_OK;
4999 	resp->count = args->data_len - uio.uio_resid;
5000 
5001 	if (ioflag == 0)
5002 		resp->committed = UNSTABLE4;
5003 	else
5004 		resp->committed = FILE_SYNC4;
5005 
5006 	resp->writeverf = Write4verf;
5007 
5008 out:
5009 	if (in_crit)
5010 		nbl_end_crit(vp);
5011 }
5012 
5013 
5014 /* XXX put in a header file */
5015 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5016 
5017 void
5018 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5019 	struct svc_req *req, cred_t *cr)
5020 {
5021 	uint_t i;
5022 	struct compound_state cs;
5023 
5024 	rfs4_init_compound_state(&cs);
5025 	/*
5026 	 * Form a reply tag by copying over the reqeuest tag.
5027 	 */
5028 	resp->tag.utf8string_val =
5029 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5030 	resp->tag.utf8string_len = args->tag.utf8string_len;
5031 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5032 					resp->tag.utf8string_len);
5033 
5034 	cs.statusp = &resp->status;
5035 
5036 	/*
5037 	 * XXX for now, minorversion should be zero
5038 	 */
5039 	if (args->minorversion != NFS4_MINORVERSION) {
5040 		resp->array_len = 0;
5041 		resp->array = NULL;
5042 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5043 		return;
5044 	}
5045 
5046 	resp->array_len = args->array_len;
5047 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5048 		KM_SLEEP);
5049 
5050 	ASSERT(exi == NULL);
5051 	ASSERT(cr == NULL);
5052 
5053 	cr = crget();
5054 	ASSERT(cr != NULL);
5055 
5056 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5057 		crfree(cr);
5058 		return;
5059 	}
5060 
5061 	cs.basecr = cr;
5062 
5063 	cs.req = req;
5064 
5065 	/*
5066 	 * For now, NFS4 compound processing must be protected by
5067 	 * exported_lock because it can access more than one exportinfo
5068 	 * per compound and share/unshare can now change multiple
5069 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5070 	 * per proc (excluding public exinfo), and exi_count design
5071 	 * is sufficient to protect concurrent execution of NFS2/3
5072 	 * ops along with unexport.  This lock will be removed as
5073 	 * part of the NFSv4 phase 2 namespace redesign work.
5074 	 */
5075 	rw_enter(&exported_lock, RW_READER);
5076 
5077 	/*
5078 	 * If this is the first compound we've seen, we need to start all
5079 	 * new instances' grace periods.
5080 	 */
5081 	if (rfs4_seen_first_compound == 0) {
5082 		rfs4_grace_start_new();
5083 		/*
5084 		 * This must be set after rfs4_grace_start_new(), otherwise
5085 		 * another thread could proceed past here before the former
5086 		 * is finished.
5087 		 */
5088 		rfs4_seen_first_compound = 1;
5089 	}
5090 
5091 	for (i = 0; i < args->array_len && cs.cont; i++) {
5092 		nfs_argop4 *argop;
5093 		nfs_resop4 *resop;
5094 		uint_t op;
5095 
5096 		argop = &args->array[i];
5097 		resop = &resp->array[i];
5098 		resop->resop = argop->argop;
5099 		op = (uint_t)resop->resop;
5100 
5101 		if (op < rfsv4disp_cnt) {
5102 			/*
5103 			 * Count the individual ops here; NULL and COMPOUND
5104 			 * are counted in common_dispatch()
5105 			 */
5106 			rfsproccnt_v4_ptr[op].value.ui64++;
5107 
5108 			NFS4_DEBUG(rfs4_debug > 1,
5109 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5110 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5111 			NFS4_DEBUG(rfs4_debug > 1,
5112 				(CE_NOTE, "%s returned %d",
5113 				rfs4_op_string[op], *cs.statusp));
5114 			if (*cs.statusp != NFS4_OK)
5115 				cs.cont = FALSE;
5116 		} else {
5117 			/*
5118 			 * This is effectively dead code since XDR code
5119 			 * will have already returned BADXDR if op doesn't
5120 			 * decode to legal value.  This only done for a
5121 			 * day when XDR code doesn't verify v4 opcodes.
5122 			 */
5123 			op = OP_ILLEGAL;
5124 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5125 
5126 			rfs4_op_illegal(argop, resop, req, &cs);
5127 			cs.cont = FALSE;
5128 		}
5129 
5130 		/*
5131 		 * If not at last op, and if we are to stop, then
5132 		 * compact the results array.
5133 		 */
5134 		if ((i + 1) < args->array_len && !cs.cont) {
5135 			nfs_resop4 *new_res = kmem_alloc(
5136 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5137 			bcopy(resp->array,
5138 				new_res, (i+1) * sizeof (nfs_resop4));
5139 			kmem_free(resp->array,
5140 				args->array_len * sizeof (nfs_resop4));
5141 
5142 			resp->array_len =  i + 1;
5143 			resp->array = new_res;
5144 		}
5145 	}
5146 
5147 	rw_exit(&exported_lock);
5148 
5149 	if (cs.vp)
5150 		VN_RELE(cs.vp);
5151 	if (cs.saved_vp)
5152 		VN_RELE(cs.saved_vp);
5153 	if (cs.saved_fh.nfs_fh4_val)
5154 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5155 
5156 	if (cs.basecr)
5157 		crfree(cs.basecr);
5158 	if (cs.cr)
5159 		crfree(cs.cr);
5160 }
5161 
5162 /*
5163  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5164  * XXX zero out the tag and array values. Need to investigate why the
5165  * XXX calls occur, but at least prevent the panic for now.
5166  */
5167 void
5168 rfs4_compound_free(COMPOUND4res *resp)
5169 {
5170 	uint_t i;
5171 
5172 	if (resp->tag.utf8string_val) {
5173 		UTF8STRING_FREE(resp->tag)
5174 	}
5175 
5176 	for (i = 0; i < resp->array_len; i++) {
5177 		nfs_resop4 *resop;
5178 		uint_t op;
5179 
5180 		resop = &resp->array[i];
5181 		op = (uint_t)resop->resop;
5182 		if (op < rfsv4disp_cnt) {
5183 			(*rfsv4disptab[op].dis_resfree)(resop);
5184 		}
5185 	}
5186 	if (resp->array != NULL) {
5187 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5188 	}
5189 }
5190 
5191 /*
5192  * Process the value of the compound request rpc flags, as a bit-AND
5193  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5194  */
5195 void
5196 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5197 {
5198 	int i;
5199 	int flag = RPC_ALL;
5200 
5201 	for (i = 0; flag && i < args->array_len; i++) {
5202 		uint_t op;
5203 
5204 		op = (uint_t)args->array[i].argop;
5205 
5206 		if (op < rfsv4disp_cnt)
5207 			flag &= rfsv4disptab[op].dis_flags;
5208 		else
5209 			flag = 0;
5210 	}
5211 	*flagp = flag;
5212 }
5213 
5214 nfsstat4
5215 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5216 {
5217 	nfsstat4 e;
5218 
5219 	rfs4_dbe_lock(cp->dbe);
5220 
5221 	if (cp->sysidt != LM_NOSYSID) {
5222 		*sp = cp->sysidt;
5223 		e = NFS4_OK;
5224 
5225 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5226 		*sp = cp->sysidt;
5227 		e = NFS4_OK;
5228 
5229 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5230 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5231 	} else
5232 		e = NFS4ERR_DELAY;
5233 
5234 	rfs4_dbe_unlock(cp->dbe);
5235 	return (e);
5236 }
5237 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a lock request (operation,
 * lock type, offset, length and pid), prefixed with str.  A zero l_len
 * is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op = "F_UNKNOWN";
	char *type = "F_UNKNOWN";

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
		str, op, type,
		(longlong_t)flk->l_start,
		flk->l_len ? (longlong_t)flk->l_len : ~0LL,
		flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
5274 
5275 /*ARGSUSED*/
5276 static bool_t
5277 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5278 {
5279 	return (TRUE);
5280 }
5281 
5282 /*
5283  * Look up the pathname using the vp in cs as the directory vnode.
5284  * cs->vp will be the vnode for the file on success
5285  */
5286 
5287 static nfsstat4
5288 rfs4_lookup(component4 *component, struct svc_req *req,
5289 	    struct compound_state *cs)
5290 {
5291 	char *nm;
5292 	uint32_t len;
5293 	nfsstat4 status;
5294 
5295 	if (cs->vp == NULL) {
5296 		return (NFS4ERR_NOFILEHANDLE);
5297 	}
5298 	if (cs->vp->v_type != VDIR) {
5299 		return (NFS4ERR_NOTDIR);
5300 	}
5301 
5302 	if (!utf8_dir_verify(component))
5303 		return (NFS4ERR_INVAL);
5304 
5305 	nm = utf8_to_fn(component, &len, NULL);
5306 	if (nm == NULL) {
5307 		return (NFS4ERR_INVAL);
5308 	}
5309 
5310 	if (len > MAXNAMELEN) {
5311 		kmem_free(nm, len);
5312 		return (NFS4ERR_NAMETOOLONG);
5313 	}
5314 
5315 	status = do_rfs4_op_lookup(nm, len, req, cs);
5316 
5317 	kmem_free(nm, len);
5318 
5319 	return (status);
5320 }
5321 
5322 static nfsstat4
5323 rfs4_lookupfile(component4 *component, struct svc_req *req,
5324 		struct compound_state *cs, uint32_t access,
5325 		change_info4 *cinfo)
5326 {
5327 	nfsstat4 status;
5328 	vnode_t *dvp = cs->vp;
5329 	vattr_t bva, ava, fva;
5330 	int error;
5331 
5332 	/* Get "before" change value */
5333 	bva.va_mask = AT_CTIME|AT_SEQ;
5334 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5335 	if (error)
5336 		return (puterrno4(error));
5337 
5338 	/* rfs4_lookup may VN_RELE directory */
5339 	VN_HOLD(dvp);
5340 
5341 	status = rfs4_lookup(component, req, cs);
5342 	if (status != NFS4_OK) {
5343 		VN_RELE(dvp);
5344 		return (status);
5345 	}
5346 
5347 	/*
5348 	 * Get "after" change value, if it fails, simply return the
5349 	 * before value.
5350 	 */
5351 	ava.va_mask = AT_CTIME|AT_SEQ;
5352 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5353 		ava.va_ctime = bva.va_ctime;
5354 		ava.va_seq = 0;
5355 	}
5356 	VN_RELE(dvp);
5357 
5358 	/*
5359 	 * Validate the file is a file
5360 	 */
5361 	fva.va_mask = AT_TYPE|AT_MODE;
5362 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5363 	if (error)
5364 		return (puterrno4(error));
5365 
5366 	if (fva.va_type != VREG) {
5367 		if (fva.va_type == VDIR)
5368 			return (NFS4ERR_ISDIR);
5369 		if (fva.va_type == VLNK)
5370 			return (NFS4ERR_SYMLINK);
5371 		return (NFS4ERR_INVAL);
5372 	}
5373 
5374 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5375 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5376 
5377 	/*
5378 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5379 	 * cinfo.atomic = TRUE only if we have
5380 	 * non-zero va_seq's, and they have not changed.
5381 	 */
5382 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5383 		cinfo->atomic = TRUE;
5384 	else
5385 		cinfo->atomic = FALSE;
5386 
5387 	/* Check for mandatory locking */
5388 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5389 	return (check_open_access(access, cs, req));
5390 }
5391 
5392 static nfsstat4
5393 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5394 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5395 {
5396 	int error;
5397 	nfsstat4 status = NFS4_OK;
5398 	vattr_t va;
5399 
5400 tryagain:
5401 
5402 	/*
5403 	 * The file open mode used is VWRITE.  If the client needs
5404 	 * some other semantic, then it should do the access checking
5405 	 * itself.  It would have been nice to have the file open mode
5406 	 * passed as part of the arguments.
5407 	 */
5408 
5409 	*created = TRUE;
5410 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5411 
5412 	if (error) {
5413 		*created = FALSE;
5414 
5415 		/*
5416 		 * If we got something other than file already exists
5417 		 * then just return this error.  Otherwise, we got
5418 		 * EEXIST.  If we were doing a GUARDED create, then
5419 		 * just return this error.  Otherwise, we need to
5420 		 * make sure that this wasn't a duplicate of an
5421 		 * exclusive create request.
5422 		 *
5423 		 * The assumption is made that a non-exclusive create
5424 		 * request will never return EEXIST.
5425 		 */
5426 
5427 		if (error != EEXIST || mode == GUARDED4) {
5428 			status = puterrno4(error);
5429 			return (status);
5430 		}
5431 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5432 
5433 		if (error) {
5434 			/*
5435 			 * We couldn't find the file that we thought that
5436 			 * we just created.  So, we'll just try creating
5437 			 * it again.
5438 			 */
5439 			if (error == ENOENT)
5440 				goto tryagain;
5441 
5442 			status = puterrno4(error);
5443 			return (status);
5444 		}
5445 
5446 		if (mode == UNCHECKED4) {
5447 			/* existing object must be regular file */
5448 			if ((*vpp)->v_type != VREG) {
5449 				if ((*vpp)->v_type == VDIR)
5450 					status = NFS4ERR_ISDIR;
5451 				else if ((*vpp)->v_type == VLNK)
5452 					status = NFS4ERR_SYMLINK;
5453 				else
5454 					status = NFS4ERR_INVAL;
5455 				VN_RELE(*vpp);
5456 				return (status);
5457 			}
5458 
5459 			return (NFS4_OK);
5460 		}
5461 
5462 		/* Check for duplicate request */
5463 		ASSERT(mtime != 0);
5464 		va.va_mask = AT_MTIME;
5465 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5466 		if (!error) {
5467 			/* We found the file */
5468 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5469 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5470 				/* but its not our creation */
5471 				VN_RELE(*vpp);
5472 				return (NFS4ERR_EXIST);
5473 			}
5474 			*created = TRUE; /* retrans of create == created */
5475 			return (NFS4_OK);
5476 		}
5477 		VN_RELE(*vpp);
5478 		return (NFS4ERR_EXIST);
5479 	}
5480 
5481 	return (NFS4_OK);
5482 }
5483 
5484 static nfsstat4
5485 check_open_access(uint32_t access,
5486 		struct compound_state *cs, struct svc_req *req)
5487 {
5488 	int error;
5489 	vnode_t *vp;
5490 	bool_t readonly;
5491 	cred_t *cr = cs->cr;
5492 
5493 	/* For now we don't allow mandatory locking as per V2/V3 */
5494 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5495 		return (NFS4ERR_ACCESS);
5496 	}
5497 
5498 	vp = cs->vp;
5499 	ASSERT(cr != NULL && vp->v_type == VREG);
5500 
5501 	/*
5502 	 * If the file system is exported read only and we are trying
5503 	 * to open for write, then return NFS4ERR_ROFS
5504 	 */
5505 
5506 	readonly = rdonly4(cs->exi, cs->vp, req);
5507 
5508 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5509 		return (NFS4ERR_ROFS);
5510 
5511 	if (access & OPEN4_SHARE_ACCESS_READ) {
5512 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5513 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5514 			return (NFS4ERR_ACCESS);
5515 		}
5516 	}
5517 
5518 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5519 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5520 		if (error)
5521 			return (NFS4ERR_ACCESS);
5522 	}
5523 
5524 	return (NFS4_OK);
5525 }
5526 
5527 static nfsstat4
5528 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5529 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5530 {
5531 	struct nfs4_svgetit_arg sarg;
5532 	struct nfs4_ntov_table ntov;
5533 
5534 	bool_t ntov_table_init = FALSE;
5535 	struct statvfs64 sb;
5536 	nfsstat4 status;
5537 	vnode_t *vp;
5538 	vattr_t bva, ava, iva, cva, *vap;
5539 	vnode_t *dvp;
5540 	timespec32_t *mtime;
5541 	char *nm = NULL;
5542 	uint_t buflen;
5543 	bool_t created;
5544 	bool_t setsize = FALSE;
5545 	len_t reqsize;
5546 	int error;
5547 	bool_t trunc;
5548 	caller_context_t ct;
5549 	component4 *component;
5550 
5551 	sarg.sbp = &sb;
5552 
5553 	dvp = cs->vp;
5554 
5555 	/* Check if the file system is read only */
5556 	if (rdonly4(cs->exi, dvp, req))
5557 		return (NFS4ERR_ROFS);
5558 
5559 	/*
5560 	 * Get the last component of path name in nm. cs will reference
5561 	 * the including directory on success.
5562 	 */
5563 	component = &args->open_claim4_u.file;
5564 	if (!utf8_dir_verify(component))
5565 		return (NFS4ERR_INVAL);
5566 
5567 	nm = utf8_to_fn(component, &buflen, NULL);
5568 
5569 	if (nm == NULL)
5570 		return (NFS4ERR_RESOURCE);
5571 
5572 	if (buflen > MAXNAMELEN) {
5573 		kmem_free(nm, buflen);
5574 		return (NFS4ERR_NAMETOOLONG);
5575 	}
5576 
5577 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5578 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5579 	if (error) {
5580 		kmem_free(nm, buflen);
5581 		return (puterrno4(error));
5582 	}
5583 
5584 	if (bva.va_type != VDIR) {
5585 		kmem_free(nm, buflen);
5586 		return (NFS4ERR_NOTDIR);
5587 	}
5588 
5589 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5590 
5591 	switch (args->mode) {
5592 	case GUARDED4:
5593 		/*FALLTHROUGH*/
5594 	case UNCHECKED4:
5595 		nfs4_ntov_table_init(&ntov);
5596 		ntov_table_init = TRUE;
5597 
5598 		*attrset = 0;
5599 		status = do_rfs4_set_attrs(attrset,
5600 					&args->createhow4_u.createattrs,
5601 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5602 
5603 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5604 		    sarg.vap->va_type != VREG) {
5605 			if (sarg.vap->va_type == VDIR)
5606 				status = NFS4ERR_ISDIR;
5607 			else if (sarg.vap->va_type == VLNK)
5608 				status = NFS4ERR_SYMLINK;
5609 			else
5610 				status = NFS4ERR_INVAL;
5611 		}
5612 
5613 		if (status != NFS4_OK) {
5614 			kmem_free(nm, buflen);
5615 			nfs4_ntov_table_free(&ntov, &sarg);
5616 			*attrset = 0;
5617 			return (status);
5618 		}
5619 
5620 		vap = sarg.vap;
5621 		vap->va_type = VREG;
5622 		vap->va_mask |= AT_TYPE;
5623 
5624 		if ((vap->va_mask & AT_MODE) == 0) {
5625 			vap->va_mask |= AT_MODE;
5626 			vap->va_mode = (mode_t)0600;
5627 		}
5628 
5629 		if (vap->va_mask & AT_SIZE) {
5630 
5631 			/* Disallow create with a non-zero size */
5632 
5633 			if ((reqsize = sarg.vap->va_size) != 0) {
5634 				kmem_free(nm, buflen);
5635 				nfs4_ntov_table_free(&ntov, &sarg);
5636 				*attrset = 0;
5637 				return (NFS4ERR_INVAL);
5638 			}
5639 			setsize = TRUE;
5640 		}
5641 		break;
5642 
5643 	case EXCLUSIVE4:
5644 		/* prohibit EXCL create of named attributes */
5645 		if (dvp->v_flag & V_XATTRDIR) {
5646 			kmem_free(nm, buflen);
5647 			*attrset = 0;
5648 			return (NFS4ERR_INVAL);
5649 		}
5650 
5651 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5652 		cva.va_type = VREG;
5653 		/*
5654 		 * Ensure no time overflows. Assumes underlying
5655 		 * filesystem supports at least 32 bits.
5656 		 * Truncate nsec to usec resolution to allow valid
5657 		 * compares even if the underlying filesystem truncates.
5658 		 */
5659 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5660 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5661 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5662 		cva.va_mode = (mode_t)0;
5663 		vap = &cva;
5664 		break;
5665 	}
5666 
5667 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5668 						cs->cr, &vp, &created);
5669 	kmem_free(nm, buflen);
5670 
5671 	if (status != NFS4_OK) {
5672 		if (ntov_table_init)
5673 			nfs4_ntov_table_free(&ntov, &sarg);
5674 		*attrset = 0;
5675 		return (status);
5676 	}
5677 
5678 	trunc = (setsize && !created);
5679 
5680 	if (args->mode != EXCLUSIVE4) {
5681 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5682 
5683 		/*
5684 		 * True verification that object was created with correct
5685 		 * attrs is impossible.  The attrs could have been changed
5686 		 * immediately after object creation.  If attributes did
5687 		 * not verify, the only recourse for the server is to
5688 		 * destroy the object.  Maybe if some attrs (like gid)
5689 		 * are set incorrectly, the object should be destroyed;
5690 		 * however, seems bad as a default policy.  Do we really
5691 		 * want to destroy an object over one of the times not
5692 		 * verifying correctly?  For these reasons, the server
5693 		 * currently sets bits in attrset for createattrs
5694 		 * that were set; however, no verification is done.
5695 		 *
5696 		 * vmask_to_nmask accounts for vattr bits set on create
5697 		 *	[do_rfs4_set_attrs() only sets resp bits for
5698 		 *	 non-vattr/vfs bits.]
5699 		 * Mask off any bits we set by default so as not to return
5700 		 * more attrset bits than were requested in createattrs
5701 		 */
5702 		if (created) {
5703 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5704 			*attrset &= createmask;
5705 		} else {
5706 			/*
5707 			 * We did not create the vnode (we tried but it
5708 			 * already existed).  In this case, the only createattr
5709 			 * that the spec allows the server to set is size,
5710 			 * and even then, it can only be set if it is 0.
5711 			 */
5712 			*attrset = 0;
5713 			if (trunc)
5714 				*attrset = FATTR4_SIZE_MASK;
5715 		}
5716 	}
5717 	if (ntov_table_init)
5718 		nfs4_ntov_table_free(&ntov, &sarg);
5719 
5720 	/*
5721 	 * Get the initial "after" sequence number, if it fails,
5722 	 * set to zero, time to before.
5723 	 */
5724 	iva.va_mask = AT_CTIME|AT_SEQ;
5725 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5726 		iva.va_seq = 0;
5727 		iva.va_ctime = bva.va_ctime;
5728 	}
5729 
5730 	/*
5731 	 * create_vnode attempts to create the file exclusive,
5732 	 * if it already exists the VOP_CREATE will fail and
5733 	 * may not increase va_seq. It is atomic if
5734 	 * we haven't changed the directory, but if it has changed
5735 	 * we don't know what changed it.
5736 	 */
5737 	if (!created) {
5738 		if (bva.va_seq && iva.va_seq &&
5739 			bva.va_seq == iva.va_seq)
5740 			cinfo->atomic = TRUE;
5741 		else
5742 			cinfo->atomic = FALSE;
5743 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
5744 	} else {
5745 		/*
5746 		 * The entry was created, we need to sync the
5747 		 * directory metadata.
5748 		 */
5749 		(void) VOP_FSYNC(dvp, 0, cs->cr);
5750 
5751 		/*
5752 		 * Get "after" change value, if it fails, simply return the
5753 		 * before value.
5754 		 */
5755 		ava.va_mask = AT_CTIME|AT_SEQ;
5756 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5757 			ava.va_ctime = bva.va_ctime;
5758 			ava.va_seq = 0;
5759 		}
5760 
5761 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5762 
5763 		/*
5764 		 * The cinfo->atomic = TRUE only if we have
5765 		 * non-zero va_seq's, and it has incremented by exactly one
5766 		 * during the create_vnode and it didn't
5767 		 * change during the VOP_FSYNC.
5768 		 */
5769 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
5770 				iva.va_seq == (bva.va_seq + 1) &&
5771 				iva.va_seq == ava.va_seq)
5772 			cinfo->atomic = TRUE;
5773 		else
5774 			cinfo->atomic = FALSE;
5775 	}
5776 
5777 	/* Check for mandatory locking and that the size gets set. */
5778 	cva.va_mask = AT_MODE;
5779 	if (setsize)
5780 		cva.va_mask |= AT_SIZE;
5781 
5782 	/* Assume the worst */
5783 	cs->mandlock = TRUE;
5784 
5785 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
5786 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
5787 
5788 		/*
5789 		 * Truncate the file if necessary; this would be
5790 		 * the case for create over an existing file.
5791 		 */
5792 
5793 		if (trunc) {
5794 			int in_crit = 0;
5795 			rfs4_file_t *fp;
5796 			bool_t create = FALSE;
5797 
5798 			/*
5799 			 * We are writing over an existing file.
5800 			 * Check to see if we need to recall a delegation.
5801 			 */
5802 			rfs4_hold_deleg_policy();
5803 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
5804 				if (rfs4_check_delegated_byfp(FWRITE, fp,
5805 					(reqsize == 0), FALSE, FALSE,
5806 							&clientid)) {
5807 
5808 					rfs4_file_rele(fp);
5809 					rfs4_rele_deleg_policy();
5810 					VN_RELE(vp);
5811 					*attrset = 0;
5812 					return (NFS4ERR_DELAY);
5813 				}
5814 				rfs4_file_rele(fp);
5815 			}
5816 			rfs4_rele_deleg_policy();
5817 
5818 			if (nbl_need_check(vp)) {
5819 				in_crit = 1;
5820 
5821 				ASSERT(reqsize == 0);
5822 
5823 				nbl_start_crit(vp, RW_READER);
5824 				if (nbl_conflict(vp, NBL_WRITE, 0,
5825 						cva.va_size, 0)) {
5826 					in_crit = 0;
5827 					nbl_end_crit(vp);
5828 					VN_RELE(vp);
5829 					*attrset = 0;
5830 					return (NFS4ERR_ACCESS);
5831 				}
5832 			}
5833 			ct.cc_sysid = 0;
5834 			ct.cc_pid = 0;
5835 			ct.cc_caller_id = nfs4_srv_caller_id;
5836 
5837 			cva.va_mask = AT_SIZE;
5838 			cva.va_size = reqsize;
5839 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
5840 			if (in_crit)
5841 				nbl_end_crit(vp);
5842 		}
5843 	}
5844 
5845 	error = makefh4(&cs->fh, vp, cs->exi);
5846 
5847 	/*
5848 	 * Force modified data and metadata out to stable storage.
5849 	 */
5850 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
5851 
5852 	if (error) {
5853 		VN_RELE(vp);
5854 		*attrset = 0;
5855 		return (puterrno4(error));
5856 	}
5857 
5858 	/* if parent dir is attrdir, set namedattr fh flag */
5859 	if (dvp->v_flag & V_XATTRDIR)
5860 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
5861 
5862 	if (cs->vp)
5863 		VN_RELE(cs->vp);
5864 
5865 	cs->vp = vp;
5866 
5867 	/*
5868 	 * if we did not create the file, we will need to check
5869 	 * the access bits on the file
5870 	 */
5871 
5872 	if (!created) {
5873 		if (setsize)
5874 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
5875 		status = check_open_access(args->share_access, cs, req);
5876 		if (status != NFS4_OK)
5877 			*attrset = 0;
5878 	}
5879 	return (status);
5880 }
5881 
5882 /*ARGSUSED*/
5883 static void
5884 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
5885 		rfs4_openowner_t *oo, delegreq_t deleg,
5886 		uint32_t access, uint32_t deny,
5887 		OPEN4res *resp)
5888 {
5889 	/* XXX Currently not using req  */
5890 	rfs4_state_t *state;
5891 	rfs4_file_t *file;
5892 	bool_t screate = TRUE;
5893 	bool_t fcreate = TRUE;
5894 	uint32_t amodes;
5895 	uint32_t dmodes;
5896 	rfs4_deleg_state_t *dsp;
5897 	struct shrlock shr;
5898 	struct shr_locowner shr_loco;
5899 	sysid_t sysid;
5900 	nfsstat4 status;
5901 	int fflags = 0;
5902 	int recall = 0;
5903 	int err;
5904 
5905 	/* get the file struct and hold a lock on it during initial open */
5906 	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
5907 	if (file == NULL) {
5908 		NFS4_DEBUG(rfs4_debug,
5909 			(CE_NOTE, "rfs4_do_open: can't find file"));
5910 		resp->status = NFS4ERR_SERVERFAULT;
5911 		return;
5912 	}
5913 
5914 	state = rfs4_findstate_by_owner_file(oo, file, &screate);
5915 	if (state == NULL) {
5916 		NFS4_DEBUG(rfs4_debug,
5917 			(CE_NOTE, "rfs4_do_open: can't find state"));
5918 		resp->status = NFS4ERR_RESOURCE;
5919 		/* No need to keep any reference */
5920 		rfs4_file_rele_withunlock(file);
5921 		return;
5922 	}
5923 
5924 	/*
5925 	 * Check for conflicts in deny and access before checking for
5926 	 * conflicts in delegation.  We don't want to recall a
5927 	 * delegation based on an open that will eventually fail based
5928 	 * on shares modes.
5929 	 */
5930 
5931 	shr.s_access = (short)access;
5932 	shr.s_deny = (short)deny;
5933 	shr.s_pid = rfs4_dbe_getid(oo->dbe);
5934 
5935 	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
5936 		resp->status = status;
5937 		rfs4_file_rele(file);
5938 		/* Not a fully formed open; "close" it */
5939 		if (screate == TRUE)
5940 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5941 		rfs4_state_rele(state);
5942 		return;
5943 	}
5944 	shr.s_sysid = sysid;
5945 	shr_loco.sl_pid = shr.s_pid;
5946 	shr_loco.sl_id = shr.s_sysid;
5947 	shr.s_owner = (caddr_t)&shr_loco;
5948 	shr.s_own_len = sizeof (shr_loco);
5949 
5950 	fflags = 0;
5951 	if (access & OPEN4_SHARE_ACCESS_READ)
5952 		fflags |= FREAD;
5953 	if (access & OPEN4_SHARE_ACCESS_WRITE)
5954 		fflags |= FWRITE;
5955 
5956 	if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {
5957 
5958 		resp->status = err == EAGAIN ?
5959 			NFS4ERR_SHARE_DENIED : puterrno4(err);
5960 
5961 		rfs4_file_rele(file);
5962 		/* Not a fully formed open; "close" it */
5963 		if (screate == TRUE)
5964 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5965 		rfs4_state_rele(state);
5966 		return;
5967 	}
5968 
5969 	rfs4_dbe_lock(state->dbe);
5970 	rfs4_dbe_lock(file->dbe);
5971 
5972 	/*
5973 	 * Calculate the new deny and access mode that this open is adding to
5974 	 * the file for this open owner;
5975 	 */
5976 	dmodes = (deny & ~state->share_deny);
5977 	amodes = (access & ~state->share_access);
5978 
5979 	/*
5980 	 * Check to see if this file is delegated and if so, if a
5981 	 * recall needs to be done.
5982 	 */
5983 	if (rfs4_check_recall(state, access)) {
5984 		rfs4_dbe_unlock(file->dbe);
5985 		rfs4_dbe_unlock(state->dbe);
5986 		rfs4_recall_deleg(file, FALSE, state->owner->client);
5987 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
5988 		rfs4_dbe_lock(state->dbe);
5989 		rfs4_dbe_lock(file->dbe);
5990 		/* Let's see if the delegation was returned */
5991 		if (rfs4_check_recall(state, access)) {
5992 			rfs4_dbe_unlock(file->dbe);
5993 			rfs4_dbe_unlock(state->dbe);
5994 			rfs4_file_rele(file);
5995 			rfs4_update_lease(state->owner->client);
5996 			/* recalculate flags to match what was added */
5997 			fflags = 0;
5998 			if (amodes & OPEN4_SHARE_ACCESS_READ)
5999 				fflags |= FREAD;
6000 			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6001 				fflags |= FWRITE;
6002 			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
6003 			/* Not a fully formed open; "close" it */
6004 			if (screate == TRUE)
6005 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
6006 			rfs4_state_rele(state);
6007 			resp->status = NFS4ERR_DELAY;
6008 			return;
6009 		}
6010 	}
6011 
6012 	if (dmodes & OPEN4_SHARE_DENY_READ)
6013 		file->deny_read++;
6014 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
6015 		file->deny_write++;
6016 	file->share_deny |= deny;
6017 	state->share_deny |= deny;
6018 
6019 	if (amodes & OPEN4_SHARE_ACCESS_READ)
6020 		file->access_read++;
6021 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6022 		file->access_write++;
6023 	file->share_access |= access;
6024 	state->share_access |= access;
6025 
6026 	/*
6027 	 * Check for delegation here. if the deleg argument is not
6028 	 * DELEG_ANY, then this is a reclaim from a client and
6029 	 * we must honor the delegation requested. If necessary we can
6030 	 * set the recall flag.
6031 	 */
6032 
6033 	dsp = rfs4_grant_delegation(deleg, state, &recall);
6034 
6035 	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);
6036 
6037 	next_stateid(&state->stateid);
6038 
6039 	resp->stateid = state->stateid.stateid;
6040 
6041 	rfs4_dbe_unlock(file->dbe);
6042 	rfs4_dbe_unlock(state->dbe);
6043 
6044 	if (dsp) {
6045 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6046 		rfs4_deleg_state_rele(dsp);
6047 	}
6048 
6049 	rfs4_file_rele(file);
6050 	rfs4_state_rele(state);
6051 
6052 	resp->status = NFS4_OK;
6053 }
6054 
6055 /*ARGSUSED*/
6056 static void
6057 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6058 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6059 {
6060 	change_info4 *cinfo = &resp->cinfo;
6061 	bitmap4 *attrset = &resp->attrset;
6062 
6063 	if (args->opentype == OPEN4_NOCREATE)
6064 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6065 					req, cs, args->share_access, cinfo);
6066 	else {
6067 		/* inhibit delegation grants during exclusive create */
6068 
6069 		if (args->mode == EXCLUSIVE4)
6070 			rfs4_disable_delegation();
6071 
6072 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6073 					oo->client->clientid);
6074 	}
6075 
6076 	if (resp->status == NFS4_OK) {
6077 
6078 		/* cs->vp cs->fh now reference the desired file */
6079 
6080 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6081 						args->share_deny, resp);
6082 
6083 		/*
6084 		 * If rfs4_createfile set attrset, we must
6085 		 * clear this attrset before the response is copied.
6086 		 */
6087 		if (resp->status != NFS4_OK && resp->attrset) {
6088 			resp->attrset = 0;
6089 		}
6090 	}
6091 	else
6092 		*cs->statusp = resp->status;
6093 
6094 	if (args->mode == EXCLUSIVE4)
6095 		rfs4_enable_delegation();
6096 }
6097 
6098 /*ARGSUSED*/
6099 static void
6100 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6101 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6102 {
6103 	change_info4 *cinfo = &resp->cinfo;
6104 	vattr_t va;
6105 	vtype_t v_type = cs->vp->v_type;
6106 	int error = 0;
6107 
6108 	/* Verify that we have a regular file */
6109 	if (v_type != VREG) {
6110 		if (v_type == VDIR)
6111 			resp->status = NFS4ERR_ISDIR;
6112 		else if (v_type == VLNK)
6113 			resp->status = NFS4ERR_SYMLINK;
6114 		else
6115 			resp->status = NFS4ERR_INVAL;
6116 		return;
6117 	}
6118 
6119 	va.va_mask = AT_MODE|AT_UID;
6120 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6121 	if (error) {
6122 		resp->status = puterrno4(error);
6123 		return;
6124 	}
6125 
6126 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6127 
6128 	/*
6129 	 * Check if we have access to the file, Note the the file
6130 	 * could have originally been open UNCHECKED or GUARDED
6131 	 * with mode bits that will now fail, but there is nothing
6132 	 * we can really do about that except in the case that the
6133 	 * owner of the file is the one requesting the open.
6134 	 */
6135 	if (crgetuid(cs->cr) != va.va_uid) {
6136 		resp->status = check_open_access(args->share_access, cs, req);
6137 		if (resp->status != NFS4_OK) {
6138 			return;
6139 		}
6140 	}
6141 
6142 	/*
6143 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6144 	 */
6145 	cinfo->before = 0;
6146 	cinfo->after = 0;
6147 	cinfo->atomic = FALSE;
6148 
6149 	rfs4_do_open(cs, req, oo,
6150 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6151 		args->share_access, args->share_deny, resp);
6152 }
6153 
6154 static void
6155 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6156 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6157 {
6158 	int error;
6159 	nfsstat4 status;
6160 	stateid4 stateid =
6161 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6162 	rfs4_deleg_state_t *dsp;
6163 
6164 	/*
6165 	 * Find the state info from the stateid and confirm that the
6166 	 * file is delegated.  If the state openowner is the same as
6167 	 * the supplied openowner we're done. If not, get the file
6168 	 * info from the found state info. Use that file info to
6169 	 * create the state for this lock owner. Note solaris doen't
6170 	 * really need the pathname to find the file. We may want to
6171 	 * lookup the pathname and make sure that the vp exist and
6172 	 * matches the vp in the file structure. However it is
6173 	 * possible that the pathname nolonger exists (local process
6174 	 * unlinks the file), so this may not be that useful.
6175 	 */
6176 
6177 	status = rfs4_get_deleg_state(&stateid, &dsp);
6178 	if (status != NFS4_OK) {
6179 		resp->status = status;
6180 		return;
6181 	}
6182 
6183 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6184 
6185 	/*
6186 	 * New lock owner, create state. Since this was probably called
6187 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6188 	 */
6189 
6190 	ASSERT(cs->vp != NULL);
6191 	VN_RELE(cs->vp);
6192 	VN_HOLD(dsp->finfo->vp);
6193 	cs->vp = dsp->finfo->vp;
6194 
6195 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6196 		rfs4_deleg_state_rele(dsp);
6197 		*cs->statusp = resp->status = puterrno4(error);
6198 		return;
6199 	}
6200 
6201 	/* Mark progress for delegation returns */
6202 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6203 	rfs4_deleg_state_rele(dsp);
6204 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6205 				args->share_access, args->share_deny, resp);
6206 }
6207 
6208 /*ARGSUSED*/
6209 static void
6210 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6211 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6212 {
6213 	/*
6214 	 * Lookup the pathname, it must already exist since this file
6215 	 * was delegated.
6216 	 *
6217 	 * Find the file and state info for this vp and open owner pair.
6218 	 *	check that they are in fact delegated.
6219 	 *	check that the state access and deny modes are the same.
6220 	 *
6221 	 * Return the delgation possibly seting the recall flag.
6222 	 */
6223 	rfs4_file_t *file;
6224 	rfs4_state_t *state;
6225 	bool_t create = FALSE;
6226 	bool_t dcreate = FALSE;
6227 	rfs4_deleg_state_t *dsp;
6228 	nfsace4 *ace;
6229 
6230 
6231 	/* Note we ignore oflags */
6232 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6233 				req, cs, args->share_access, &resp->cinfo);
6234 
6235 	if (resp->status != NFS4_OK) {
6236 		return;
6237 	}
6238 
6239 	/* get the file struct and hold a lock on it during initial open */
6240 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6241 	if (file == NULL) {
6242 		NFS4_DEBUG(rfs4_debug,
6243 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6244 		resp->status = NFS4ERR_SERVERFAULT;
6245 		return;
6246 	}
6247 
6248 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6249 	if (state == NULL) {
6250 		NFS4_DEBUG(rfs4_debug,
6251 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6252 		resp->status = NFS4ERR_SERVERFAULT;
6253 		rfs4_file_rele_withunlock(file);
6254 		return;
6255 	}
6256 
6257 	rfs4_dbe_lock(state->dbe);
6258 	rfs4_dbe_lock(file->dbe);
6259 	if (args->share_access != state->share_access ||
6260 			args->share_deny != state->share_deny ||
6261 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6262 		NFS4_DEBUG(rfs4_debug,
6263 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6264 		rfs4_dbe_unlock(file->dbe);
6265 		rfs4_dbe_unlock(state->dbe);
6266 		rfs4_file_rele(file);
6267 		rfs4_state_rele(state);
6268 		resp->status = NFS4ERR_SERVERFAULT;
6269 		return;
6270 	}
6271 	rfs4_dbe_unlock(file->dbe);
6272 	rfs4_dbe_unlock(state->dbe);
6273 
6274 	dsp = rfs4_finddeleg(state, &dcreate);
6275 	if (dsp == NULL) {
6276 		rfs4_state_rele(state);
6277 		rfs4_file_rele(file);
6278 		resp->status = NFS4ERR_SERVERFAULT;
6279 		return;
6280 	}
6281 
6282 	next_stateid(&state->stateid);
6283 
6284 	resp->stateid = state->stateid.stateid;
6285 
6286 	resp->delegation.delegation_type = dsp->dtype;
6287 
6288 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6289 		open_read_delegation4 *rv =
6290 			&resp->delegation.open_delegation4_u.read;
6291 
6292 		rv->stateid = dsp->delegid.stateid;
6293 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6294 		ace = &rv->permissions;
6295 	} else {
6296 		open_write_delegation4 *rv =
6297 			&resp->delegation.open_delegation4_u.write;
6298 
6299 		rv->stateid = dsp->delegid.stateid;
6300 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6301 		ace = &rv->permissions;
6302 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6303 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6304 	}
6305 
6306 	/* XXX For now */
6307 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6308 	ace->flag = 0;
6309 	ace->access_mask = 0;
6310 	ace->who.utf8string_len = 0;
6311 	ace->who.utf8string_val = 0;
6312 
6313 	rfs4_deleg_state_rele(dsp);
6314 	rfs4_state_rele(state);
6315 	rfs4_file_rele(file);
6316 }
6317 
/*
 * Result of a sequence id check, as returned by rfs4_check_seqid().
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* 0: request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* 2: any other seqid */
} rfs4_chkseq_t;
6323 
6324 /*
6325  * Generic function for sequence number checks.
6326  */
6327 static rfs4_chkseq_t
6328 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6329 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6330 {
6331 	/* Same sequence ids and matching operations? */
6332 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6333 		if (copyres == TRUE) {
6334 			rfs4_free_reply(resop);
6335 			rfs4_copy_reply(resop, lastop);
6336 		}
6337 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6338 			"Replayed SEQID %d\n", seqid));
6339 		return (NFS4_CHKSEQ_REPLAY);
6340 	}
6341 
6342 	/* If the incoming sequence is not the next expected then it is bad */
6343 	if (rqst_seq != seqid + 1) {
6344 		if (rqst_seq == seqid) {
6345 			NFS4_DEBUG(rfs4_debug,
6346 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6347 				"but last op was %d current op is %d\n",
6348 				lastop->resop, resop->resop));
6349 			return (NFS4_CHKSEQ_BAD);
6350 		}
6351 		NFS4_DEBUG(rfs4_debug,
6352 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6353 				rqst_seq, seqid));
6354 		return (NFS4_CHKSEQ_BAD);
6355 	}
6356 
6357 	/* Everything okay -- next expected */
6358 	return (NFS4_CHKSEQ_OKAY);
6359 }
6360 
6361 
6362 static rfs4_chkseq_t
6363 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6364 {
6365 	rfs4_chkseq_t rc;
6366 
6367 	rfs4_dbe_lock(op->dbe);
6368 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6369 	rfs4_dbe_unlock(op->dbe);
6370 
6371 	if (rc == NFS4_CHKSEQ_OKAY)
6372 		rfs4_update_lease(op->client);
6373 
6374 	return (rc);
6375 }
6376 
6377 static rfs4_chkseq_t
6378 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6379 	nfs_resop4 *resop)
6380 {
6381 	rfs4_chkseq_t rc;
6382 
6383 	rfs4_dbe_lock(op->dbe);
6384 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6385 		olo_seqid, resop, FALSE);
6386 	rfs4_dbe_unlock(op->dbe);
6387 
6388 	return (rc);
6389 }
6390 
6391 static rfs4_chkseq_t
6392 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6393 {
6394 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6395 
6396 	rfs4_dbe_lock(lp->dbe);
6397 	if (!lp->skip_seqid_check)
6398 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6399 			seqid, resop, TRUE);
6400 	rfs4_dbe_unlock(lp->dbe);
6401 
6402 	return (rc);
6403 }
6404 
6405 static void
6406 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6407 	    struct svc_req *req, struct compound_state *cs)
6408 {
6409 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6410 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6411 	open_owner4 *owner = &args->owner;
6412 	open_claim_type4 claim = args->claim;
6413 	rfs4_client_t *cp;
6414 	rfs4_openowner_t *oo;
6415 	bool_t create;
6416 	bool_t replay = FALSE;
6417 	int can_reclaim;
6418 
6419 
6420 	if (cs->vp == NULL) {
6421 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6422 		return;
6423 	}
6424 
6425 	/*
6426 	 * Need to check clientid and lease expiration first based on
6427 	 * error ordering and incrementing sequence id.
6428 	 */
6429 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6430 	if (cp == NULL) {
6431 		*cs->statusp = resp->status =
6432 			rfs4_check_clientid(&owner->clientid, 0);
6433 		return;
6434 	}
6435 
6436 	if (rfs4_lease_expired(cp)) {
6437 		rfs4_client_close(cp);
6438 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6439 		return;
6440 	}
6441 	can_reclaim = cp->can_reclaim;
6442 
6443 	/*
6444 	 * Find the open_owner for use from this point forward.  Take
6445 	 * care in updating the sequence id based on the type of error
6446 	 * being returned.
6447 	 */
6448 retry:
6449 	create = TRUE;
6450 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6451 	if (oo == NULL) {
6452 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6453 		rfs4_client_rele(cp);
6454 		return;
6455 	}
6456 
6457 	/* Hold off access to the sequence space while the open is done */
6458 	rfs4_sw_enter(&oo->oo_sw);
6459 
6460 	/*
6461 	 * If the open_owner existed before at the server, then check
6462 	 * the sequence id.
6463 	 */
6464 	if (!create && !oo->postpone_confirm) {
6465 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6466 		case NFS4_CHKSEQ_BAD:
6467 			if ((args->seqid > oo->open_seqid) &&
6468 				oo->need_confirm) {
6469 				rfs4_free_opens(oo, TRUE, FALSE);
6470 				rfs4_sw_exit(&oo->oo_sw);
6471 				rfs4_openowner_rele(oo);
6472 				goto retry;
6473 			}
6474 			resp->status = NFS4ERR_BAD_SEQID;
6475 			goto out;
6476 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6477 			replay = TRUE;
6478 			goto out;
6479 		default:
6480 			break;
6481 		}
6482 
6483 		/*
6484 		 * Sequence was ok and open owner exists
6485 		 * check to see if we have yet to see an
6486 		 * open_confirm.
6487 		 */
6488 		if (oo->need_confirm) {
6489 			rfs4_free_opens(oo, TRUE, FALSE);
6490 			rfs4_sw_exit(&oo->oo_sw);
6491 			rfs4_openowner_rele(oo);
6492 			goto retry;
6493 		}
6494 	}
6495 	/* Grace only applies to regular-type OPENs */
6496 	if (rfs4_clnt_in_grace(cp) &&
6497 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6498 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6499 		goto out;
6500 	}
6501 
6502 	/*
6503 	 * If previous state at the server existed then can_reclaim
6504 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6505 	 * client.
6506 	 */
6507 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6508 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6509 		goto out;
6510 	}
6511 
6512 
6513 	/*
6514 	 * Reject the open if the client has missed the grace period
6515 	 */
6516 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6517 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6518 		goto out;
6519 	}
6520 
6521 	/* Couple of up-front bookkeeping items */
6522 	if (oo->need_confirm) {
6523 		/*
6524 		 * If this is a reclaim OPEN then we should not ask
6525 		 * for a confirmation of the open_owner per the
6526 		 * protocol specification.
6527 		 */
6528 		if (claim == CLAIM_PREVIOUS)
6529 			oo->need_confirm = FALSE;
6530 		else
6531 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6532 	}
6533 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6534 
6535 	/*
6536 	 * If there is an unshared filesystem mounted on this vnode,
6537 	 * do not allow to open/create in this directory.
6538 	 */
6539 	if (vn_ismntpt(cs->vp)) {
6540 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6541 		goto out;
6542 	}
6543 
6544 	/*
6545 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6546 	 * deny can be READ, WRITE, BOTH, or NONE.
6547 	 * bits not defined for access/deny are invalid.
6548 	 */
6549 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6550 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6551 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6552 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6553 		goto out;
6554 	}
6555 
6556 
6557 	/*
6558 	 * make sure attrset is zero before response is built.
6559 	 */
6560 	resp->attrset = 0;
6561 
6562 	switch (claim) {
6563 	case CLAIM_NULL:
6564 		rfs4_do_opennull(cs, req, args, oo, resp);
6565 	    break;
6566 	case CLAIM_PREVIOUS:
6567 		rfs4_do_openprev(cs, req, args, oo, resp);
6568 	    break;
6569 	case CLAIM_DELEGATE_CUR:
6570 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6571 	    break;
6572 	case CLAIM_DELEGATE_PREV:
6573 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6574 	    break;
6575 	default:
6576 		resp->status = NFS4ERR_INVAL;
6577 		break;
6578 	}
6579 
6580 out:
6581 	rfs4_client_rele(cp);
6582 
6583 	/* Catch sequence id handling here to make it a little easier */
6584 	switch (resp->status) {
6585 	case NFS4ERR_BADXDR:
6586 	case NFS4ERR_BAD_SEQID:
6587 	case NFS4ERR_BAD_STATEID:
6588 	case NFS4ERR_NOFILEHANDLE:
6589 	case NFS4ERR_RESOURCE:
6590 	case NFS4ERR_STALE_CLIENTID:
6591 	case NFS4ERR_STALE_STATEID:
6592 		/*
6593 		 * The protocol states that if any of these errors are
6594 		 * being returned, the sequence id should not be
6595 		 * incremented.  Any other return requires an
6596 		 * increment.
6597 		 */
6598 		break;
6599 	default:
6600 		/* Always update the lease in this case */
6601 		rfs4_update_lease(oo->client);
6602 
6603 		/* Regular response - copy the result */
6604 		if (!replay)
6605 			rfs4_update_open_resp(oo, resop, &cs->fh);
6606 
6607 		/*
6608 		 * REPLAY case: Only if the previous response was OK
6609 		 * do we copy the filehandle.  If not OK, no
6610 		 * filehandle to copy.
6611 		 */
6612 		if (replay == TRUE &&
6613 		    resp->status == NFS4_OK &&
6614 		    oo->reply_fh.nfs_fh4_val) {
6615 			/*
6616 			 * If this is a replay, we must restore the
6617 			 * current filehandle/vp to that of what was
6618 			 * returned originally.  Try our best to do
6619 			 * it.
6620 			 */
6621 			nfs_fh4_fmt_t *fh_fmtp =
6622 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6623 
6624 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6625 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6626 
6627 			if (cs->exi == NULL) {
6628 				resp->status = NFS4ERR_STALE;
6629 				goto finish;
6630 			}
6631 
6632 			VN_RELE(cs->vp);
6633 
6634 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6635 				&resp->status);
6636 
6637 			if (cs->vp == NULL)
6638 				goto finish;
6639 
6640 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6641 		}
6642 
6643 		/*
6644 		 * If this was a replay, no need to update the
6645 		 * sequence id. If the open_owner was not created on
6646 		 * this pass, then update.  The first use of an
6647 		 * open_owner will not bump the sequence id.
6648 		 */
6649 		if (replay == FALSE && !create)
6650 			rfs4_update_open_sequence(oo);
6651 		/*
6652 		 * If the client is receiving an error and the
6653 		 * open_owner needs to be confirmed, there is no way
6654 		 * to notify the client of this fact ignoring the fact
6655 		 * that the server has no method of returning a
6656 		 * stateid to confirm.  Therefore, the server needs to
6657 		 * mark this open_owner in a way as to avoid the
6658 		 * sequence id checking the next time the client uses
6659 		 * this open_owner.
6660 		 */
6661 		if (resp->status != NFS4_OK && oo->need_confirm)
6662 			oo->postpone_confirm = TRUE;
6663 		/*
6664 		 * If OK response then clear the postpone flag and
6665 		 * reset the sequence id to keep in sync with the
6666 		 * client.
6667 		 */
6668 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6669 			oo->postpone_confirm = FALSE;
6670 			oo->open_seqid = args->seqid;
6671 		}
6672 		break;
6673 	}
6674 
6675 finish:
6676 	*cs->statusp = resp->status;
6677 
6678 	rfs4_sw_exit(&oo->oo_sw);
6679 	rfs4_openowner_rele(oo);
6680 }
6681 
6682 /*ARGSUSED*/
6683 void
6684 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6685 		    struct svc_req *req, struct compound_state *cs)
6686 {
6687 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6688 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6689 	rfs4_state_t *sp;
6690 	nfsstat4 status;
6691 
6692 	if (cs->vp == NULL) {
6693 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6694 		return;
6695 	}
6696 
6697 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6698 	if (status != NFS4_OK) {
6699 		*cs->statusp = resp->status = status;
6700 		return;
6701 	}
6702 
6703 	/* Ensure specified filehandle matches */
6704 	if (cs->vp != sp->finfo->vp) {
6705 		rfs4_state_rele(sp);
6706 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6707 		return;
6708 	}
6709 
6710 	/* hold off other access to open_owner while we tinker */
6711 	rfs4_sw_enter(&sp->owner->oo_sw);
6712 
6713 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6714 	case NFS4_CHECK_STATEID_OKAY:
6715 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6716 			resop) != 0) {
6717 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6718 			break;
6719 		}
6720 		/*
6721 		 * If it is the appropriate stateid and determined to
6722 		 * be "OKAY" then this means that the stateid does not
6723 		 * need to be confirmed and the client is in error for
6724 		 * sending an OPEN_CONFIRM.
6725 		 */
6726 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6727 		break;
6728 	case NFS4_CHECK_STATEID_OLD:
6729 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6730 		break;
6731 	case NFS4_CHECK_STATEID_BAD:
6732 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6733 		break;
6734 	case NFS4_CHECK_STATEID_EXPIRED:
6735 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6736 		break;
6737 	case NFS4_CHECK_STATEID_CLOSED:
6738 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6739 		break;
6740 	case NFS4_CHECK_STATEID_REPLAY:
6741 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6742 		case NFS4_CHKSEQ_OKAY:
6743 			/*
6744 			 * This is replayed stateid; if seqid matches
6745 			 * next expected, then client is using wrong seqid.
6746 			 */
6747 			/* fall through */
6748 		case NFS4_CHKSEQ_BAD:
6749 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6750 			break;
6751 		case NFS4_CHKSEQ_REPLAY:
6752 			/*
6753 			 * Note this case is the duplicate case so
6754 			 * resp->status is already set.
6755 			 */
6756 			*cs->statusp = resp->status;
6757 			rfs4_update_lease(sp->owner->client);
6758 			break;
6759 		}
6760 		break;
6761 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6762 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6763 			resop) != NFS4_CHKSEQ_OKAY) {
6764 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6765 			break;
6766 		}
6767 		*cs->statusp = resp->status = NFS4_OK;
6768 
6769 		next_stateid(&sp->stateid);
6770 		resp->open_stateid = sp->stateid.stateid;
6771 		sp->owner->need_confirm = FALSE;
6772 		rfs4_update_lease(sp->owner->client);
6773 		rfs4_update_open_sequence(sp->owner);
6774 		rfs4_update_open_resp(sp->owner, resop, NULL);
6775 		break;
6776 	default:
6777 		ASSERT(FALSE);
6778 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6779 		break;
6780 	}
6781 	rfs4_sw_exit(&sp->owner->oo_sw);
6782 	rfs4_state_rele(sp);
6783 }
6784 
6785 /*ARGSUSED*/
6786 void
6787 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
6788 		    struct svc_req *req, struct compound_state *cs)
6789 {
6790 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
6791 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
6792 	uint32_t access = args->share_access;
6793 	uint32_t deny = args->share_deny;
6794 	nfsstat4 status;
6795 	rfs4_state_t *sp;
6796 	rfs4_file_t *fp;
6797 
6798 	if (cs->vp == NULL) {
6799 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6800 		return;
6801 	}
6802 
6803 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6804 	if (status != NFS4_OK) {
6805 		*cs->statusp = resp->status = status;
6806 		return;
6807 	}
6808 
6809 	/* Ensure specified filehandle matches */
6810 	if (cs->vp != sp->finfo->vp) {
6811 		rfs4_state_rele(sp);
6812 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6813 		return;
6814 	}
6815 
6816 	/* hold off other access to open_owner while we tinker */
6817 	rfs4_sw_enter(&sp->owner->oo_sw);
6818 
6819 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6820 	case NFS4_CHECK_STATEID_OKAY:
6821 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6822 			resop) != NFS4_CHKSEQ_OKAY) {
6823 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6824 			goto end;
6825 		}
6826 		break;
6827 	case NFS4_CHECK_STATEID_OLD:
6828 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6829 		goto end;
6830 	case NFS4_CHECK_STATEID_BAD:
6831 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6832 		goto end;
6833 	case NFS4_CHECK_STATEID_EXPIRED:
6834 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6835 		goto end;
6836 	case NFS4_CHECK_STATEID_CLOSED:
6837 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6838 		goto end;
6839 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6840 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6841 		goto end;
6842 	case NFS4_CHECK_STATEID_REPLAY:
6843 		/* Check the sequence id for the open owner */
6844 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6845 		case NFS4_CHKSEQ_OKAY:
6846 			/*
6847 			 * This is replayed stateid; if seqid matches
6848 			 * next expected, then client is using wrong seqid.
6849 			 */
6850 			/* fall through */
6851 		case NFS4_CHKSEQ_BAD:
6852 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6853 			goto end;
6854 		case NFS4_CHKSEQ_REPLAY:
6855 			/*
6856 			 * Note this case is the duplicate case so
6857 			 * resp->status is already set.
6858 			 */
6859 			*cs->statusp = resp->status;
6860 			rfs4_update_lease(sp->owner->client);
6861 			goto end;
6862 		}
6863 		break;
6864 	default:
6865 		ASSERT(FALSE);
6866 		break;
6867 	}
6868 
6869 	rfs4_dbe_lock(sp->dbe);
6870 	/*
6871 	 * Check that the new access modes and deny modes are valid.
6872 	 * Check that no invalid bits are set.
6873 	 */
6874 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
6875 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
6876 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6877 		rfs4_update_open_sequence(sp->owner);
6878 		rfs4_dbe_unlock(sp->dbe);
6879 		goto end;
6880 	}
6881 
6882 	/*
6883 	 * The new modes must be a subset of the current modes and
6884 	 * the access must specify at least one mode. To test that
6885 	 * the new mode is a subset of the current modes we bitwise
6886 	 * AND them together and check that the result equals the new
6887 	 * mode. For example:
6888 	 * New mode, access == R and current mode, sp->share_access  == RW
6889 	 * access & sp->share_access == R == access, so the new access mode
6890 	 * is valid. Consider access == RW, sp->share_access = R
6891 	 * access & sp->share_access == R != access, so the new access mode
6892 	 * is invalid.
6893 	 */
6894 	if ((access & sp->share_access) != access ||
6895 	    (deny & sp->share_deny) != deny ||
6896 	    (access &
6897 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
6898 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6899 		rfs4_update_open_sequence(sp->owner);
6900 		rfs4_dbe_unlock(sp->dbe);
6901 		goto end;
6902 	}
6903 
6904 	/*
6905 	 * Release any share locks associated with this stateID.
6906 	 * Strictly speaking, this violates the spec because the
6907 	 * spec effectively requires that open downgrade be atomic.
6908 	 * At present, fs_shrlock does not have this capability.
6909 	 */
6910 	rfs4_dbe_unlock(sp->dbe);
6911 	rfs4_unshare(sp);
6912 	rfs4_dbe_lock(sp->dbe);
6913 
6914 	fp = sp->finfo;
6915 	rfs4_dbe_lock(fp->dbe);
6916 
6917 	/*
6918 	 * If the current mode has deny read and the new mode
6919 	 * does not, decrement the number of deny read mode bits
6920 	 * and if it goes to zero turn off the deny read bit
6921 	 * on the file.
6922 	 */
6923 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
6924 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
6925 		fp->deny_read--;
6926 		if (fp->deny_read == 0)
6927 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
6928 	}
6929 
6930 	/*
6931 	 * If the current mode has deny write and the new mode
6932 	 * does not, decrement the number of deny write mode bits
6933 	 * and if it goes to zero turn off the deny write bit
6934 	 * on the file.
6935 	 */
6936 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
6937 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
6938 		fp->deny_write--;
6939 		if (fp->deny_write == 0)
6940 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
6941 	}
6942 
6943 	/*
6944 	 * If the current mode has access read and the new mode
6945 	 * does not, decrement the number of access read mode bits
6946 	 * and if it goes to zero turn off the access read bit
6947 	 * on the file.
6948 	 */
6949 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
6950 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
6951 		fp->access_read--;
6952 		if (fp->access_read == 0)
6953 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
6954 	}
6955 
6956 	/*
6957 	 * If the current mode has access write and the new mode
6958 	 * does not, decrement the number of access write mode bits
6959 	 * and if it goes to zero turn off the access write bit
6960 	 * on the file.
6961 	 */
6962 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
6963 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
6964 		fp->access_write--;
6965 		if (fp->access_write == 0)
6966 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
6967 	}
6968 
6969 	/* Set the new access and deny modes */
6970 	sp->share_access = access;
6971 	sp->share_deny = deny;
6972 	/* Check that the file is still accessible */
6973 	ASSERT(fp->share_access);
6974 
6975 	rfs4_dbe_unlock(fp->dbe);
6976 
6977 	rfs4_dbe_unlock(sp->dbe);
6978 	if ((status = rfs4_share(sp)) != NFS4_OK) {
6979 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6980 		rfs4_update_open_sequence(sp->owner);
6981 		goto end;
6982 	}
6983 
6984 	rfs4_dbe_lock(sp->dbe);
6985 
6986 	/* Update the stateid */
6987 	next_stateid(&sp->stateid);
6988 	resp->open_stateid = sp->stateid.stateid;
6989 
6990 	rfs4_dbe_unlock(sp->dbe);
6991 
6992 	*cs->statusp = resp->status = NFS4_OK;
6993 	/* Update the lease */
6994 	rfs4_update_lease(sp->owner->client);
6995 	/* And the sequence */
6996 	rfs4_update_open_sequence(sp->owner);
6997 	rfs4_update_open_resp(sp->owner, resop, NULL);
6998 
6999 end:
7000 	rfs4_sw_exit(&sp->owner->oo_sw);
7001 	rfs4_state_rele(sp);
7002 }
7003 
7004 /*
7005  * The logic behind this function is detailed in the NFSv4 RFC in the
7006  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7007  * that section for explicit guidance to server behavior for
7008  * SETCLIENTID.
7009  */
7010 void
7011 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7012 		    struct svc_req *req, struct compound_state *cs)
7013 {
7014 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7015 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7016 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7017 	bool_t create = TRUE;
7018 	char *addr, *netid;
7019 	int len;
7020 
7021 retry:
7022 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7023 
7024 	/*
7025 	 * In search of an EXISTING client matching the incoming
7026 	 * request to establish a new client identifier at the server
7027 	 */
7028 	create = TRUE;
7029 	cp = rfs4_findclient(&args->client, &create, NULL);
7030 
7031 	/* Should never happen */
7032 	ASSERT(cp != NULL);
7033 
7034 	if (cp == NULL) {
7035 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7036 		return;
7037 	}
7038 
7039 	/*
7040 	 * Easiest case. Client identifier is newly created and is
7041 	 * unconfirmed.  Also note that for this case, no other
7042 	 * entries exist for the client identifier.  Nothing else to
7043 	 * check.  Just setup the response and respond.
7044 	 */
7045 	if (create) {
7046 		*cs->statusp = res->status = NFS4_OK;
7047 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7048 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7049 							cp->confirm_verf;
7050 		/* Setup callback information; CB_NULL confirmation later */
7051 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7052 
7053 		rfs4_client_rele(cp);
7054 		return;
7055 	}
7056 
7057 	/*
7058 	 * An existing, confirmed client may exist but it may not have
7059 	 * been active for at least one lease period.  If so, then
7060 	 * "close" the client and create a new client identifier
7061 	 */
7062 	if (rfs4_lease_expired(cp)) {
7063 		rfs4_client_close(cp);
7064 		goto retry;
7065 	}
7066 
7067 	if (cp->need_confirm == TRUE)
7068 		cp_unconfirmed = cp;
7069 	else
7070 		cp_confirmed = cp;
7071 
7072 	cp = NULL;
7073 
7074 	/*
7075 	 * We have a confirmed client, now check for an
7076 	 * unconfimred entry
7077 	 */
7078 	if (cp_confirmed) {
7079 		/* If creds don't match then client identifier is inuse */
7080 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7081 			rfs4_cbinfo_t *cbp;
7082 			/*
7083 			 * Some one else has established this client
7084 			 * id. Try and say * who they are. We will use
7085 			 * the call back address supplied by * the
7086 			 * first client.
7087 			 */
7088 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7089 
7090 			addr = netid = NULL;
7091 
7092 			cbp = &cp_confirmed->cbinfo;
7093 			if (cbp->cb_callback.cb_location.r_addr &&
7094 			    cbp->cb_callback.cb_location.r_netid) {
7095 				cb_client4 *cbcp = &cbp->cb_callback;
7096 
7097 				len = strlen(cbcp->cb_location.r_addr)+1;
7098 				addr = kmem_alloc(len, KM_SLEEP);
7099 				bcopy(cbcp->cb_location.r_addr, addr, len);
7100 				len = strlen(cbcp->cb_location.r_netid)+1;
7101 				netid = kmem_alloc(len, KM_SLEEP);
7102 				bcopy(cbcp->cb_location.r_netid, netid, len);
7103 			}
7104 
7105 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7106 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7107 
7108 			rfs4_client_rele(cp_confirmed);
7109 		}
7110 
7111 		/*
7112 		 * Confirmed, creds match, and verifier matches; must
7113 		 * be an update of the callback info
7114 		 */
7115 		if (cp_confirmed->nfs_client.verifier ==
7116 						args->client.verifier) {
7117 			/* Setup callback information */
7118 			rfs4_client_setcb(cp_confirmed, &args->callback,
7119 						args->callback_ident);
7120 
7121 			/* everything okay -- move ahead */
7122 			*cs->statusp = res->status = NFS4_OK;
7123 			res->SETCLIENTID4res_u.resok4.clientid =
7124 				cp_confirmed->clientid;
7125 
7126 			/* update the confirm_verifier and return it */
7127 			rfs4_client_scv_next(cp_confirmed);
7128 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7129 						cp_confirmed->confirm_verf;
7130 
7131 			rfs4_client_rele(cp_confirmed);
7132 			return;
7133 		}
7134 
7135 		/*
7136 		 * Creds match but the verifier doesn't.  Must search
7137 		 * for an unconfirmed client that would be replaced by
7138 		 * this request.
7139 		 */
7140 		create = FALSE;
7141 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7142 						cp_confirmed);
7143 	}
7144 
7145 	/*
7146 	 * At this point, we have taken care of the brand new client
7147 	 * struct, INUSE case, update of an existing, and confirmed
7148 	 * client struct.
7149 	 */
7150 
7151 	/*
7152 	 * check to see if things have changed while we originally
7153 	 * picked up the client struct.  If they have, then return and
7154 	 * retry the processing of this SETCLIENTID request.
7155 	 */
7156 	if (cp_unconfirmed) {
7157 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7158 		if (!cp_unconfirmed->need_confirm) {
7159 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7160 			rfs4_client_rele(cp_unconfirmed);
7161 			if (cp_confirmed)
7162 				rfs4_client_rele(cp_confirmed);
7163 			goto retry;
7164 		}
7165 		/* do away with the old unconfirmed one */
7166 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7167 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7168 		rfs4_client_rele(cp_unconfirmed);
7169 		cp_unconfirmed = NULL;
7170 	}
7171 
7172 	/*
7173 	 * This search will temporarily hide the confirmed client
7174 	 * struct while a new client struct is created as the
7175 	 * unconfirmed one.
7176 	 */
7177 	create = TRUE;
7178 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7179 
7180 	ASSERT(newcp != NULL);
7181 
7182 	if (newcp == NULL) {
7183 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7184 		rfs4_client_rele(cp_confirmed);
7185 		return;
7186 	}
7187 
7188 	/*
7189 	 * If one was not created, then a similar request must be in
7190 	 * process so release and start over with this one
7191 	 */
7192 	if (create != TRUE) {
7193 		rfs4_client_rele(newcp);
7194 		if (cp_confirmed)
7195 			rfs4_client_rele(cp_confirmed);
7196 		goto retry;
7197 	}
7198 
7199 	*cs->statusp = res->status = NFS4_OK;
7200 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7201 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7202 							newcp->confirm_verf;
7203 	/* Setup callback information; CB_NULL confirmation later */
7204 	rfs4_client_setcb(newcp, &args->callback,
7205 				args->callback_ident);
7206 
7207 	newcp->cp_confirmed = cp_confirmed;
7208 
7209 	rfs4_client_rele(newcp);
7210 }
7211 
7212 /*ARGSUSED*/
7213 void
7214 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7215 			    struct svc_req *req, struct compound_state *cs)
7216 {
7217 	SETCLIENTID_CONFIRM4args *args =
7218 		&argop->nfs_argop4_u.opsetclientid_confirm;
7219 	SETCLIENTID_CONFIRM4res *res =
7220 		&resop->nfs_resop4_u.opsetclientid_confirm;
7221 	rfs4_client_t *cp, *cptoclose = NULL;
7222 
7223 	*cs->statusp = res->status = NFS4_OK;
7224 
7225 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7226 
7227 	if (cp == NULL) {
7228 		*cs->statusp = res->status =
7229 			rfs4_check_clientid(&args->clientid, 1);
7230 		return;
7231 	}
7232 
7233 	if (!creds_ok(cp, req, cs)) {
7234 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7235 		rfs4_client_rele(cp);
7236 		return;
7237 	}
7238 
7239 	/* If the verifier doesn't match, the record doesn't match */
7240 	if (cp->confirm_verf != args->setclientid_confirm) {
7241 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7242 		rfs4_client_rele(cp);
7243 		return;
7244 	}
7245 
7246 	rfs4_dbe_lock(cp->dbe);
7247 	cp->need_confirm = FALSE;
7248 	if (cp->cp_confirmed) {
7249 		cptoclose = cp->cp_confirmed;
7250 		cptoclose->ss_remove = 1;
7251 		cp->cp_confirmed = NULL;
7252 	}
7253 
7254 	/*
7255 	 * Record clientid in stable storage
7256 	 */
7257 	rfs4_ss_clid(cp, req);
7258 
7259 	rfs4_dbe_unlock(cp->dbe);
7260 
7261 	if (cptoclose)
7262 		/* don't need to rele, client_close does it */
7263 		rfs4_client_close(cptoclose);
7264 
7265 	/* If needed, initiate CB_NULL call for callback path */
7266 	rfs4_deleg_cb_check(cp);
7267 	rfs4_update_lease(cp);
7268 
7269 	/*
7270 	 * Update the client's associated server instance, if it's changed
7271 	 * since the client was created.
7272 	 */
7273 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7274 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7275 
7276 	/*
7277 	 * Check to see if client can perform reclaims
7278 	 */
7279 	rfs4_ss_chkclid(cp);
7280 
7281 	rfs4_client_rele(cp);
7282 }
7283 
7284 
7285 /*ARGSUSED*/
7286 void
7287 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7288 	    struct svc_req *req, struct compound_state *cs)
7289 {
7290 	/* XXX Currently not using req arg */
7291 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7292 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7293 	rfs4_state_t *sp;
7294 	nfsstat4 status;
7295 
7296 	if (cs->vp == NULL) {
7297 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7298 		return;
7299 	}
7300 
7301 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7302 	if (status != NFS4_OK) {
7303 		*cs->statusp = resp->status = status;
7304 		return;
7305 	}
7306 
7307 	/* Ensure specified filehandle matches */
7308 	if (cs->vp != sp->finfo->vp) {
7309 		rfs4_state_rele(sp);
7310 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7311 		return;
7312 	}
7313 
7314 	/* hold off other access to open_owner while we tinker */
7315 	rfs4_sw_enter(&sp->owner->oo_sw);
7316 
7317 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7318 	case NFS4_CHECK_STATEID_OKAY:
7319 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7320 			resop) != NFS4_CHKSEQ_OKAY) {
7321 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7322 			goto end;
7323 		}
7324 		break;
7325 	case NFS4_CHECK_STATEID_OLD:
7326 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7327 		goto end;
7328 	case NFS4_CHECK_STATEID_BAD:
7329 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7330 		goto end;
7331 	case NFS4_CHECK_STATEID_EXPIRED:
7332 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7333 		goto end;
7334 	case NFS4_CHECK_STATEID_CLOSED:
7335 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7336 		goto end;
7337 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7338 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7339 		goto end;
7340 	case NFS4_CHECK_STATEID_REPLAY:
7341 		/* Check the sequence id for the open owner */
7342 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7343 		case NFS4_CHKSEQ_OKAY:
7344 			/*
7345 			 * This is replayed stateid; if seqid matches
7346 			 * next expected, then client is using wrong seqid.
7347 			 */
7348 			/* FALL THROUGH */
7349 		case NFS4_CHKSEQ_BAD:
7350 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7351 			goto end;
7352 		case NFS4_CHKSEQ_REPLAY:
7353 			/*
7354 			 * Note this case is the duplicate case so
7355 			 * resp->status is already set.
7356 			 */
7357 			*cs->statusp = resp->status;
7358 			rfs4_update_lease(sp->owner->client);
7359 			goto end;
7360 		}
7361 		break;
7362 	default:
7363 		ASSERT(FALSE);
7364 		break;
7365 	}
7366 
7367 	rfs4_dbe_lock(sp->dbe);
7368 
7369 	/* Update the stateid. */
7370 	next_stateid(&sp->stateid);
7371 	resp->open_stateid = sp->stateid.stateid;
7372 
7373 	rfs4_dbe_unlock(sp->dbe);
7374 
7375 	rfs4_update_lease(sp->owner->client);
7376 	rfs4_update_open_sequence(sp->owner);
7377 	rfs4_update_open_resp(sp->owner, resop, NULL);
7378 
7379 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7380 
7381 	*cs->statusp = resp->status = status;
7382 
7383 end:
7384 	rfs4_sw_exit(&sp->owner->oo_sw);
7385 	rfs4_state_rele(sp);
7386 }
7387 
7388 /*
7389  * Manage the counts on the file struct and close all file locks
7390  */
7391 /*ARGSUSED*/
7392 void
7393 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7394 	bool_t close_of_client)
7395 {
7396 	rfs4_file_t *fp = sp->finfo;
7397 	rfs4_lo_state_t *lsp;
7398 	struct shrlock shr;
7399 	struct shr_locowner shr_loco;
7400 	int fflags, s_access, s_deny;
7401 
7402 	fflags = s_access = s_deny = 0;
7403 	/*
7404 	 * Decrement the count for each access and deny bit that this
7405 	 * state has contributed to the file. If the file counts go to zero
7406 	 * clear the appropriate bit in the appropriate mask.
7407 	 */
7408 
7409 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7410 		fp->access_read--;
7411 		fflags |= FREAD;
7412 		s_access |= F_RDACC;
7413 		if (fp->access_read == 0)
7414 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7415 	}
7416 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7417 		fp->access_write--;
7418 		fflags |= FWRITE;
7419 		s_access |= F_WRACC;
7420 		if (fp->access_write == 0)
7421 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7422 	}
7423 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7424 		fp->deny_read--;
7425 		s_deny |= F_RDDNY;
7426 		if (fp->deny_read == 0)
7427 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7428 	}
7429 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7430 		fp->deny_write--;
7431 		s_deny |= F_WRDNY;
7432 		if (fp->deny_write == 0)
7433 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7434 	}
7435 
7436 	/*
7437 	 * If this call is part of the larger closing down of client
7438 	 * state then it is just easier to release all locks
7439 	 * associated with this client instead of going through each
7440 	 * individual file and cleaning locks there.
7441 	 */
7442 	if (close_of_client) {
7443 		if (sp->owner->client->unlksys_completed == FALSE &&
7444 		    sp->lockownerlist.next->lsp != NULL &&
7445 			sp->owner->client->sysidt != LM_NOSYSID) {
7446 			/* Is the PxFS kernel module loaded? */
7447 			if (lm_remove_file_locks != NULL) {
7448 				int new_sysid;
7449 
7450 				/* Encode the cluster nodeid in new sysid */
7451 				new_sysid = sp->owner->client->sysidt;
7452 				lm_set_nlmid_flk(&new_sysid);
7453 
7454 				/*
7455 				 * This PxFS routine removes file locks for a
7456 				 * client over all nodes of a cluster.
7457 				 */
7458 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7459 				    "lm_remove_file_locks(sysid=0x%x)\n",
7460 				    new_sysid));
7461 				(*lm_remove_file_locks)(new_sysid);
7462 			} else {
7463 				struct flock64 flk;
7464 
7465 				/* Release all locks for this client */
7466 				flk.l_type = F_UNLKSYS;
7467 				flk.l_whence = 0;
7468 				flk.l_start = 0;
7469 				flk.l_len = 0;
7470 				flk.l_sysid = sp->owner->client->sysidt;
7471 				flk.l_pid = 0;
7472 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7473 				    F_REMOTELOCK | FREAD | FWRITE,
7474 				    (u_offset_t)0, NULL, CRED());
7475 			}
7476 
7477 			sp->owner->client->unlksys_completed = TRUE;
7478 		}
7479 	}
7480 
7481 	/*
7482 	 * Release all locks on this file by this lock owner or at
7483 	 * least mark the locks as having been released
7484 	 */
7485 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7486 		lsp = lsp->lockownerlist.next->lsp) {
7487 
7488 		lsp->locks_cleaned = TRUE;
7489 
7490 		/* Was this already taken care of above? */
7491 		if (!close_of_client &&
7492 		    sp->owner->client->sysidt != LM_NOSYSID)
7493 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7494 				lsp->locker->client->sysidt);
7495 	}
7496 
7497 	/*
7498 	 * Release any shrlocks associated with this open state ID.
7499 	 * This must be done before the rfs4_state gets marked closed.
7500 	 */
7501 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7502 		shr.s_access = s_access;
7503 		shr.s_deny = s_deny;
7504 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7505 		shr.s_sysid = sp->owner->client->sysidt;
7506 		shr_loco.sl_pid = shr.s_pid;
7507 		shr_loco.sl_id = shr.s_sysid;
7508 		shr.s_owner = (caddr_t)&shr_loco;
7509 		shr.s_own_len = sizeof (shr_loco);
7510 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7511 	}
7512 }
7513 
7514 /*
7515  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7516  */
7517 static nfsstat4
7518 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7519 {
7520 	rfs4_lockowner_t *lo;
7521 	rfs4_client_t *cp;
7522 	uint32_t len;
7523 
7524 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7525 	if (lo != NULL) {
7526 		cp = lo->client;
7527 		if (rfs4_lease_expired(cp)) {
7528 			rfs4_lockowner_rele(lo);
7529 			rfs4_dbe_hold(cp->dbe);
7530 			rfs4_client_close(cp);
7531 			return (NFS4ERR_EXPIRED);
7532 		}
7533 		dp->owner.clientid = lo->owner.clientid;
7534 		len = lo->owner.owner_len;
7535 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7536 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7537 		dp->owner.owner_len = len;
7538 		rfs4_lockowner_rele(lo);
7539 		goto finish;
7540 	}
7541 
7542 	/*
7543 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7544 	 * of the client id contain the boot time for a NFS4 lock. So we
7545 	 * fabricate and identity by setting clientid to the sysid, and
7546 	 * the lock owner to the pid.
7547 	 */
7548 	dp->owner.clientid = flk->l_sysid;
7549 	len = sizeof (pid_t);
7550 	dp->owner.owner_len = len;
7551 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7552 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7553 finish:
7554 	dp->offset = flk->l_start;
7555 	dp->length = flk->l_len;
7556 
7557 	if (flk->l_type == F_RDLCK)
7558 		dp->locktype = READ_LT;
7559 	else if (flk->l_type == F_WRLCK)
7560 		dp->locktype = WRITE_LT;
7561 	else
7562 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7563 
7564 	return (NFS4_OK);
7565 }
7566 
7567 static int
7568 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7569 {
7570 	int error;
7571 	struct flock64 flk;
7572 	int i;
7573 	clock_t delaytime;
7574 
7575 retry:
7576 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7577 
7578 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7579 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7580 		error = VOP_FRLOCK(vp, F_SETLK,
7581 				flock, flag, (u_offset_t)0, NULL, cred);
7582 
7583 		if (error != EAGAIN && error != EACCES)
7584 			break;
7585 
7586 		if (i < rfs4_maxlock_tries - 1) {
7587 			delay(delaytime);
7588 			delaytime *= 2;
7589 		}
7590 	}
7591 
7592 	if (error == EAGAIN || error == EACCES) {
7593 		/* Get the owner of the lock */
7594 		flk = *flock;
7595 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7596 		if (VOP_FRLOCK(vp, F_GETLK,
7597 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7598 			if (flk.l_type == F_UNLCK) {
7599 				/* No longer locked, retry */
7600 				goto retry;
7601 			}
7602 			*flock = flk;
7603 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7604 				F_GETLK, &flk);
7605 		}
7606 	}
7607 
7608 	return (error);
7609 }
7610 
7611 /*ARGSUSED*/
7612 static nfsstat4
7613 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7614 	    seqid4 seqid, offset4 offset,
7615 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7616 {
7617 	nfsstat4 status;
7618 	rfs4_lockowner_t *lo = lp->locker;
7619 	rfs4_state_t *sp = lp->state;
7620 	struct flock64 flock;
7621 	int16_t ltype;
7622 	int flag;
7623 	int error;
7624 	sysid_t sysid;
7625 	LOCK4res *lres;
7626 
7627 	if (rfs4_lease_expired(lo->client)) {
7628 		return (NFS4ERR_EXPIRED);
7629 	}
7630 
7631 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7632 		return (status);
7633 
7634 	/* Check for zero length. To lock to end of file use all ones for V4 */
7635 	if (length == 0)
7636 		return (NFS4ERR_INVAL);
7637 	else if (length == (length4)(~0))
7638 		length = 0;		/* Posix to end of file  */
7639 
7640 retry:
7641 	rfs4_dbe_lock(sp->dbe);
7642 
7643 
7644 	if (resop->resop != OP_LOCKU) {
7645 		switch (locktype) {
7646 		case READ_LT:
7647 		case READW_LT:
7648 			if ((sp->share_access
7649 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7650 				rfs4_dbe_unlock(sp->dbe);
7651 
7652 				return (NFS4ERR_OPENMODE);
7653 			}
7654 			ltype = F_RDLCK;
7655 			break;
7656 		case WRITE_LT:
7657 		case WRITEW_LT:
7658 			if ((sp->share_access
7659 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7660 				rfs4_dbe_unlock(sp->dbe);
7661 
7662 				return (NFS4ERR_OPENMODE);
7663 			}
7664 			ltype = F_WRLCK;
7665 			break;
7666 		}
7667 	} else
7668 		ltype = F_UNLCK;
7669 
7670 	flock.l_type = ltype;
7671 	flock.l_whence = 0;		/* SEEK_SET */
7672 	flock.l_start = offset;
7673 	flock.l_len = length;
7674 	flock.l_sysid = sysid;
7675 	flock.l_pid = lp->locker->pid;
7676 
7677 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7678 	if (flock.l_len < 0 || flock.l_start < 0) {
7679 		rfs4_dbe_unlock(sp->dbe);
7680 		return (NFS4ERR_INVAL);
7681 	}
7682 
7683 	/*
7684 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7685 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7686 	 */
7687 	flag = (int)sp->share_access | F_REMOTELOCK;
7688 
7689 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7690 	if (error == 0) {
7691 		rfs4_dbe_lock(lp->dbe);
7692 		next_stateid(&lp->lockid);
7693 		rfs4_dbe_unlock(lp->dbe);
7694 	}
7695 
7696 	rfs4_dbe_unlock(sp->dbe);
7697 
7698 	/*
7699 	 * N.B. We map error values to nfsv4 errors. This is differrent
7700 	 * than puterrno4 routine.
7701 	 */
7702 	switch (error) {
7703 	case 0:
7704 		status = NFS4_OK;
7705 		break;
7706 	case EAGAIN:
7707 	case EACCES:		/* Old value */
7708 		/* Can only get here if op is OP_LOCK */
7709 		ASSERT(resop->resop == OP_LOCK);
7710 		lres = &resop->nfs_resop4_u.oplock;
7711 		status = NFS4ERR_DENIED;
7712 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7713 			== NFS4ERR_EXPIRED)
7714 			goto retry;
7715 		break;
7716 	case ENOLCK:
7717 		status = NFS4ERR_DELAY;
7718 		break;
7719 	case EOVERFLOW:
7720 		status = NFS4ERR_INVAL;
7721 		break;
7722 	case EINVAL:
7723 		status = NFS4ERR_NOTSUPP;
7724 		break;
7725 	default:
7726 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7727 			error);
7728 		status = NFS4ERR_SERVERFAULT;
7729 		break;
7730 	}
7731 
7732 	return (status);
7733 }
7734 
7735 /*ARGSUSED*/
7736 void
7737 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
7738 	    struct svc_req *req, struct compound_state *cs)
7739 {
7740 	/* XXX Currently not using req arg */
7741 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
7742 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
7743 	nfsstat4 status;
7744 	stateid4 *stateid;
7745 	rfs4_lockowner_t *lo;
7746 	rfs4_client_t *cp;
7747 	rfs4_state_t *sp = NULL;
7748 	rfs4_lo_state_t *lsp = NULL;
7749 	bool_t ls_sw_held = FALSE;
7750 	bool_t create = TRUE;
7751 	bool_t lcreate = TRUE;
7752 	bool_t dup_lock = FALSE;
7753 	int rc;
7754 
7755 	if (cs->vp == NULL) {
7756 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7757 		return;
7758 	}
7759 
7760 	if (args->locker.new_lock_owner) {
7761 		/* Create a new lockowner for this instance */
7762 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
7763 
7764 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
7765 
7766 		stateid = &olo->open_stateid;
7767 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
7768 		if (status != NFS4_OK) {
7769 			NFS4_DEBUG(rfs4_debug,
7770 				(CE_NOTE, "Get state failed in lock %d",
7771 				status));
7772 			*cs->statusp = resp->status = status;
7773 			return;
7774 		}
7775 
7776 		/* Ensure specified filehandle matches */
7777 		if (cs->vp != sp->finfo->vp) {
7778 			rfs4_state_rele(sp);
7779 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7780 			return;
7781 		}
7782 
7783 		/* hold off other access to open_owner while we tinker */
7784 		rfs4_sw_enter(&sp->owner->oo_sw);
7785 
7786 		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
7787 		case NFS4_CHECK_STATEID_OLD:
7788 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7789 			goto end;
7790 		case NFS4_CHECK_STATEID_BAD:
7791 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7792 			goto end;
7793 		case NFS4_CHECK_STATEID_EXPIRED:
7794 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7795 			goto end;
7796 		case NFS4_CHECK_STATEID_UNCONFIRMED:
7797 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7798 			goto end;
7799 		case NFS4_CHECK_STATEID_CLOSED:
7800 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7801 			goto end;
7802 		case NFS4_CHECK_STATEID_OKAY:
7803 		case NFS4_CHECK_STATEID_REPLAY:
7804 			switch (rfs4_check_olo_seqid(olo->open_seqid,
7805 				sp->owner, resop)) {
7806 			case NFS4_CHKSEQ_OKAY:
7807 				if (rc == NFS4_CHECK_STATEID_OKAY)
7808 					break;
7809 				/*
7810 				 * This is replayed stateid; if seqid
7811 				 * matches next expected, then client
7812 				 * is using wrong seqid.
7813 				 */
7814 				/* FALLTHROUGH */
7815 			case NFS4_CHKSEQ_BAD:
7816 				*cs->statusp = resp->status =
7817 					NFS4ERR_BAD_SEQID;
7818 				goto end;
7819 			case NFS4_CHKSEQ_REPLAY:
7820 				/* This is a duplicate LOCK request */
7821 				dup_lock = TRUE;
7822 
7823 				/*
7824 				 * For a duplicate we do not want to
7825 				 * create a new lockowner as it should
7826 				 * already exist.
7827 				 * Turn off the lockowner create flag.
7828 				 */
7829 				lcreate = FALSE;
7830 			}
7831 			break;
7832 		}
7833 
7834 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
7835 		if (lo == NULL) {
7836 			NFS4_DEBUG(rfs4_debug,
7837 				(CE_NOTE, "rfs4_op_lock: no lock owner"));
7838 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
7839 			goto end;
7840 		}
7841 
7842 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
7843 		if (lsp == NULL) {
7844 			rfs4_update_lease(sp->owner->client);
7845 			/*
7846 			 * Only update theh open_seqid if this is not
7847 			 * a duplicate request
7848 			 */
7849 			if (dup_lock == FALSE) {
7850 				rfs4_update_open_sequence(sp->owner);
7851 			}
7852 
7853 			NFS4_DEBUG(rfs4_debug,
7854 				(CE_NOTE, "rfs4_op_lock: no state"));
7855 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7856 			rfs4_update_open_resp(sp->owner, resop, NULL);
7857 			rfs4_lockowner_rele(lo);
7858 			goto end;
7859 		}
7860 
7861 		/*
7862 		 * This is the new_lock_owner branch and the client is
7863 		 * supposed to be associating a new lock_owner with
7864 		 * the open file at this point.  If we find that a
7865 		 * lock_owner/state association already exists and a
7866 		 * successful LOCK request was returned to the client,
7867 		 * an error is returned to the client since this is
7868 		 * not appropriate.  The client should be using the
7869 		 * existing lock_owner branch.
7870 		 */
7871 		if (dup_lock == FALSE && create == FALSE) {
7872 			if (lsp->lock_completed == TRUE) {
7873 				*cs->statusp =
7874 					resp->status = NFS4ERR_BAD_SEQID;
7875 				rfs4_lockowner_rele(lo);
7876 				goto end;
7877 			}
7878 		}
7879 
7880 		rfs4_update_lease(sp->owner->client);
7881 
7882 		/*
7883 		 * Only update theh open_seqid if this is not
7884 		 * a duplicate request
7885 		 */
7886 		if (dup_lock == FALSE) {
7887 			rfs4_update_open_sequence(sp->owner);
7888 		}
7889 
7890 		/*
7891 		 * If this is a duplicate lock request, just copy the
7892 		 * previously saved reply and return.
7893 		 */
7894 		if (dup_lock == TRUE) {
7895 			/* verify that lock_seqid's match */
7896 			if (lsp->seqid != olo->lock_seqid) {
7897 				NFS4_DEBUG(rfs4_debug,
7898 				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
7899 				"lsp->seqid=%d old->seqid=%d",
7900 				lsp->seqid, olo->lock_seqid));
7901 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7902 			} else {
7903 				rfs4_copy_reply(resop, lsp->reply);
7904 				/*
7905 				 * Make sure to copy the just
7906 				 * retrieved reply status into the
7907 				 * overall compound status
7908 				 */
7909 				*cs->statusp = resp->status;
7910 			}
7911 			rfs4_lockowner_rele(lo);
7912 			goto end;
7913 		}
7914 
7915 		rfs4_dbe_lock(lsp->dbe);
7916 
7917 		/* Make sure to update the lock sequence id */
7918 		lsp->seqid = olo->lock_seqid;
7919 
7920 		NFS4_DEBUG(rfs4_debug,
7921 			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));
7922 
7923 		/*
7924 		 * This is used to signify the newly created lockowner
7925 		 * stateid and its sequence number.  The checks for
7926 		 * sequence number and increment don't occur on the
7927 		 * very first lock request for a lockowner.
7928 		 */
7929 		lsp->skip_seqid_check = TRUE;
7930 
7931 		/* hold off other access to lsp while we tinker */
7932 		rfs4_sw_enter(&lsp->ls_sw);
7933 		ls_sw_held = TRUE;
7934 
7935 		rfs4_dbe_unlock(lsp->dbe);
7936 
7937 		rfs4_lockowner_rele(lo);
7938 	} else {
7939 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
7940 		/* get lsp and hold the lock on the underlying file struct */
7941 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
7942 		    != NFS4_OK) {
7943 			*cs->statusp = resp->status = status;
7944 			return;
7945 		}
7946 		create = FALSE;	/* We didn't create lsp */
7947 
7948 		/* Ensure specified filehandle matches */
7949 		if (cs->vp != lsp->state->finfo->vp) {
7950 			rfs4_lo_state_rele(lsp, TRUE);
7951 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7952 			return;
7953 		}
7954 
7955 		/* hold off other access to lsp while we tinker */
7956 		rfs4_sw_enter(&lsp->ls_sw);
7957 		ls_sw_held = TRUE;
7958 
7959 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
7960 		/*
7961 		 * The stateid looks like it was okay (expected to be
7962 		 * the next one)
7963 		 */
7964 		case NFS4_CHECK_STATEID_OKAY:
7965 			/*
7966 			 * The sequence id is now checked.  Determine
7967 			 * if this is a replay or if it is in the
7968 			 * expected (next) sequence.  In the case of a
7969 			 * replay, there are two replay conditions
7970 			 * that may occur.  The first is the normal
7971 			 * condition where a LOCK is done with a
7972 			 * NFS4_OK response and the stateid is
7973 			 * updated.  That case is handled below when
7974 			 * the stateid is identified as a REPLAY.  The
7975 			 * second is the case where an error is
7976 			 * returned, like NFS4ERR_DENIED, and the
7977 			 * sequence number is updated but the stateid
7978 			 * is not updated.  This second case is dealt
7979 			 * with here.  So it may seem odd that the
7980 			 * stateid is okay but the sequence id is a
7981 			 * replay but it is okay.
7982 			 */
7983 			switch (rfs4_check_lock_seqid(
7984 				args->locker.locker4_u.lock_owner.lock_seqid,
7985 				lsp, resop)) {
7986 			case NFS4_CHKSEQ_REPLAY:
7987 				if (resp->status != NFS4_OK) {
7988 					/*
7989 					 * Here is our replay and need
7990 					 * to verify that the last
7991 					 * response was an error.
7992 					 */
7993 					*cs->statusp = resp->status;
7994 					goto end;
7995 				}
7996 				/*
7997 				 * This is done since the sequence id
7998 				 * looked like a replay but it didn't
7999 				 * pass our check so a BAD_SEQID is
8000 				 * returned as a result.
8001 				 */
8002 				/*FALLTHROUGH*/
8003 			case NFS4_CHKSEQ_BAD:
8004 				*cs->statusp = resp->status =
8005 					NFS4ERR_BAD_SEQID;
8006 				goto end;
8007 			case NFS4_CHKSEQ_OKAY:
8008 				/* Everything looks okay move ahead */
8009 				break;
8010 			}
8011 			break;
8012 		case NFS4_CHECK_STATEID_OLD:
8013 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8014 			goto end;
8015 		case NFS4_CHECK_STATEID_BAD:
8016 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8017 			goto end;
8018 		case NFS4_CHECK_STATEID_EXPIRED:
8019 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8020 			goto end;
8021 		case NFS4_CHECK_STATEID_CLOSED:
8022 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8023 			goto end;
8024 		case NFS4_CHECK_STATEID_REPLAY:
8025 			switch (rfs4_check_lock_seqid(
8026 				args->locker.locker4_u.lock_owner.lock_seqid,
8027 				lsp, resop)) {
8028 			case NFS4_CHKSEQ_OKAY:
8029 				/*
8030 				 * This is a replayed stateid; if
8031 				 * seqid matches the next expected,
8032 				 * then client is using wrong seqid.
8033 				 */
8034 			case NFS4_CHKSEQ_BAD:
8035 				*cs->statusp = resp->status =
8036 					NFS4ERR_BAD_SEQID;
8037 				goto end;
8038 			case NFS4_CHKSEQ_REPLAY:
8039 				rfs4_update_lease(lsp->locker->client);
8040 				*cs->statusp = status = resp->status;
8041 				goto end;
8042 			}
8043 			break;
8044 		default:
8045 			ASSERT(FALSE);
8046 			break;
8047 		}
8048 
8049 		rfs4_update_lock_sequence(lsp);
8050 		rfs4_update_lease(lsp->locker->client);
8051 	}
8052 
8053 	/*
8054 	 * NFS4 only allows locking on regular files, so
8055 	 * verify type of object.
8056 	 */
8057 	if (cs->vp->v_type != VREG) {
8058 		if (cs->vp->v_type == VDIR)
8059 			status = NFS4ERR_ISDIR;
8060 		else
8061 			status = NFS4ERR_INVAL;
8062 		goto out;
8063 	}
8064 
8065 	cp = lsp->state->owner->client;
8066 
8067 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8068 		status = NFS4ERR_GRACE;
8069 		goto out;
8070 	}
8071 
8072 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
8073 		status = NFS4ERR_NO_GRACE;
8074 		goto out;
8075 	}
8076 
8077 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8078 		status = NFS4ERR_NO_GRACE;
8079 		goto out;
8080 	}
8081 
8082 	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
8083 		cs->deleg = TRUE;
8084 
8085 	status = rfs4_do_lock(lsp, args->locktype,
8086 				args->locker.locker4_u.lock_owner.lock_seqid,
8087 				args->offset,
8088 				args->length, cs->cr, resop);
8089 
8090 out:
8091 	lsp->skip_seqid_check = FALSE;
8092 
8093 	*cs->statusp = resp->status = status;
8094 
8095 	if (status == NFS4_OK) {
8096 		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
8097 		lsp->lock_completed = TRUE;
8098 	}
8099 	/*
8100 	 * Only update the "OPEN" response here if this was a new
8101 	 * lock_owner
8102 	 */
8103 	if (sp)
8104 		rfs4_update_open_resp(sp->owner, resop, NULL);
8105 
8106 	rfs4_update_lock_resp(lsp, resop);
8107 
8108 end:
8109 	if (lsp) {
8110 		if (ls_sw_held)
8111 			rfs4_sw_exit(&lsp->ls_sw);
8112 		/*
8113 		 * If an sp obtained, then the lsp does not represent
8114 		 * a lock on the file struct.
8115 		 */
8116 		if (sp != NULL)
8117 			rfs4_lo_state_rele(lsp, FALSE);
8118 		else
8119 			rfs4_lo_state_rele(lsp, TRUE);
8120 	}
8121 	if (sp) {
8122 		rfs4_sw_exit(&sp->owner->oo_sw);
8123 		rfs4_state_rele(sp);
8124 	}
8125 }
8126 
8127 /* free function for LOCK/LOCKT */
8128 static void
8129 lock_denied_free(nfs_resop4 *resop)
8130 {
8131 	LOCK4denied *dp = NULL;
8132 
8133 	switch (resop->resop) {
8134 	case OP_LOCK:
8135 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8136 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8137 		break;
8138 	case OP_LOCKT:
8139 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8140 			dp = &resop->nfs_resop4_u.oplockt.denied;
8141 		break;
8142 	default:
8143 		break;
8144 	}
8145 
8146 	if (dp)
8147 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8148 }
8149 
8150 /*ARGSUSED*/
8151 void
8152 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8153 	    struct svc_req *req, struct compound_state *cs)
8154 {
8155 	/* XXX Currently not using req arg */
8156 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8157 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8158 	nfsstat4 status;
8159 	stateid4 *stateid = &args->lock_stateid;
8160 	rfs4_lo_state_t *lsp;
8161 
8162 	if (cs->vp == NULL) {
8163 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8164 		return;
8165 	}
8166 
8167 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8168 		*cs->statusp = resp->status = status;
8169 		return;
8170 	}
8171 
8172 	/* Ensure specified filehandle matches */
8173 	if (cs->vp != lsp->state->finfo->vp) {
8174 		rfs4_lo_state_rele(lsp, TRUE);
8175 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8176 		return;
8177 	}
8178 
8179 	/* hold off other access to lsp while we tinker */
8180 	rfs4_sw_enter(&lsp->ls_sw);
8181 
8182 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8183 	case NFS4_CHECK_STATEID_OKAY:
8184 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8185 		    != NFS4_CHKSEQ_OKAY) {
8186 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8187 			goto end;
8188 		}
8189 		break;
8190 	case NFS4_CHECK_STATEID_OLD:
8191 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8192 		goto end;
8193 	case NFS4_CHECK_STATEID_BAD:
8194 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8195 		goto end;
8196 	case NFS4_CHECK_STATEID_EXPIRED:
8197 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8198 		goto end;
8199 	case NFS4_CHECK_STATEID_CLOSED:
8200 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8201 		goto end;
8202 	case NFS4_CHECK_STATEID_REPLAY:
8203 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8204 		case NFS4_CHKSEQ_OKAY:
8205 				/*
8206 				 * This is a replayed stateid; if
8207 				 * seqid matches the next expected,
8208 				 * then client is using wrong seqid.
8209 				 */
8210 		case NFS4_CHKSEQ_BAD:
8211 			*cs->statusp = resp->status =
8212 				NFS4ERR_BAD_SEQID;
8213 			goto end;
8214 		case NFS4_CHKSEQ_REPLAY:
8215 			rfs4_update_lease(lsp->locker->client);
8216 			*cs->statusp = status = resp->status;
8217 			goto end;
8218 		}
8219 		break;
8220 	default:
8221 		ASSERT(FALSE);
8222 		break;
8223 	}
8224 
8225 	rfs4_update_lock_sequence(lsp);
8226 	rfs4_update_lease(lsp->locker->client);
8227 
8228 	/*
8229 	 * NFS4 only allows locking on regular files, so
8230 	 * verify type of object.
8231 	 */
8232 	if (cs->vp->v_type != VREG) {
8233 		if (cs->vp->v_type == VDIR)
8234 			status = NFS4ERR_ISDIR;
8235 		else
8236 			status = NFS4ERR_INVAL;
8237 		goto out;
8238 	}
8239 
8240 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8241 		status = NFS4ERR_GRACE;
8242 		goto out;
8243 	}
8244 
8245 	status = rfs4_do_lock(lsp, args->locktype,
8246 			    args->seqid, args->offset,
8247 			    args->length, cs->cr, resop);
8248 
8249 out:
8250 	*cs->statusp = resp->status = status;
8251 
8252 	if (status == NFS4_OK)
8253 		resp->lock_stateid = lsp->lockid.stateid;
8254 
8255 	rfs4_update_lock_resp(lsp, resop);
8256 
8257 end:
8258 	rfs4_sw_exit(&lsp->ls_sw);
8259 	rfs4_lo_state_rele(lsp, TRUE);
8260 }
8261 
8262 /*
8263  * LOCKT is a best effort routine, the client can not be guaranteed that
8264  * the status return is still in effect by the time the reply is received.
8265  * They are numerous race conditions in this routine, but we are not required
8266  * and can not be accurate.
8267  */
8268 /*ARGSUSED*/
8269 void
8270 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8271 	    struct svc_req *req, struct compound_state *cs)
8272 {
8273 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8274 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8275 	rfs4_lockowner_t *lo;
8276 	rfs4_client_t *cp;
8277 	bool_t create = FALSE;
8278 	struct flock64 flk;
8279 	int error;
8280 	int flag = FREAD | FWRITE;
8281 	int ltype;
8282 	length4 posix_length;
8283 	sysid_t sysid;
8284 	pid_t pid;
8285 
8286 	if (cs->vp == NULL) {
8287 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8288 		return;
8289 	}
8290 
8291 	/*
8292 	 * NFS4 only allows locking on regular files, so
8293 	 * verify type of object.
8294 	 */
8295 	if (cs->vp->v_type != VREG) {
8296 		if (cs->vp->v_type == VDIR)
8297 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8298 		else
8299 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8300 		return;
8301 	}
8302 
8303 	/*
8304 	 * Check out the clientid to ensure the server knows about it
8305 	 * so that we correctly inform the client of a server reboot.
8306 	 */
8307 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8308 	    == NULL) {
8309 		*cs->statusp = resp->status =
8310 			rfs4_check_clientid(&args->owner.clientid, 0);
8311 		return;
8312 	}
8313 	if (rfs4_lease_expired(cp)) {
8314 		rfs4_client_close(cp);
8315 		/*
8316 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8317 		 * other operations do on this check so STALE_CLIENTID
8318 		 * is returned instead
8319 		 */
8320 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8321 		return;
8322 	}
8323 
8324 	if (rfs4_clnt_in_grace(cp)) {
8325 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8326 		return;
8327 	}
8328 	rfs4_client_rele(cp);
8329 
8330 	resp->status = NFS4_OK;
8331 
8332 	switch (args->locktype) {
8333 	case READ_LT:
8334 	case READW_LT:
8335 		ltype = F_RDLCK;
8336 		break;
8337 	case WRITE_LT:
8338 	case WRITEW_LT:
8339 		ltype = F_WRLCK;
8340 		break;
8341 	}
8342 
8343 	posix_length = args->length;
8344 	/* Check for zero length. To lock to end of file use all ones for V4 */
8345 	if (posix_length == 0) {
8346 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8347 		return;
8348 	} else if (posix_length == (length4)(~0)) {
8349 		posix_length = 0;	/* Posix to end of file  */
8350 	}
8351 
8352 	/* Find or create a lockowner */
8353 	lo = rfs4_findlockowner(&args->owner, &create);
8354 
8355 	if (lo) {
8356 		pid = lo->pid;
8357 		if ((resp->status =
8358 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8359 		goto out;
8360 	} else {
8361 		pid = 0;
8362 		sysid = lockt_sysid;
8363 	}
8364 retry:
8365 	flk.l_type = ltype;
8366 	flk.l_whence = 0;		/* SEEK_SET */
8367 	flk.l_start = args->offset;
8368 	flk.l_len = posix_length;
8369 	flk.l_sysid = sysid;
8370 	flk.l_pid = pid;
8371 	flag |= F_REMOTELOCK;
8372 
8373 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8374 
8375 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8376 	if (flk.l_len < 0 || flk.l_start < 0) {
8377 		resp->status = NFS4ERR_INVAL;
8378 		goto out;
8379 	}
8380 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8381 	    NULL, cs->cr);
8382 
8383 	/*
8384 	 * N.B. We map error values to nfsv4 errors. This is differrent
8385 	 * than puterrno4 routine.
8386 	 */
8387 	switch (error) {
8388 	case 0:
8389 		if (flk.l_type == F_UNLCK)
8390 			resp->status = NFS4_OK;
8391 		else {
8392 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8393 				goto retry;
8394 			resp->status = NFS4ERR_DENIED;
8395 		}
8396 		break;
8397 	case EOVERFLOW:
8398 		resp->status = NFS4ERR_INVAL;
8399 		break;
8400 	case EINVAL:
8401 		resp->status = NFS4ERR_NOTSUPP;
8402 		break;
8403 	default:
8404 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8405 			error);
8406 		resp->status = NFS4ERR_SERVERFAULT;
8407 		break;
8408 	}
8409 
8410 out:
8411 	if (lo)
8412 		rfs4_lockowner_rele(lo);
8413 	*cs->statusp = resp->status;
8414 }
8415 
8416 static int
8417 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8418 {
8419 	int err;
8420 
8421 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8422 		return (0);
8423 
8424 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8425 
8426 	NFS4_DEBUG(rfs4_shrlock_debug,
8427 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8428 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8429 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8430 		err));
8431 
8432 	return (err);
8433 }
8434 
8435 static int
8436 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8437 {
8438 	struct shrlock shr;
8439 	struct shr_locowner shr_loco;
8440 	int fflags;
8441 
8442 	fflags = shr.s_access = shr.s_deny = 0;
8443 
8444 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8445 		fflags |= FREAD;
8446 		shr.s_access |= F_RDACC;
8447 	}
8448 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8449 		fflags |= FWRITE;
8450 		shr.s_access |= F_WRACC;
8451 	}
8452 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8453 		shr.s_deny |= F_RDDNY;
8454 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8455 		shr.s_deny |= F_WRDNY;
8456 
8457 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8458 	shr.s_sysid = sp->owner->client->sysidt;
8459 	shr_loco.sl_pid = shr.s_pid;
8460 	shr_loco.sl_id = shr.s_sysid;
8461 	shr.s_owner = (caddr_t)&shr_loco;
8462 	shr.s_own_len = sizeof (shr_loco);
8463 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8464 }
8465 
8466 static int
8467 rfs4_share(rfs4_state_t *sp)
8468 {
8469 	return (rfs4_shrlock(sp, F_SHARE));
8470 }
8471 
8472 void
8473 rfs4_unshare(rfs4_state_t *sp)
8474 {
8475 	(void) rfs4_shrlock(sp, F_UNSHARE);
8476 }
8477