xref: /titanic_52/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 5d54f3d8999eac1762fe0a8c7177d20f1f201fae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All Rights Reserved
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/rpcsec_gss.h>
61 #include <rpc/svc.h>
62 
63 #include <nfs/nfs.h>
64 #include <nfs/export.h>
65 #include <nfs/lm.h>
66 #include <nfs/nfs4.h>
67 
68 #include <sys/strsubr.h>
69 #include <sys/strsun.h>
70 
71 #include <inet/common.h>
72 #include <inet/ip.h>
73 #include <inet/ip6.h>
74 
/*
 * Tunables: how many times a lock acquisition is attempted and the delay
 * associated with those attempts.  Each has a compile-time default and a
 * file-static variable initialized from it.
 * NOTE(review): the code that consumes these variables is outside this
 * excerpt -- confirm how the delay is applied there.
 */
75 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
76 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
77 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
78 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
79 
80 /* End of Tunables */
81 
82 /*
83  * Used to bump the stateid4.seqid value and show changes in the stateid
 *
 * Pre-increments the bits.chgseq member reached through `sp' and
 * evaluates to the incremented value.  `sp' is evaluated exactly once.
84  */
85 #define	next_stateid(sp) (++(sp)->bits.chgseq)
86 
87 /*
88  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
89  *	This is used to return NFS4ERR_TOOSMALL when clients specify
90  *	maxcount that isn't large enough to hold the smallest possible
91  *	XDR encoded dirent.
92  *
93  *	    sizeof cookie (8 bytes) +
94  *	    sizeof name_len (4 bytes) +
95  *	    sizeof smallest (padded) name (4 bytes) +
96  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
97  *	    sizeof attrlist4_len (4 bytes) +
98  *	    sizeof next boolean (4 bytes)
 *	    = 36 bytes in total
99  *
100  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
101  * the smallest possible entry4 (assumes no attrs requested).
102  *	sizeof nfsstat4 (4 bytes) +
103  *	sizeof verifier4 (8 bytes) +
104  *	sizeof entry4list bool (4 bytes) +
105  *	sizeof entry4 	(36 bytes) +
106  *	sizeof eof bool  (4 bytes)
107  *
108  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
109  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
110  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
111  *	required for a given name length.  MAXNAMELEN is the maximum
112  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
113  *	macros are to allow for . and .. entries -- just a minor tweak to try
114  *	and guarantee that the buffer we give to VOP_READDIR will be large
 *	enough
115  *	to hold ., .., and the largest possible solaris dirent64.
116  */
117 #define	RFS4_MINLEN_ENTRY4 36
118 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
119 #define	RFS4_MINLEN_RDDIR_BUF \
120 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
121 
122 /*
123  * It would be better to pad to 4 bytes since that's what XDR would do,
124  * but the dirents UFS gives us are already padded to 8, so just take
125  * what we're given.  Dircount is only a hint anyway.  Currently the
126  * solaris kernel is ASCII only, so there's no point in calling the
127  * UTF8 functions.
128  *
129  * dirent64: name padded to provide 8 byte struct alignment
130  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
131  *
132  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
133  *
 * The macro therefore charges three XDR units for the fixed part
 * (presumably two for the 64-bit cookie and one for the name length,
 * assuming a 4-byte XDR unit -- confirm) plus the name length that
 * DIRENT64_NAMELEN() derives from the entry's d_reclen.
134  */
135 #define	DIRENT64_TO_DIRCOUNT(dp) \
136 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
137 
/*
 * File-level server state.
 *
 * NOTE(review): the comment on rfs4_start_time says it is initialized in
 * rfs4_srvrinit(), but the rfs4_srvrinit() in this file does not assign
 * it -- confirm where it is actually set.
 */
138 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
139 
140 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
141 
/* Assigned from fs_new_caller_id() in rfs4_srvrinit(). */
142 u_longlong_t nfs4_srv_caller_id;
143 
/*
 * Write4verf is built in rfs4_srvrinit() from the host id and/or the
 * current time.  Readdir4verf is not assigned anywhere in this excerpt.
 */
144 verifier4	Write4verf;
145 verifier4	Readdir4verf;
146 
/*
 * Forward declarations.
 *
 * The static rfs4_op_*() handlers all take (argop, resop, svc_req,
 * compound_state) and are installed as dis_proc entries in rfsv4disptab[]
 * below.  The *_free() routines take only the result op; most of them are
 * installed as dis_resfree entries in the same table.
 * NOTE(review): rfs4_op_create_free is declared here but the OP_CREATE
 * table entry uses nullfree -- confirm whether that is intended.
 */
147 void		rfs4_init_compound_state(struct compound_state *);
148 
149 static void	nullfree(caddr_t);
150 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
151 			struct compound_state *);
152 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
153 			struct compound_state *);
154 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
155 			struct compound_state *);
156 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
157 			struct compound_state *);
158 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
159 			struct compound_state *);
160 static void	rfs4_op_create_free(nfs_resop4 *resop);
161 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
162 				    struct svc_req *, struct compound_state *);
163 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 			struct compound_state *);
165 static void	rfs4_op_getattr_free(nfs_resop4 *);
166 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 			struct compound_state *);
168 static void	rfs4_op_getfh_free(nfs_resop4 *);
169 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 			struct compound_state *);
171 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 			struct compound_state *);
173 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 			struct compound_state *);
175 static void	lock_denied_free(nfs_resop4 *);
176 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 			struct compound_state *);
178 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 			struct compound_state *);
180 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 			struct compound_state *);
182 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 			struct compound_state *);
184 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
185 				struct svc_req *req, struct compound_state *cs);
186 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 			struct compound_state *);
188 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 			struct compound_state *);
190 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
191 			struct svc_req *, struct compound_state *);
192 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
193 			struct svc_req *, struct compound_state *);
194 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 			struct compound_state *);
196 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 			struct compound_state *);
198 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 			struct compound_state *);
200 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 			struct compound_state *);
202 static void	rfs4_op_read_free(nfs_resop4 *);
203 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
204 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 			struct compound_state *);
206 static void	rfs4_op_readlink_free(nfs_resop4 *);
207 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
208 			struct svc_req *, struct compound_state *);
209 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 			struct compound_state *);
211 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 			struct compound_state *);
213 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 			struct compound_state *);
215 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 			struct compound_state *);
217 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 			struct compound_state *);
219 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 			struct compound_state *);
221 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 			struct compound_state *);
223 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 			struct compound_state *);
225 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
226 			struct svc_req *, struct compound_state *);
227 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
228 			struct svc_req *req, struct compound_state *);
229 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 			struct compound_state *);
231 static void	rfs4_op_secinfo_free(nfs_resop4 *);
232 
/* Helpers for access checking, client sysids and share reservations. */
233 static nfsstat4 check_open_access(uint32_t,
234 				struct compound_state *, struct svc_req *);
235 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
236 static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
237 static int 	rfs4_shrlock(rfs4_state_t *, int);
238 static int	rfs4_share(rfs4_state_t *);
239 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
240 
241 /*
242  * translation table for attrs
 *
 * NOTE(review): the code that fills and reads this table is outside this
 * excerpt; the per-field notes below are inferred from the declarations
 * only and should be confirmed against nfs4_ntov_table_init() and its
 * users.
243  */
244 struct nfs4_ntov_table {
245 	union nfs4_attr_u *na;		/* array of attribute values */
246 	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* one byte per possible attr */
247 	int attrcnt;			/* presumably entries in use */
248 	bool_t vfsstat;			/* presumably "vfs stats needed" */
249 };
250 
/* Set up / tear down an nfs4_ntov_table. */
251 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
252 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
253 				    struct nfs4_svgetit_arg *sargp);
254 
255 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
256 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
257 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
258 
/*
 * File event monitors created by fem_create() in rfs4_srvrinit() from the
 * nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl templates, and released with
 * fem_free() in rfs4_srvrfini().
 */
259 fem_t	*deleg_rdops;
260 fem_t	*deleg_wrops;
261 
/*
 * Server instances form a list linked through ->prev/->next;
 * rfs4_cur_servinst is the most recently created one and list walks in
 * this file start there and follow ->prev.
 */
262 rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
263 kmutex_t	rfs4_servinst_lock;		/* protects linked list */
264 int		rfs4_seen_first_compound;	/* set first time we see one */
265 
266 #ifdef DEBUG
/* Enables the NFS4_DEBUG messages of the server-instance routines. */
267 int	rfs4_servinst_debug = 0;
268 #endif
269 
270 /*
271  * NFS4 op dispatch table
 *
 * One entry per operation number: the handler, the routine that releases
 * whatever the handler attached to the result, and per-op flags.  Both
 * function pointers are declared without a parameter list, so handlers
 * with differing argument types (e.g. nullfree(caddr_t) versus
 * *_free(nfs_resop4 *)) can share a slot.
272  */
273 
274 struct rfsv4disp {
275 	void	(*dis_proc)();		/* proc to call */
276 	void	(*dis_resfree)();	/* frees space allocated by proc */
277 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
278 };
279 
/*
 * The table is indexed by NFSv4 operation number; the comment above each
 * entry gives the index.  Slots 0-2 are not valid operations and map to
 * rfs4_op_illegal.
 */
280 static struct rfsv4disp rfsv4disptab[] = {
281 	/*
282 	 * NFS VERSION 4
283 	 */
284 
285 	/* RFS_NULL = 0 */
286 	{rfs4_op_illegal, nullfree, 0},
287 
288 	/* UNUSED = 1 */
289 	{rfs4_op_illegal, nullfree, 0},
290 
291 	/* UNUSED = 2 */
292 	{rfs4_op_illegal, nullfree, 0},
293 
294 	/* OP_ACCESS = 3 */
295 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
296 
297 	/* OP_CLOSE = 4 */
298 	{rfs4_op_close, nullfree, 0},
299 
300 	/* OP_COMMIT = 5 */
301 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
302 
303 	/* OP_CREATE = 6 */
304 	{rfs4_op_create, nullfree, 0},
305 
306 	/* OP_DELEGPURGE = 7 */
307 	{rfs4_op_inval, nullfree, 0},
308 
309 	/* OP_DELEGRETURN = 8 */
310 	{rfs4_op_delegreturn, nullfree, 0},
311 
312 	/* OP_GETATTR = 9 */
313 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
314 
315 	/* OP_GETFH = 10 */
316 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
317 
318 	/* OP_LINK = 11 */
319 	{rfs4_op_link, nullfree, 0},
320 
321 	/* OP_LOCK = 12 */
322 	{rfs4_op_lock, lock_denied_free, 0},
323 
324 	/* OP_LOCKT = 13 */
325 	{rfs4_op_lockt, lock_denied_free, 0},
326 
327 	/* OP_LOCKU = 14 */
328 	{rfs4_op_locku, nullfree, 0},
329 
330 	/* OP_LOOKUP = 15 */
331 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
332 
333 	/* OP_LOOKUPP = 16 */
334 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
335 
336 	/* OP_NVERIFY = 17 */
337 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
338 
339 	/* OP_OPEN = 18 */
340 	{rfs4_op_open, rfs4_free_reply, 0},
341 
342 	/* OP_OPENATTR = 19 */
343 	{rfs4_op_openattr, nullfree, 0},
344 
345 	/* OP_OPEN_CONFIRM = 20 */
346 	{rfs4_op_open_confirm, nullfree, 0},
347 
348 	/* OP_OPEN_DOWNGRADE = 21 */
349 	{rfs4_op_open_downgrade, nullfree, 0},
350 
351 	/* OP_PUTFH = 22 */
352 	{rfs4_op_putfh, nullfree, RPC_ALL},
353 
354 	/* OP_PUTPUBFH = 23 */
355 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
356 
357 	/* OP_PUTROOTFH = 24 */
358 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
359 
360 	/* OP_READ = 25 */
361 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
362 
363 	/* OP_READDIR = 26 */
364 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
365 
366 	/* OP_READLINK = 27 */
367 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
368 
369 	/* OP_REMOVE = 28 */
370 	{rfs4_op_remove, nullfree, 0},
371 
372 	/* OP_RENAME = 29 */
373 	{rfs4_op_rename, nullfree, 0},
374 
375 	/* OP_RENEW = 30 */
376 	{rfs4_op_renew, nullfree, 0},
377 
378 	/* OP_RESTOREFH = 31 */
379 	{rfs4_op_restorefh, nullfree, RPC_ALL},
380 
381 	/* OP_SAVEFH = 32 */
382 	{rfs4_op_savefh, nullfree, RPC_ALL},
383 
384 	/* OP_SECINFO = 33 */
385 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
386 
387 	/* OP_SETATTR = 34 */
388 	{rfs4_op_setattr, nullfree, 0},
389 
390 	/* OP_SETCLIENTID = 35 */
391 	{rfs4_op_setclientid, nullfree, 0},
392 
393 	/* OP_SETCLIENTID_CONFIRM = 36 */
394 	{rfs4_op_setclientid_confirm, nullfree, 0},
395 
396 	/* OP_VERIFY = 37 */
397 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
398 
399 	/* OP_WRITE = 38 */
400 	{rfs4_op_write, nullfree, 0},
401 
402 	/* OP_RELEASE_LOCKOWNER = 39 */
403 	{rfs4_op_release_lockowner, nullfree, 0},
404 };
405 
/* Number of entries in rfsv4disptab[] (valid indices are 0 .. cnt-1). */
406 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
407 
/*
 * One past the last table index.  It must not be used to index
 * rfsv4disptab[]; under DEBUG, rfs4_op_string[] has a matching
 * "rfs4_op_illegal" entry at this position.
 */
408 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
409 
410 #ifdef DEBUG
411 
/* DEBUG-only knobs; their consumers are outside this excerpt. */
412 int rfs4_fillone_debug = 0;
413 int rfs4_shrlock_debug = 0;
414 int rfs4_no_stub_access = 1;
415 int rfs4_rddir_debug = 0;
416 
/*
 * Printable operation names, in the same order as rfsv4disptab[] plus one
 * trailing "rfs4_op_illegal" entry at index OP_ILLEGAL_IDX.
 * NOTE(review): the entry for operation 36 reads
 * "rfs4_op_setclient_confirm" while the handler is
 * rfs4_op_setclientid_confirm -- confirm whether the string should match.
 */
417 static char *rfs4_op_string[] = {
418 	"rfs4_op_null",
419 	"rfs4_op_1 unused",
420 	"rfs4_op_2 unused",
421 	"rfs4_op_access",
422 	"rfs4_op_close",
423 	"rfs4_op_commit",
424 	"rfs4_op_create",
425 	"rfs4_op_delegpurge",
426 	"rfs4_op_delegreturn",
427 	"rfs4_op_getattr",
428 	"rfs4_op_getfh",
429 	"rfs4_op_link",
430 	"rfs4_op_lock",
431 	"rfs4_op_lockt",
432 	"rfs4_op_locku",
433 	"rfs4_op_lookup",
434 	"rfs4_op_lookupp",
435 	"rfs4_op_nverify",
436 	"rfs4_op_open",
437 	"rfs4_op_openattr",
438 	"rfs4_op_open_confirm",
439 	"rfs4_op_open_downgrade",
440 	"rfs4_op_putfh",
441 	"rfs4_op_putpubfh",
442 	"rfs4_op_putrootfh",
443 	"rfs4_op_read",
444 	"rfs4_op_readdir",
445 	"rfs4_op_readlink",
446 	"rfs4_op_remove",
447 	"rfs4_op_rename",
448 	"rfs4_op_renew",
449 	"rfs4_op_restorefh",
450 	"rfs4_op_savefh",
451 	"rfs4_op_secinfo",
452 	"rfs4_op_setattr",
453 	"rfs4_op_setclientid",
454 	"rfs4_op_setclient_confirm",
455 	"rfs4_op_verify",
456 	"rfs4_op_write",
457 	"rfs4_op_release_lockowner",
458 	"rfs4_op_illegal"
459 };
460 #endif
461 
462 void rfs4_ss_chkclid(rfs4_client_t *);
463 
/*
 * nextdp(dp): address of the dirent64 that follows `dp' in a buffer,
 * found by advancing d_reclen bytes from `dp'.  Any earlier definition of
 * the macro is discarded first.  Note that `dp' is evaluated twice.
 */
464 #ifdef	nextdp
465 #undef nextdp
466 #endif
467 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
468 
/*
 * Operation templates handed to fem_create() in rfs4_srvrinit() to build
 * deleg_rdops and deleg_wrops.  Each row pairs a vnode operation name
 * with the deleg_*() routine to install for it; a NULL, NULL row ends the
 * list.  The write template differs from the read one by also covering
 * VOPNAME_READ and by using the deleg_wr*() open/rwlock variants.
 */
469 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
470 	VOPNAME_OPEN, deleg_rdopen,
471 	VOPNAME_WRITE, deleg_write,
472 	VOPNAME_SETATTR, deleg_setattr,
473 	VOPNAME_RWLOCK, deleg_rd_rwlock,
474 	VOPNAME_SPACE, deleg_space,
475 	VOPNAME_SETSECATTR, deleg_setsecattr,
476 	VOPNAME_VNEVENT, deleg_vnevent,
477 	NULL, NULL
478 };
479 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
480 	VOPNAME_OPEN, deleg_wropen,
481 	VOPNAME_READ, deleg_read,
482 	VOPNAME_WRITE, deleg_write,
483 	VOPNAME_SETATTR, deleg_setattr,
484 	VOPNAME_RWLOCK, deleg_wr_rwlock,
485 	VOPNAME_SPACE, deleg_space,
486 	VOPNAME_SETSECATTR, deleg_setsecattr,
487 	VOPNAME_VNEVENT, deleg_vnevent,
488 	NULL, NULL
489 };
490 
491 int
492 rfs4_srvrinit(void)
493 {
494 	timespec32_t verf;
495 	int error;
496 	extern void rfs4_attr_init();
497 	extern krwlock_t rfs4_deleg_policy_lock;
498 
499 	/*
500 	 * The following algorithm attempts to find a unique verifier
501 	 * to be used as the write verifier returned from the server
502 	 * to the client.  It is important that this verifier change
503 	 * whenever the server reboots.  Of secondary importance, it
504 	 * is important for the verifier to be unique between two
505 	 * different servers.
506 	 *
507 	 * Thus, an attempt is made to use the system hostid and the
508 	 * current time in seconds when the nfssrv kernel module is
509 	 * loaded.  It is assumed that an NFS server will not be able
510 	 * to boot and then to reboot in less than a second.  If the
511 	 * hostid has not been set, then the current high resolution
512 	 * time is used.  This will ensure different verifiers each
513 	 * time the server reboots and minimize the chances that two
514 	 * different servers will have the same verifier.
515 	 * XXX - this is broken on LP64 kernels.
516 	 */
517 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
518 	if (verf.tv_sec != 0) {
519 		verf.tv_nsec = gethrestime_sec();
520 	} else {
521 		timespec_t tverf;
522 
523 		gethrestime(&tverf);
524 		verf.tv_sec = (time_t)tverf.tv_sec;
525 		verf.tv_nsec = tverf.tv_nsec;
526 	}
527 
528 	Write4verf = *(uint64_t *)&verf;
529 
530 	rfs4_attr_init();
531 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
532 
533 	/* Used to manage create/destroy of server state */
534 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
535 
536 	/* Used to manage access to server instance linked list */
537 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
538 
539 	/* Used to manage access to rfs4_deleg_policy */
540 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
541 
542 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
543 	if (error != 0) {
544 		rfs4_disable_delegation();
545 	} else {
546 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
547 				&deleg_wrops);
548 		if (error != 0) {
549 			rfs4_disable_delegation();
550 			fem_free(deleg_rdops);
551 		}
552 	}
553 
554 	nfs4_srv_caller_id = fs_new_caller_id();
555 
556 	lockt_sysid = lm_alloc_sysidt();
557 
558 	return (0);
559 }
560 
561 void
562 rfs4_srvrfini(void)
563 {
564 	extern krwlock_t rfs4_deleg_policy_lock;
565 
566 	if (lockt_sysid != LM_NOSYSID) {
567 		lm_free_sysidt(lockt_sysid);
568 		lockt_sysid = LM_NOSYSID;
569 	}
570 
571 	mutex_destroy(&rfs4_deleg_lock);
572 	mutex_destroy(&rfs4_state_lock);
573 	rw_destroy(&rfs4_deleg_policy_lock);
574 
575 	fem_free(deleg_rdops);
576 	fem_free(deleg_wrops);
577 }
578 
579 void
580 rfs4_init_compound_state(struct compound_state *cs)
581 {
582 	bzero(cs, sizeof (*cs));
583 	cs->cont = TRUE;
584 	cs->access = CS_ACCESS_DENIED;
585 	cs->deleg = FALSE;
586 	cs->mandlock = FALSE;
587 	cs->fh.nfs_fh4_val = cs->fhbuf;
588 }
589 
590 void
591 rfs4_grace_start(rfs4_servinst_t *sip)
592 {
593 	time_t now = gethrestime_sec();
594 
595 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
596 	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
597 
598 	rw_enter(&sip->rwlock, RW_WRITER);
599 	sip->start_time = now;
600 	sip->grace_period = rfs4_grace_period;
601 	rw_exit(&sip->rwlock);
602 }
603 
604 /*
605  * returns true if the instance's grace period has never been started
606  */
607 int
608 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
609 {
610 	time_t start_time;
611 
612 	rw_enter(&sip->rwlock, RW_READER);
613 	start_time = sip->start_time;
614 	rw_exit(&sip->rwlock);
615 
616 	return (start_time == 0);
617 }
618 
619 /*
620  * Indicates if server instance is within the
621  * grace period.
622  */
623 int
624 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
625 {
626 	time_t grace_expiry;
627 
628 	rw_enter(&sip->rwlock, RW_READER);
629 	grace_expiry = sip->start_time + sip->grace_period;
630 	rw_exit(&sip->rwlock);
631 
632 	return (gethrestime_sec() < grace_expiry);
633 }
634 
635 int
636 rfs4_clnt_in_grace(rfs4_client_t *cp)
637 {
638 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
639 
640 	return (rfs4_servinst_in_grace(cp->server_instance));
641 }
642 
643 /*
644  * reset all currently active grace periods
645  */
646 void
647 rfs4_grace_reset_all(void)
648 {
649 #ifdef DEBUG
650 	int n = 0;
651 #endif
652 	rfs4_servinst_t *sip;
653 
654 	mutex_enter(&rfs4_servinst_lock);
655 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
656 		if (rfs4_servinst_in_grace(sip)) {
657 			rfs4_grace_start(sip);
658 #ifdef DEBUG
659 			n++;
660 #endif
661 		}
662 	}
663 	mutex_exit(&rfs4_servinst_lock);
664 
665 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
666 	    "rfs4_grace_reset_all: reset %d instances", n));
667 }
668 
669 /*
670  * start any new instances' grace periods
671  */
672 void
673 rfs4_grace_start_new(void)
674 {
675 #ifdef DEBUG
676 	int n = 0;
677 #endif
678 	rfs4_servinst_t *sip;
679 
680 	mutex_enter(&rfs4_servinst_lock);
681 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
682 		if (rfs4_servinst_grace_new(sip))
683 			rfs4_grace_start(sip);
684 #ifdef DEBUG
685 		n++;
686 #endif
687 	}
688 	mutex_exit(&rfs4_servinst_lock);
689 
690 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
691 	    "rfs4_grace_start_new: started %d new instances", n));
692 }
693 
694 /*
695  * Create a new server instance, and make it the currently active instance.
696  * Note that starting the grace period too early will reduce the clients'
697  * recovery window.
698  */
699 void
700 rfs4_servinst_create(int start_grace)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
705 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
706 
707 	sip->start_time = (time_t)0;
708 	sip->grace_period = (time_t)0;
709 	sip->next = NULL;
710 	sip->prev = NULL;
711 
712 	mutex_enter(&rfs4_servinst_lock);
713 	if (rfs4_cur_servinst == NULL) {
714 		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
715 		    "rfs4_servinst_create: creating first instance"));
716 	} else {
717 		/* add to linked list */
718 		sip->prev = rfs4_cur_servinst;
719 		rfs4_cur_servinst->next = sip;
720 	}
721 	if (start_grace)
722 		rfs4_grace_start(sip);
723 	/* make the new instance "current" */
724 	rfs4_cur_servinst = sip;
725 	mutex_exit(&rfs4_servinst_lock);
726 
727 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
728 	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
729 	    (void *)sip, start_grace));
730 }
731 
732 /*
733  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
734  * all instances directly.
735  */
736 void
737 rfs4_servinst_destroy_all(void)
738 {
739 	rfs4_servinst_t *sip, *prev, *current;
740 #ifdef DEBUG
741 	int n = 0;
742 #endif
743 
744 	mutex_enter(&rfs4_servinst_lock);
745 	ASSERT(rfs4_cur_servinst != NULL);
746 	current = rfs4_cur_servinst;
747 	rfs4_cur_servinst = NULL;
748 	for (sip = current; sip != NULL; sip = prev) {
749 		prev = sip->prev;
750 		rw_destroy(&sip->rwlock);
751 		kmem_free(sip, sizeof (rfs4_servinst_t));
752 #ifdef DEBUG
753 		n++;
754 #endif
755 	}
756 	mutex_exit(&rfs4_servinst_lock);
757 
758 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
759 	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
760 }
761 
762 /*
763  * Assign the current server instance to a client_t.
764  * Should be called with cp->dbe held.
765  */
766 void
767 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
768 {
769 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
770 
771 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
772 	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
773 	    (void *)cp->server_instance, (void *)sip));
774 
775 	/*
776 	 * The lock ensures that if the current instance is in the process
777 	 * of changing, we will see the new one.
778 	 */
779 	mutex_enter(&rfs4_servinst_lock);
780 	cp->server_instance = sip;
781 	mutex_exit(&rfs4_servinst_lock);
782 }
783 
784 rfs4_servinst_t *
785 rfs4_servinst(rfs4_client_t *cp)
786 {
787 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
788 
789 	return (cp->server_instance);
790 }
791 
792 /* ARGSUSED */
793 static void
794 nullfree(caddr_t resop)
795 {
796 }
797 
798 /*
799  * This is a fall-through for invalid or not implemented (yet) ops
800  */
801 /* ARGSUSED */
802 static void
803 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
804 	struct compound_state *cs)
805 {
806 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
807 }
808 
/*
 * Search the first `count' entries of flavor_list for the security
 * flavor nfsnum.  Returns TRUE when it is present, FALSE otherwise
 * (including when count is zero or negative).
 */
bool_t
in_flavor_list(int nfsnum, int *flavor_list, int count)
{
	int *candidate = flavor_list;
	int remaining = count;

	while (remaining > 0) {
		if (*candidate == nfsnum)
			return (TRUE);
		candidate++;
		remaining--;
	}
	return (FALSE);
}
823 
824 /*
825  * Used by rfs4_op_secinfo to get the security information from the
826  * export structure associated with the component.
827  */
828 /* ARGSUSED */
829 static nfsstat4
830 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
831 {
832 	int error, different_export = 0;
833 	vnode_t *dvp, *vp, *tvp;
834 	struct exportinfo *exi = NULL;
835 	fid_t fid;
836 	uint_t count, i;
837 	secinfo4 *resok_val;
838 	struct secinfo *secp;
839 	bool_t did_traverse;
840 	int dotdot, walk;
841 
842 	dvp = cs->vp;
843 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
844 
845 	/*
846 	 * If dotdotting, then need to check whether it's above the
847 	 * root of a filesystem, or above an export point.
848 	 */
849 	if (dotdot) {
850 
851 		/*
852 		 * If dotdotting at the root of a filesystem, then
853 		 * need to traverse back to the mounted-on filesystem
854 		 * and do the dotdot lookup there.
855 		 */
856 		if (cs->vp->v_flag & VROOT) {
857 
858 			/*
859 			 * If at the system root, then can
860 			 * go up no further.
861 			 */
862 			if (VN_CMP(dvp, rootdir))
863 				return (puterrno4(ENOENT));
864 
865 			/*
866 			 * Traverse back to the mounted-on filesystem
867 			 */
868 			dvp = untraverse(cs->vp);
869 
870 			/*
871 			 * Set the different_export flag so we remember
872 			 * to pick up a new exportinfo entry for
873 			 * this new filesystem.
874 			 */
875 			different_export = 1;
876 		} else {
877 
878 			/*
879 			 * If dotdotting above an export point then set
880 			 * the different_export to get new export info.
881 			 */
882 			different_export = nfs_exported(cs->exi, cs->vp);
883 		}
884 	}
885 
886 	/*
887 	 * Get the vnode for the component "nm".
888 	 */
889 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
890 	if (error)
891 		return (puterrno4(error));
892 
893 	/*
894 	 * If the vnode is in a pseudo filesystem, or if the security flavor
895 	 * used in the request is valid but not an explicitly shared flavor,
896 	 * or the access bit indicates that this is a limited access,
897 	 * check whether this vnode is visible.
898 	 */
899 	if (!different_export &&
900 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
901 	    cs->access & CS_ACCESS_LIMITED)) {
902 		if (! nfs_visible(cs->exi, vp, &different_export)) {
903 			VN_RELE(vp);
904 			return (puterrno4(ENOENT));
905 		}
906 	}
907 
908 	/*
909 	 * If it's a mountpoint, then traverse it.
910 	 */
911 	if (vn_ismntpt(vp)) {
912 		tvp = vp;
913 		if ((error = traverse(&tvp)) != 0) {
914 			VN_RELE(vp);
915 			return (puterrno4(error));
916 		}
917 		/* remember that we had to traverse mountpoint */
918 		did_traverse = TRUE;
919 		vp = tvp;
920 		different_export = 1;
921 	} else if (vp->v_vfsp != dvp->v_vfsp) {
922 		/*
923 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
924 		 * then vp is probably an LOFS object.  We don't need the
925 		 * realvp, we just need to know that we might have crossed
926 		 * a server fs boundary and need to call checkexport4.
927 		 * (LOFS lookup hides server fs mountpoints, and actually calls
928 		 * traverse)
929 		 */
930 		different_export = 1;
931 		did_traverse = FALSE;
932 	}
933 
934 	/*
935 	 * Get the export information for it.
936 	 */
937 	if (different_export) {
938 
939 		bzero(&fid, sizeof (fid));
940 		fid.fid_len = MAXFIDSZ;
941 		error = vop_fid_pseudo(vp, &fid);
942 		if (error) {
943 			VN_RELE(vp);
944 			return (puterrno4(error));
945 		}
946 
947 		if (dotdot)
948 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
949 		else
950 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
951 
952 		if (exi == NULL) {
953 			if (did_traverse == TRUE) {
954 				/*
955 				 * If this vnode is a mounted-on vnode,
956 				 * but the mounted-on file system is not
957 				 * exported, send back the secinfo for
958 				 * the exported node that the mounted-on
959 				 * vnode lives in.
960 				 */
961 				exi = cs->exi;
962 			} else {
963 				VN_RELE(vp);
964 				return (puterrno4(EACCES));
965 			}
966 		}
967 	} else {
968 		exi = cs->exi;
969 	}
970 	ASSERT(exi != NULL);
971 
972 
973 	/*
974 	 * Create the secinfo result based on the security information
975 	 * from the exportinfo structure (exi).
976 	 *
977 	 * Return all flavors for a pseudo node.
978 	 * For a real export node, return the flavor that the client
979 	 * has access with.
980 	 */
981 	ASSERT(RW_LOCK_HELD(&exported_lock));
982 	if (PSEUDO(exi)) {
983 		count = exi->exi_export.ex_seccnt; /* total sec count */
984 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
985 		secp = exi->exi_export.ex_secinfo;
986 
987 		for (i = 0; i < count; i++) {
988 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
989 		    if (resok_val[i].flavor == RPCSEC_GSS) {
990 			rpcsec_gss_info *info;
991 
992 			info = &resok_val[i].flavor_info;
993 			info->qop = secp[i].s_secinfo.sc_qop;
994 			info->service =
995 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
996 
997 			/* get oid opaque data */
998 			info->oid.sec_oid4_len =
999 				secp[i].s_secinfo.sc_gss_mech_type->length;
1000 			info->oid.sec_oid4_val =
1001 				kmem_alloc(
1002 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1003 				    KM_SLEEP);
1004 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1005 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1006 		    }
1007 		}
1008 		resp->SECINFO4resok_len = count;
1009 		resp->SECINFO4resok_val = resok_val;
1010 	} else {
1011 		int ret_cnt = 0, k = 0;
1012 		int *flavor_list;
1013 
1014 		count = exi->exi_export.ex_seccnt; /* total sec count */
1015 		secp = exi->exi_export.ex_secinfo;
1016 
1017 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1018 		/* find out which flavors to return */
1019 		for (i = 0; i < count; i ++) {
1020 			int access, flavor, perm;
1021 
1022 			flavor = secp[i].s_secinfo.sc_nfsnum;
1023 			perm = secp[i].s_flags;
1024 
1025 			access = nfsauth4_secinfo_access(exi, cs->req,
1026 						flavor, perm);
1027 
1028 			if (! (access & NFSAUTH_DENIED) &&
1029 			    ! (access & NFSAUTH_WRONGSEC)) {
1030 				flavor_list[ret_cnt] = flavor;
1031 				ret_cnt++;
1032 			}
1033 		}
1034 
1035 		/* Create the returning SECINFO value */
1036 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1037 
1038 		for (i = 0; i < count; i++) {
1039 		/* If the flavor is in the flavor list, fill in resok_val. */
1040 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1041 						flavor_list, ret_cnt)) {
1042 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1043 			if (resok_val[k].flavor == RPCSEC_GSS) {
1044 			    rpcsec_gss_info *info;
1045 
1046 			    info = &resok_val[k].flavor_info;
1047 			    info->qop = secp[i].s_secinfo.sc_qop;
1048 			    info->service =
1049 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1050 
1051 			    /* get oid opaque data */
1052 			    info->oid.sec_oid4_len =
1053 				secp[i].s_secinfo.sc_gss_mech_type->length;
1054 			    info->oid.sec_oid4_val =
1055 				kmem_alloc(
1056 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1057 				    KM_SLEEP);
1058 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1059 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1060 			}
1061 			k++;
1062 		    }
1063 		    if (k >= ret_cnt)
1064 			break;
1065 		}
1066 		resp->SECINFO4resok_len = ret_cnt;
1067 		resp->SECINFO4resok_val = resok_val;
1068 		kmem_free(flavor_list, count * sizeof (int));
1069 	}
1070 
1071 	VN_RELE(vp);
1072 	return (NFS4_OK);
1073 }
1074 
1075 /*
1076  * SECINFO (Operation 33): Obtain required security information on
1077  * the component name in the format of (security-mechanism-oid, qop, service)
1078  * triplets.
1079  */
1080 /* ARGSUSED */
1081 static void
1082 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1083 	struct compound_state *cs)
1084 {
1085 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1086 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1087 	uint_t len;
1088 	char *nm;
1089 
1090 	/*
1091 	 * Current file handle (cfh) should have been set before getting
1092 	 * into this function. If not, return error.
1093 	 */
1094 	if (cs->vp == NULL) {
1095 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1096 		return;
1097 	}
1098 
1099 	if (cs->vp->v_type != VDIR) {
1100 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1101 		return;
1102 	}
1103 
1104 	/*
1105 	 * Verify the component name. If failed, error out, but
1106 	 * do not error out if the component name is a "..".
1107 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1108 	 */
1109 	if (!utf8_dir_verify(utfnm)) {
1110 		if (utfnm->utf8string_len != 2 ||
1111 				utfnm->utf8string_val[0] != '.' ||
1112 				utfnm->utf8string_val[1] != '.') {
1113 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1114 			return;
1115 		}
1116 	}
1117 
1118 	nm = utf8_to_str(utfnm, &len, NULL);
1119 	if (nm == NULL) {
1120 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1121 		return;
1122 	}
1123 
1124 	if (len > MAXNAMELEN) {
1125 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1126 		kmem_free(nm, len);
1127 		return;
1128 	}
1129 
1130 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1131 
1132 	kmem_free(nm, len);
1133 }
1134 
1135 /*
1136  * Free SECINFO result.
1137  */
1138 /* ARGSUSED */
1139 static void
1140 rfs4_op_secinfo_free(nfs_resop4 *resop)
1141 {
1142 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1143 	int count, i;
1144 	secinfo4 *resok_val;
1145 
1146 	/* If this is not an Ok result, nothing to free. */
1147 	if (resp->status != NFS4_OK) {
1148 		return;
1149 	}
1150 
1151 	count = resp->SECINFO4resok_len;
1152 	resok_val = resp->SECINFO4resok_val;
1153 
1154 	for (i = 0; i < count; i++) {
1155 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1156 		rpcsec_gss_info *info;
1157 
1158 		info = &resok_val[i].flavor_info;
1159 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1160 	    }
1161 	}
1162 	kmem_free(resok_val, count * sizeof (secinfo4));
1163 	resp->SECINFO4resok_len = 0;
1164 	resp->SECINFO4resok_val = NULL;
1165 }
1166 
1167 /* ARGSUSED */
1168 static void
1169 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1170 	struct compound_state *cs)
1171 {
1172 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1173 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1174 	int error;
1175 	vnode_t *vp;
1176 	struct vattr va;
1177 	int checkwriteperm;
1178 	cred_t *cr = cs->cr;
1179 
1180 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1181 	if (cs->access == CS_ACCESS_DENIED) {
1182 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1183 		return;
1184 	}
1185 #endif
1186 	if (cs->vp == NULL) {
1187 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1188 		return;
1189 	}
1190 
1191 	ASSERT(cr != NULL);
1192 
1193 	vp = cs->vp;
1194 
1195 	/*
1196 	 * If the file system is exported read only, it is not appropriate
1197 	 * to check write permissions for regular files and directories.
1198 	 * Special files are interpreted by the client, so the underlying
1199 	 * permissions are sent back to the client for interpretation.
1200 	 */
1201 	if (rdonly4(cs->exi, cs->vp, req) &&
1202 		(vp->v_type == VREG || vp->v_type == VDIR))
1203 		checkwriteperm = 0;
1204 	else
1205 		checkwriteperm = 1;
1206 
1207 	/*
1208 	 * XXX
1209 	 * We need the mode so that we can correctly determine access
1210 	 * permissions relative to a mandatory lock file.  Access to
1211 	 * mandatory lock files is denied on the server, so it might
1212 	 * as well be reflected to the server during the open.
1213 	 */
1214 	va.va_mask = AT_MODE;
1215 	error = VOP_GETATTR(vp, &va, 0, cr);
1216 	if (error) {
1217 		*cs->statusp = resp->status = puterrno4(error);
1218 		return;
1219 	}
1220 
1221 	resp->access = 0;
1222 	resp->supported = 0;
1223 
1224 	if (args->access & ACCESS4_READ) {
1225 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1226 		if (!error && !MANDLOCK(vp, va.va_mode))
1227 			resp->access |= ACCESS4_READ;
1228 		resp->supported |= ACCESS4_READ;
1229 	}
1230 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1231 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1232 		if (!error)
1233 			resp->access |= ACCESS4_LOOKUP;
1234 		resp->supported |= ACCESS4_LOOKUP;
1235 	}
1236 	if (checkwriteperm &&
1237 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1238 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1239 		if (!error && !MANDLOCK(vp, va.va_mode))
1240 			resp->access |=
1241 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1242 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1243 	}
1244 
1245 	if (checkwriteperm &&
1246 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1247 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1248 		if (!error)
1249 			resp->access |= ACCESS4_DELETE;
1250 		resp->supported |= ACCESS4_DELETE;
1251 	}
1252 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1253 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1254 		if (!error && !MANDLOCK(vp, va.va_mode))
1255 			resp->access |= ACCESS4_EXECUTE;
1256 		resp->supported |= ACCESS4_EXECUTE;
1257 	}
1258 
1259 	*cs->statusp = resp->status = NFS4_OK;
1260 }
1261 
1262 /* ARGSUSED */
1263 static void
1264 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1265 	struct compound_state *cs)
1266 {
1267 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1268 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1269 	int error;
1270 	vnode_t *vp = cs->vp;
1271 	cred_t *cr = cs->cr;
1272 	vattr_t va;
1273 
1274 	if (vp == NULL) {
1275 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1276 		return;
1277 	}
1278 	if (cs->access == CS_ACCESS_DENIED) {
1279 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1280 		return;
1281 	}
1282 
1283 	if (args->offset + args->count < args->offset) {
1284 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1285 		return;
1286 	}
1287 
1288 	va.va_mask = AT_UID;
1289 	error = VOP_GETATTR(vp, &va, 0, cr);
1290 
1291 	/*
1292 	 * If we can't get the attributes, then we can't do the
1293 	 * right access checking.  So, we'll fail the request.
1294 	 */
1295 	if (error) {
1296 		*cs->statusp = resp->status = puterrno4(error);
1297 		return;
1298 	}
1299 	if (rdonly4(cs->exi, cs->vp, req)) {
1300 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1301 		return;
1302 	}
1303 
1304 	if (vp->v_type != VREG) {
1305 		if (vp->v_type == VDIR)
1306 			resp->status = NFS4ERR_ISDIR;
1307 		else
1308 			resp->status = NFS4ERR_INVAL;
1309 		*cs->statusp = resp->status;
1310 		return;
1311 	}
1312 
1313 	if (crgetuid(cr) != va.va_uid &&
1314 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1315 		*cs->statusp = resp->status = puterrno4(error);
1316 		return;
1317 	}
1318 
1319 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1320 	if (!error)
1321 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1322 
1323 	if (error) {
1324 		*cs->statusp = resp->status = puterrno4(error);
1325 		return;
1326 	}
1327 
1328 	*cs->statusp = resp->status = NFS4_OK;
1329 	resp->writeverf = Write4verf;
1330 }
1331 
1332 /*
1333  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1334  * was completed. It does the nfsv4 create for special files.
1335  */
1336 /* ARGSUSED */
1337 static vnode_t *
1338 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1339 	struct compound_state *cs, vattr_t *vap, char *nm)
1340 {
1341 	int error;
1342 	cred_t *cr = cs->cr;
1343 	vnode_t *dvp = cs->vp;
1344 	vnode_t *vp = NULL;
1345 	int mode;
1346 	enum vcexcl excl;
1347 
1348 	switch (args->type) {
1349 	case NF4CHR:
1350 	case NF4BLK:
1351 		if (secpolicy_sys_devices(cr) != 0) {
1352 			*cs->statusp = resp->status = NFS4ERR_PERM;
1353 			return (NULL);
1354 		}
1355 		if (args->type == NF4CHR)
1356 			vap->va_type = VCHR;
1357 		else
1358 			vap->va_type = VBLK;
1359 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1360 					args->ftype4_u.devdata.specdata2);
1361 		vap->va_mask |= AT_RDEV;
1362 		break;
1363 	case NF4SOCK:
1364 		vap->va_type = VSOCK;
1365 		break;
1366 	case NF4FIFO:
1367 		vap->va_type = VFIFO;
1368 		break;
1369 	default:
1370 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1371 		return (NULL);
1372 	}
1373 
1374 	/*
1375 	 * Must specify the mode.
1376 	 */
1377 	if (!(vap->va_mask & AT_MODE)) {
1378 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1379 		return (NULL);
1380 	}
1381 
1382 	excl = EXCL;
1383 
1384 	mode = 0;
1385 
1386 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1387 	if (error) {
1388 		*cs->statusp = resp->status = puterrno4(error);
1389 		return (NULL);
1390 	}
1391 	return (vp);
1392 }
1393 
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * The object named args->objname is created in the directory given by
 * the current filehandle (cs->vp).  Accepted types are NF4LNK, NF4BLK,
 * NF4CHR, NF4SOCK, NF4FIFO and NF4DIR; anything else yields
 * NFS4ERR_BADTYPE.
 *
 * On success:
 *	- cs->fh is rebuilt by makefh4() for the new object and cs->vp
 *	  is replaced with the new vnode (the old hold is released);
 *	- resp->cinfo carries the before/after change values of the
 *	  directory and whether the update looked atomic;
 *	- resp->attrset lists the requested createattrs that were set.
 *
 * On failure the status is stored in resp->status and *cs->statusp and
 * resp->attrset is cleared.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		return;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}
	if (!utf8_dir_verify(&args->objname)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		return;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		return;
	}

	resp->attrset = 0;

	/*
	 * Convert the client's createattrs into sarg.vap.  From here on
	 * every exit path must release both nm and the ntov table.
	 */
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
					&args->createattrs, cs, &sarg,
					&ntov, NFS4ATTR_SETIT);

	/* A create that ends up with no vattr bits set is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bva, 0, cr);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}
	/* error is known to be 0 from this point until the switch below. */
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set default initial values for attributes when not specified
	 * in createattrs.
	 */
	if ((vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if ((vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
		/* lnm cannot be NULL here; the NULL case returned above. */
		if (lnm != NULL)
			kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		/*
		 * VOP_SYMLINK does not hand back a vnode, so look the new
		 * link up by name to obtain vp.
		 */
		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
						iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);

		/*
		 * On failure do_rfs4_op_mknod has already stored the
		 * status.  This branch never assigns error, which is
		 * therefore still 0 on the success path.
		 */
		if (vp == NULL) {
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		break;
	}
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr);

	/*
	 * NOTE(review): resp->status is not assigned NFS4_OK anywhere
	 * above in this function, so this test presumably relies on the
	 * caller handing in a zeroed result -- confirm.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the filehandle of the new object into cs->fh. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
			iva.va_seq == (bva.va_seq + 1) &&
			iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/* syncval is FNODSYNC for special files and 0 otherwise. */
	(void) VOP_FSYNC(vp, syncval, cr);

	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		return;
	}
	/* Swap the current vnode: drop the directory, keep the new object. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
}
1719 
1720 
1721 /*ARGSUSED*/
1722 static void
1723 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1724 	struct compound_state *cs)
1725 {
1726 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1727 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1728 	rfs4_deleg_state_t *dsp;
1729 	nfsstat4 status;
1730 
1731 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1732 	resp->status = *cs->statusp = status;
1733 	if (status != NFS4_OK)
1734 		return;
1735 
1736 	/* Ensure specified filehandle matches */
1737 	if (cs->vp != dsp->finfo->vp) {
1738 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1739 	} else
1740 		rfs4_return_deleg(dsp, FALSE);
1741 
1742 	rfs4_update_lease(dsp->client);
1743 
1744 	rfs4_deleg_state_rele(dsp);
1745 }
1746 
1747 /*
1748  * Check to see if a given "flavor" is an explicitly shared flavor.
1749  * The assumption of this routine is the "flavor" is already a valid
1750  * flavor in the secinfo list of "exi".
1751  *
1752  *	e.g.
1753  *		# share -o sec=flavor1 /export
1754  *		# share -o sec=flavor2 /export/home
1755  *
1756  *		flavor2 is not an explicitly shared flavor for /export,
1757  *		however it is in the secinfo list for /export thru the
1758  *		server namespace setup.
1759  */
1760 int
1761 is_exported_sec(int flavor, struct exportinfo *exi)
1762 {
1763 	int	i;
1764 	struct secinfo *sp;
1765 
1766 	sp = exi->exi_export.ex_secinfo;
1767 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1768 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1769 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1770 			return (SEC_REF_EXPORTED(&sp[i]));
1771 		}
1772 	}
1773 
1774 	/* Should not reach this point based on the assumption */
1775 	return (0);
1776 }
1777 
1778 /*
1779  * Check if the security flavor used in the request matches what is
1780  * required at the export point or at the root pseudo node (exi_root).
1781  *
1782  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1783  *
1784  */
1785 static int
1786 secinfo_match_or_authnone(struct compound_state *cs)
1787 {
1788 	int	i;
1789 	struct secinfo *sp;
1790 
1791 	/*
1792 	 * Check cs->nfsflavor (from the request) against
1793 	 * the current export data in cs->exi.
1794 	 */
1795 	sp = cs->exi->exi_export.ex_secinfo;
1796 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1797 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1798 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1799 			return (1);
1800 	}
1801 
1802 	return (0);
1803 }
1804 
1805 /*
1806  * Check the access authority for the client and return the correct error.
1807  */
1808 nfsstat4
1809 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1810 {
1811 	int	authres;
1812 
1813 	/*
1814 	 * First, check if the security flavor used in the request
1815 	 * are among the flavors set in the server namespace.
1816 	 */
1817 	if (!secinfo_match_or_authnone(cs)) {
1818 		*cs->statusp = NFS4ERR_WRONGSEC;
1819 		return (*cs->statusp);
1820 	}
1821 
1822 	authres = checkauth4(cs, req);
1823 
1824 	if (authres > 0) {
1825 		*cs->statusp = NFS4_OK;
1826 		if (! (cs->access & CS_ACCESS_LIMITED))
1827 			cs->access = CS_ACCESS_OK;
1828 	} else if (authres == 0) {
1829 		*cs->statusp = NFS4ERR_ACCESS;
1830 	} else if (authres == -2) {
1831 		*cs->statusp = NFS4ERR_WRONGSEC;
1832 	} else {
1833 		*cs->statusp = NFS4ERR_DELAY;
1834 	}
1835 	return (*cs->statusp);
1836 }
1837 
1838 /*
1839  * bitmap4_to_attrmask is called by getattr and readdir.
1840  * It sets up the vattr mask and determines whether vfsstat call is needed
1841  * based on the input bitmap.
1842  * Returns nfsv4 status.
1843  */
1844 static nfsstat4
1845 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1846 {
1847 	int i;
1848 	uint_t	va_mask;
1849 	struct statvfs64 *sbp = sargp->sbp;
1850 
1851 	sargp->sbp = NULL;
1852 	sargp->flag = 0;
1853 	sargp->rdattr_error = NFS4_OK;
1854 	sargp->mntdfid_set = FALSE;
1855 	if (sargp->cs->vp)
1856 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1857 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1858 	else
1859 		sargp->xattr = 0;
1860 
1861 	/*
1862 	 * Set rdattr_error_req to true if return error per
1863 	 * failed entry rather than fail the readdir.
1864 	 */
1865 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1866 		sargp->rdattr_error_req = 1;
1867 	else
1868 		sargp->rdattr_error_req = 0;
1869 
1870 	/*
1871 	 * generate the va_mask
1872 	 * Handle the easy cases first
1873 	 */
1874 	switch (breq) {
1875 	case NFS4_NTOV_ATTR_MASK:
1876 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1877 		return (NFS4_OK);
1878 
1879 	case NFS4_FS_ATTR_MASK:
1880 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1881 		sargp->sbp = sbp;
1882 		return (NFS4_OK);
1883 
1884 	case NFS4_NTOV_ATTR_CACHE_MASK:
1885 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1886 		return (NFS4_OK);
1887 
1888 	case FATTR4_LEASE_TIME_MASK:
1889 		sargp->vap->va_mask = 0;
1890 		return (NFS4_OK);
1891 
1892 	default:
1893 		va_mask = 0;
1894 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1895 			if ((breq & nfs4_ntov_map[i].fbit) &&
1896 							nfs4_ntov_map[i].vbit)
1897 				va_mask |= nfs4_ntov_map[i].vbit;
1898 		}
1899 
1900 		/*
1901 		 * Check is vfsstat is needed
1902 		 */
1903 		if (breq & NFS4_FS_ATTR_MASK)
1904 			sargp->sbp = sbp;
1905 
1906 		sargp->vap->va_mask = va_mask;
1907 		return (NFS4_OK);
1908 	}
1909 	/* NOTREACHED */
1910 }
1911 
1912 /*
1913  * bitmap4_get_sysattrs is called by getattr and readdir.
1914  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
1915  * Returns nfsv4 status.
1916  */
1917 static nfsstat4
1918 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
1919 {
1920 	int error;
1921 	struct compound_state *cs = sargp->cs;
1922 	vnode_t *vp = cs->vp;
1923 
1924 	if (sargp->sbp != NULL) {
1925 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
1926 			sargp->sbp = NULL;	/* to identify error */
1927 			return (puterrno4(error));
1928 		}
1929 	}
1930 
1931 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
1932 }
1933 
1934 static void
1935 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
1936 {
1937 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
1938 			KM_SLEEP);
1939 	ntovp->attrcnt = 0;
1940 	ntovp->vfsstat = FALSE;
1941 }
1942 
1943 static void
1944 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
1945 	struct nfs4_svgetit_arg *sargp)
1946 {
1947 	int i;
1948 	union nfs4_attr_u *na;
1949 	uint8_t *amap;
1950 
1951 	/*
1952 	 * XXX Should do the same checks for whether the bit is set
1953 	 */
1954 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
1955 		i < ntovp->attrcnt; i++, na++, amap++) {
1956 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
1957 			NFS4ATTR_FREEIT, sargp, na);
1958 	}
1959 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
1960 		/*
1961 		 * xdr_free for getattr will be done later
1962 		 */
1963 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
1964 			i < ntovp->attrcnt; i++, na++, amap++) {
1965 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
1966 		}
1967 	}
1968 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
1969 }
1970 
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 *	breq	- bitmap of the attributes the client requested
 *	fattrp	- result: attrmask of the attributes actually returned
 *		  plus their XDR-encoded values in attrlist4/attrlist4_len
 *	sargp	- per-request state; its op is set to NFS4ATTR_GETIT here
 *
 * Returns NFS4_OK on success, NFS4ERR_INVAL when a write-only attribute
 * is requested, NFS4ERR_SERVERFAULT when XDR encoding of an attribute
 * fails, or the translated error from a failing sv_getit routine (only
 * when the client did not ask for rdattr_error reporting).
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
	struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so a change made during this call can be detected */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/*
	 * na walks the slots that receive attribute values and amap
	 * records, per filled slot, which attribute it holds.
	 */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Now loop to get or verify the attrs
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			/* attributes reported as unsupported are skipped */
			if ((*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
						NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				/*
				 * Note that this exit leaves
				 * fattrp->attrlist4 and attrlist4_len
				 * unassigned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * If error then just for entry
				 */
				if (error == 0) {
					fattrp->attrmask |=
						nfs4_ntov_map[i].fbit;
					*amap++ =
						(uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
					(sargp->rdattr_error == NFS4_OK)) {
					/* only the first failure is kept */
					sargp->rdattr_error = puterrno4(error);
				}
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			/*
			 * Skip entries numbered below the rdattr_error
			 * attribute; the scan stops at the first entry
			 * that is not below it.
			 */
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
						NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* First pass: total the encoded size of the gathered attributes. */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* Second pass: encode each attribute into the buffer. */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
					"encode of attribute %d failed\n",
					*amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	nfs4_ntov_table_free(&ntov, sargp);

	/* error is only non-zero here when reached through "goto done" */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2110 
2111 /* ARGSUSED */
2112 static void
2113 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2114 	struct compound_state *cs)
2115 {
2116 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2117 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2118 	struct nfs4_svgetit_arg sarg;
2119 	struct statvfs64 sb;
2120 	nfsstat4 status;
2121 
2122 	if (cs->vp == NULL) {
2123 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2124 		return;
2125 	}
2126 
2127 	if (cs->access == CS_ACCESS_DENIED) {
2128 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2129 		return;
2130 	}
2131 
2132 	sarg.sbp = &sb;
2133 	sarg.cs = cs;
2134 
2135 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2136 	if (status == NFS4_OK) {
2137 		status = bitmap4_get_sysattrs(&sarg);
2138 		if (status == NFS4_OK)
2139 			status = do_rfs4_op_getattr(args->attr_request,
2140 				&resp->obj_attributes, &sarg);
2141 	}
2142 	*cs->statusp = resp->status = status;
2143 }
2144 
2145 static void
2146 rfs4_op_getattr_free(nfs_resop4 *resop)
2147 {
2148 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2149 
2150 	nfs4_fattr4_free(&resp->obj_attributes);
2151 }
2152 
2153 /* ARGSUSED */
2154 static void
2155 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156 	struct compound_state *cs)
2157 {
2158 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2159 
2160 	if (cs->vp == NULL) {
2161 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2162 		return;
2163 	}
2164 	if (cs->access == CS_ACCESS_DENIED) {
2165 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2166 		return;
2167 	}
2168 
2169 	resp->object.nfs_fh4_val =
2170 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2171 	nfs_fh4_copy(&cs->fh, &resp->object);
2172 	*cs->statusp = resp->status = NFS4_OK;
2173 }
2174 
2175 static void
2176 rfs4_op_getfh_free(nfs_resop4 *resop)
2177 {
2178 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2179 
2180 	if (resp->status == NFS4_OK &&
2181 	    resp->object.nfs_fh4_val != NULL) {
2182 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2183 		resp->object.nfs_fh4_val = NULL;
2184 		resp->object.nfs_fh4_len = 0;
2185 	}
2186 }
2187 
2188 /*
2189  * illegal: args: void
2190  *	    res : status (NFS4ERR_OP_ILLEGAL)
2191  */
2192 /* ARGSUSED */
2193 static void
2194 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2195 	struct svc_req *req, struct compound_state *cs)
2196 {
2197 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2198 
2199 	resop->resop = OP_ILLEGAL;
2200 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2201 }
2202 
2203 /*
2204  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2205  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2206  */
2207 /* ARGSUSED */
2208 static void
2209 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2210 	struct compound_state *cs)
2211 {
2212 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2213 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2214 	int error;
2215 	vnode_t *vp;
2216 	vnode_t *dvp;
2217 	struct vattr bdva, idva, adva;
2218 	char *nm;
2219 	uint_t  len;
2220 
2221 	/* SAVED_FH: source object */
2222 	vp = cs->saved_vp;
2223 	if (vp == NULL) {
2224 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2225 		return;
2226 	}
2227 
2228 	/* CURRENT_FH: target directory */
2229 	dvp = cs->vp;
2230 	if (dvp == NULL) {
2231 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2232 		return;
2233 	}
2234 
2235 	/*
2236 	 * If there is a non-shared filesystem mounted on this vnode,
2237 	 * do not allow to link any file in this directory.
2238 	 */
2239 	if (vn_ismntpt(dvp)) {
2240 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2241 		return;
2242 	}
2243 
2244 	if (cs->access == CS_ACCESS_DENIED) {
2245 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2246 		return;
2247 	}
2248 
2249 	/* Check source object's type validity */
2250 	if (vp->v_type == VDIR) {
2251 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2252 		return;
2253 	}
2254 
2255 	/* Check target directory's type */
2256 	if (dvp->v_type != VDIR) {
2257 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2258 		return;
2259 	}
2260 
2261 	if (cs->saved_exi != cs->exi) {
2262 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2263 		return;
2264 	}
2265 
2266 	if (!utf8_dir_verify(&args->newname)) {
2267 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2268 		return;
2269 	}
2270 
2271 	nm = utf8_to_fn(&args->newname, &len, NULL);
2272 	if (nm == NULL) {
2273 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2274 		return;
2275 	}
2276 
2277 	if (len > MAXNAMELEN) {
2278 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2279 		kmem_free(nm, len);
2280 		return;
2281 	}
2282 
2283 	if (rdonly4(cs->exi, cs->vp, req)) {
2284 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2285 		kmem_free(nm, len);
2286 		return;
2287 	}
2288 
2289 	/* Get "before" change value */
2290 	bdva.va_mask = AT_CTIME|AT_SEQ;
2291 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2292 	if (error) {
2293 		*cs->statusp = resp->status = puterrno4(error);
2294 		kmem_free(nm, len);
2295 		return;
2296 	}
2297 
2298 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2299 
2300 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2301 
2302 	kmem_free(nm, len);
2303 
2304 	/*
2305 	 * Get the initial "after" sequence number, if it fails, set to zero
2306 	 */
2307 	idva.va_mask = AT_SEQ;
2308 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2309 		idva.va_seq = 0;
2310 
2311 	/*
2312 	 * Force modified data and metadata out to stable storage.
2313 	 */
2314 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2315 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2316 
2317 	if (error) {
2318 		*cs->statusp = resp->status = puterrno4(error);
2319 		return;
2320 	}
2321 
2322 	/*
2323 	 * Get "after" change value, if it fails, simply return the
2324 	 * before value.
2325 	 */
2326 	adva.va_mask = AT_CTIME|AT_SEQ;
2327 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2328 		adva.va_ctime = bdva.va_ctime;
2329 		adva.va_seq = 0;
2330 	}
2331 
2332 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2333 
2334 	/*
2335 	 * The cinfo.atomic = TRUE only if we have
2336 	 * non-zero va_seq's, and it has incremented by exactly one
2337 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2338 	 */
2339 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2340 			idva.va_seq == (bdva.va_seq + 1) &&
2341 			idva.va_seq == adva.va_seq)
2342 		resp->cinfo.atomic = TRUE;
2343 	else
2344 		resp->cinfo.atomic = FALSE;
2345 
2346 	*cs->statusp = resp->status = NFS4_OK;
2347 }
2348 
2349 /*
2350  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2351  */
2352 
2353 /* ARGSUSED */
2354 static nfsstat4
2355 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2356 	struct compound_state *cs)
2357 {
2358 	int error;
2359 	int different_export = 0;
2360 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2361 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2362 	nfsstat4 stat;
2363 	fid_t fid;
2364 	int attrdir, dotdot, walk;
2365 	bool_t is_newvp = FALSE;
2366 
2367 	if (cs->vp->v_flag & V_XATTRDIR) {
2368 		attrdir = 1;
2369 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2370 	} else {
2371 		attrdir = 0;
2372 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2373 	}
2374 
2375 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2376 
2377 	/*
2378 	 * If dotdotting, then need to check whether it's
2379 	 * above the root of a filesystem, or above an
2380 	 * export point.
2381 	 */
2382 	if (dotdot) {
2383 
2384 		/*
2385 		 * If dotdotting at the root of a filesystem, then
2386 		 * need to traverse back to the mounted-on filesystem
2387 		 * and do the dotdot lookup there.
2388 		 */
2389 		if (cs->vp->v_flag & VROOT) {
2390 
2391 			/*
2392 			 * If at the system root, then can
2393 			 * go up no further.
2394 			 */
2395 			if (VN_CMP(cs->vp, rootdir))
2396 				return (puterrno4(ENOENT));
2397 
2398 			/*
2399 			 * Traverse back to the mounted-on filesystem
2400 			 */
2401 			cs->vp = untraverse(cs->vp);
2402 
2403 			/*
2404 			 * Set the different_export flag so we remember
2405 			 * to pick up a new exportinfo entry for
2406 			 * this new filesystem.
2407 			 */
2408 			different_export = 1;
2409 		} else {
2410 
2411 			/*
2412 			 * If dotdotting above an export point then set
2413 			 * the different_export to get new export info.
2414 			 */
2415 			different_export = nfs_exported(cs->exi, cs->vp);
2416 		}
2417 	}
2418 
2419 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2420 	if (error)
2421 		return (puterrno4(error));
2422 
2423 	/*
2424 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2425 	 *
2426 	 * XXX if the vnode is a symlink and it is not visible in
2427 	 * a pseudo filesystem, return ENOENT (not following symlink).
2428 	 * V4 client can not mount such symlink. This is a regression
2429 	 * from V2/V3.
2430 	 *
2431 	 * In the same exported filesystem, if the security flavor used
2432 	 * is not an explicitly shared flavor, limit the view to the visible
2433 	 * list entries only. This is not a WRONGSEC case because it's already
2434 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2435 	 */
2436 	if (!different_export &&
2437 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2438 	    cs->access & CS_ACCESS_LIMITED)) {
2439 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2440 			VN_RELE(vp);
2441 			return (puterrno4(ENOENT));
2442 		}
2443 	}
2444 
2445 	/*
2446 	 * If it's a mountpoint, then traverse it.
2447 	 */
2448 	if (vn_ismntpt(vp)) {
2449 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2450 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2451 
2452 		/*
2453 		 * hold pre_tvp to counteract rele by traverse.  We will
2454 		 * need pre_tvp below if checkexport4 fails
2455 		 */
2456 		VN_HOLD(pre_tvp);
2457 		tvp = vp;
2458 		if ((error = traverse(&tvp)) != 0) {
2459 			VN_RELE(vp);
2460 			VN_RELE(pre_tvp);
2461 			return (puterrno4(error));
2462 		}
2463 		vp = tvp;
2464 		different_export = 1;
2465 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2466 		/*
2467 		 * The vfsp comparison is to handle the case where
2468 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2469 		 * and NFS is unaware of local fs transistions because
2470 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2471 		 * the dir and the obj returned by lookup will have different
2472 		 * vfs ptrs.
2473 		 */
2474 		different_export = 1;
2475 	}
2476 
2477 	if (different_export) {
2478 
2479 		bzero(&fid, sizeof (fid));
2480 		fid.fid_len = MAXFIDSZ;
2481 		error = vop_fid_pseudo(vp, &fid);
2482 		if (error) {
2483 			VN_RELE(vp);
2484 			if (pre_tvp)
2485 				VN_RELE(pre_tvp);
2486 			return (puterrno4(error));
2487 		}
2488 
2489 		if (dotdot)
2490 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2491 		else
2492 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2493 
2494 		if (exi == NULL) {
2495 			if (pre_tvp) {
2496 				/*
2497 				 * If this vnode is a mounted-on vnode,
2498 				 * but the mounted-on file system is not
2499 				 * exported, send back the filehandle for
2500 				 * the mounted-on vnode, not the root of
2501 				 * the mounted-on file system.
2502 				 */
2503 				VN_RELE(vp);
2504 				vp = pre_tvp;
2505 				exi = pre_exi;
2506 			} else {
2507 				VN_RELE(vp);
2508 				return (puterrno4(EACCES));
2509 			}
2510 		} else if (pre_tvp) {
2511 			/* we're done with pre_tvp now. release extra hold */
2512 			VN_RELE(pre_tvp);
2513 		}
2514 
2515 		cs->exi = exi;
2516 
2517 		/*
2518 		 * Now we do a checkauth4. The reason is that
2519 		 * this client/user may not have access to the new
2520 		 * exported file system, and if he does,
2521 		 * the client/user may be mapped to a different uid.
2522 		 *
2523 		 * We start with a new cr, because the checkauth4 done
2524 		 * in the PUT*FH operation over wrote the cred's uid,
2525 		 * gid, etc, and we want the real thing before calling
2526 		 * checkauth4()
2527 		 */
2528 		crfree(cs->cr);
2529 		cs->cr = crdup(cs->basecr);
2530 
2531 		if (cs->vp)
2532 			oldvp = cs->vp;
2533 		cs->vp = vp;
2534 		is_newvp = TRUE;
2535 
2536 		stat = call_checkauth4(cs, req);
2537 		if (stat != NFS4_OK) {
2538 			VN_RELE(cs->vp);
2539 			cs->vp = oldvp;
2540 			return (stat);
2541 		}
2542 	}
2543 
2544 	error = makefh4(&cs->fh, vp, cs->exi);
2545 
2546 	if (error) {
2547 		if (is_newvp) {
2548 			VN_RELE(cs->vp);
2549 			cs->vp = oldvp;
2550 		} else
2551 			VN_RELE(vp);
2552 		return (puterrno4(error));
2553 	}
2554 
2555 	if (!is_newvp) {
2556 		if (cs->vp)
2557 			VN_RELE(cs->vp);
2558 		cs->vp = vp;
2559 	} else if (oldvp)
2560 		VN_RELE(oldvp);
2561 
2562 	/*
2563 	 * if did lookup on attrdir and didn't lookup .., set named
2564 	 * attr fh flag
2565 	 */
2566 	if (attrdir && ! dotdot)
2567 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2568 
2569 	/* Assume false for now, open proc will set this */
2570 	cs->mandlock = FALSE;
2571 
2572 	return (NFS4_OK);
2573 }
2574 
2575 /* ARGSUSED */
2576 static void
2577 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2578 	struct compound_state *cs)
2579 {
2580 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2581 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2582 	char *nm;
2583 	uint_t len;
2584 
2585 	if (cs->vp == NULL) {
2586 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2587 		return;
2588 	}
2589 
2590 	if (cs->vp->v_type == VLNK) {
2591 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2592 		return;
2593 	}
2594 
2595 	if (cs->vp->v_type != VDIR) {
2596 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2597 		return;
2598 	}
2599 
2600 	if (!utf8_dir_verify(&args->objname)) {
2601 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2602 		return;
2603 	}
2604 
2605 	nm = utf8_to_str(&args->objname, &len, NULL);
2606 	if (nm == NULL) {
2607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2608 		return;
2609 	}
2610 
2611 	if (len > MAXNAMELEN) {
2612 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2613 		kmem_free(nm, len);
2614 		return;
2615 	}
2616 
2617 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2618 
2619 	kmem_free(nm, len);
2620 }
2621 
2622 /* ARGSUSED */
2623 static void
2624 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2625 	struct compound_state *cs)
2626 {
2627 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2628 
2629 	if (cs->vp == NULL) {
2630 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2631 		return;
2632 	}
2633 
2634 	if (cs->vp->v_type != VDIR) {
2635 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2636 		return;
2637 	}
2638 
2639 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2640 
2641 	/*
2642 	 * From NFSV4 Specification, LOOKUPP should not check for
2643 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2644 	 */
2645 	if (resp->status == NFS4ERR_WRONGSEC) {
2646 		*cs->statusp = resp->status = NFS4_OK;
2647 	}
2648 }
2649 
2650 
2651 /*ARGSUSED2*/
2652 static void
2653 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2654 	struct compound_state *cs)
2655 {
2656 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2657 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2658 	vnode_t		*avp = NULL;
2659 	int		lookup_flags = LOOKUP_XATTR, error;
2660 	int		exp_ro = 0;
2661 
2662 	if (cs->vp == NULL) {
2663 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2664 		return;
2665 	}
2666 
2667 	/*
2668 	 * Make a couple of checks made by copen()
2669 	 *
2670 	 * Check to make sure underlying fs supports xattrs.  This
2671 	 * is required because solaris filesystem implementations
2672 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2673 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2674 	 * pathconf cmd or if fs supports cmd but doesn't claim
2675 	 * support for xattr, return NOTSUPP.  It would be better
2676 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2677 	 * that cmd is not available to VOP_PATHCONF interface
2678 	 * (it's only implemented inside pathconf syscall)...
2679 	 *
2680 	 * Verify permission to put attributes on files (access
2681 	 * checks from copen).
2682 	 */
2683 
2684 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2685 		error = ENOTSUP;
2686 		goto error_out;
2687 	}
2688 
2689 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2690 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2691 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2692 		error = EACCES;
2693 		goto error_out;
2694 	}
2695 
2696 	/*
2697 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2698 	 * the file system is exported read-only -- regardless of
2699 	 * createdir flag.  Otherwise the attrdir would be created
2700 	 * (assuming server fs isn't mounted readonly locally).  If
2701 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2702 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2703 	 * because specfs has no VOP_LOOKUP op, so the macro would
2704 	 * return ENOSYS.  EINVAL is returned by all (current)
2705 	 * Solaris file system implementations when any of their
2706 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2707 	 * Returning NOTSUPP is more appropriate in this case
2708 	 * because the object will never be able to have an attrdir.
2709 	 */
2710 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2711 		lookup_flags |= CREATE_XATTR_DIR;
2712 
2713 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2714 
2715 	if (error) {
2716 		if (error == ENOENT && args->createdir && exp_ro)
2717 			error = EROFS;
2718 		else if (error == EINVAL || error == ENOSYS)
2719 			error = ENOTSUP;
2720 		goto error_out;
2721 	}
2722 
2723 	ASSERT(avp->v_flag & V_XATTRDIR);
2724 
2725 	error = makefh4(&cs->fh, avp, cs->exi);
2726 
2727 	if (error) {
2728 		VN_RELE(avp);
2729 		goto error_out;
2730 	}
2731 
2732 	VN_RELE(cs->vp);
2733 	cs->vp = avp;
2734 
2735 	/*
2736 	 * There is no requirement for an attrdir fh flag
2737 	 * because the attrdir has a vnode flag to distinguish
2738 	 * it from regular (non-xattr) directories.  The
2739 	 * FH4_ATTRDIR flag is set for future sanity checks.
2740 	 */
2741 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2742 	*cs->statusp = resp->status = NFS4_OK;
2743 	return;
2744 
2745 error_out:
2746 
2747 	*cs->statusp = resp->status = puterrno4(error);
2748 }
2749 
2750 static int
2751 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2752 {
2753 	int error;
2754 	int i;
2755 	clock_t delaytime;
2756 	caller_context_t ct;
2757 
2758 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2759 
2760 	/*
2761 	 * Don't block on mandatory locks. If this routine returns
2762 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2763 	 */
2764 	uio->uio_fmode = FNONBLOCK;
2765 
2766 	ct.cc_sysid = 0;
2767 	ct.cc_pid = 0;
2768 	ct.cc_caller_id = nfs4_srv_caller_id;
2769 
2770 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2771 
2772 
2773 		if (direction == FREAD) {
2774 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2775 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2776 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2777 		} else {
2778 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2779 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2780 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2781 		}
2782 
2783 		if (error != EAGAIN)
2784 			break;
2785 
2786 		if (i < rfs4_maxlock_tries - 1) {
2787 			delay(delaytime);
2788 			delaytime *= 2;
2789 		}
2790 	}
2791 
2792 	return (error);
2793 }
2794 
2795 /* ARGSUSED */
2796 static void
2797 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2798 	struct compound_state *cs)
2799 {
2800 	READ4args *args = &argop->nfs_argop4_u.opread;
2801 	READ4res *resp = &resop->nfs_resop4_u.opread;
2802 	int error;
2803 	int verror;
2804 	vnode_t *vp;
2805 	struct vattr va;
2806 	struct iovec iov;
2807 	struct uio uio;
2808 	u_offset_t offset;
2809 	bool_t *deleg = &cs->deleg;
2810 	nfsstat4 stat;
2811 	int in_crit = 0;
2812 	mblk_t *mp;
2813 	int alloc_err = 0;
2814 
2815 	vp = cs->vp;
2816 	if (vp == NULL) {
2817 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2818 		return;
2819 	}
2820 	if (cs->access == CS_ACCESS_DENIED) {
2821 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2822 		return;
2823 	}
2824 
2825 	/*
2826 	 * Enter the critical region before calling VOP_RWLOCK
2827 	 * to avoid a deadlock with write requests.
2828 	 */
2829 	if (nbl_need_check(vp)) {
2830 		nbl_start_crit(vp, RW_READER);
2831 		in_crit = 1;
2832 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2833 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2834 			goto out;
2835 		}
2836 	}
2837 
2838 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2839 					deleg, TRUE)) != NFS4_OK) {
2840 		*cs->statusp = resp->status = stat;
2841 		goto out;
2842 	}
2843 
2844 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2845 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2846 
2847 	/*
2848 	 * If we can't get the attributes, then we can't do the
2849 	 * right access checking.  So, we'll fail the request.
2850 	 */
2851 	if (verror) {
2852 		*cs->statusp = resp->status = puterrno4(verror);
2853 		goto out;
2854 	}
2855 
2856 	if (vp->v_type != VREG) {
2857 		*cs->statusp = resp->status =
2858 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
2859 		goto out;
2860 	}
2861 
2862 	if (crgetuid(cs->cr) != va.va_uid &&
2863 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
2864 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
2865 		*cs->statusp = resp->status = puterrno4(error);
2866 		goto out;
2867 	}
2868 
2869 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
2870 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2871 		goto out;
2872 	}
2873 
2874 	offset = args->offset;
2875 	if (offset >= va.va_size) {
2876 		*cs->statusp = resp->status = NFS4_OK;
2877 		resp->eof = TRUE;
2878 		resp->data_len = 0;
2879 		resp->data_val = NULL;
2880 		resp->mblk = NULL;
2881 		*cs->statusp = resp->status = NFS4_OK;
2882 		goto out;
2883 	}
2884 
2885 	if (args->count == 0) {
2886 		*cs->statusp = resp->status = NFS4_OK;
2887 		resp->eof = FALSE;
2888 		resp->data_len = 0;
2889 		resp->data_val = NULL;
2890 		resp->mblk = NULL;
2891 		goto out;
2892 	}
2893 
2894 	/*
2895 	 * Do not allocate memory more than maximum allowed
2896 	 * transfer size
2897 	 */
2898 	if (args->count > rfs4_tsize(req))
2899 		args->count = rfs4_tsize(req);
2900 
2901 	/*
2902 	 * mp will contain the data to be sent out in the read reply.
2903 	 * It will be freed after the reply has been sent.
2904 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
2905 	 * so that the call to xdrmblk_putmblk() never fails.
2906 	 * If the first alloc of the requested size fails, then
2907 	 * decrease the size to something more reasonable and wait
2908 	 * for the allocation to occur.
2909 	 */
2910 	mp = allocb(RNDUP(args->count), BPRI_MED);
2911 	if (mp == NULL) {
2912 		if (args->count > MAXBSIZE)
2913 			args->count = MAXBSIZE;
2914 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
2915 				STR_NOSIG, &alloc_err);
2916 	}
2917 	ASSERT(mp != NULL);
2918 	ASSERT(alloc_err == 0);
2919 
2920 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
2921 	iov.iov_len = args->count;
2922 	uio.uio_iov = &iov;
2923 	uio.uio_iovcnt = 1;
2924 	uio.uio_segflg = UIO_SYSSPACE;
2925 	uio.uio_extflg = UIO_COPY_CACHED;
2926 	uio.uio_loffset = args->offset;
2927 	uio.uio_resid = args->count;
2928 
2929 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
2930 
2931 	va.va_mask = AT_SIZE;
2932 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2933 
2934 	if (error) {
2935 		freeb(mp);
2936 		*cs->statusp = resp->status = puterrno4(error);
2937 		goto out;
2938 	}
2939 
2940 	*cs->statusp = resp->status = NFS4_OK;
2941 
2942 	ASSERT(uio.uio_resid >= 0);
2943 	resp->data_len = args->count - uio.uio_resid;
2944 	resp->data_val = (char *)mp->b_datap->db_base;
2945 	resp->mblk = mp;
2946 
2947 	if (!verror && offset + resp->data_len == va.va_size)
2948 		resp->eof = TRUE;
2949 	else
2950 		resp->eof = FALSE;
2951 
2952 out:
2953 	if (in_crit)
2954 		nbl_end_crit(vp);
2955 }
2956 
2957 static void
2958 rfs4_op_read_free(nfs_resop4 *resop)
2959 {
2960 	READ4res *resp = &resop->nfs_resop4_u.opread;
2961 
2962 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2963 		freeb(resp->mblk);
2964 		resp->mblk = NULL;
2965 		resp->data_val = NULL;
2966 		resp->data_len = 0;
2967 	}
2968 }
2969 
2970 static void
2971 rfs4_op_readdir_free(nfs_resop4 *resop)
2972 {
2973 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
2974 
2975 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2976 		freeb(resp->mblk);
2977 		resp->mblk = NULL;
2978 		resp->data_len = 0;
2979 	}
2980 }
2981 
2982 
2983 /* ARGSUSED */
2984 static void
2985 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2986 	struct compound_state *cs)
2987 {
2988 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
2989 	int error;
2990 	vnode_t *vp;
2991 	struct exportinfo *exi, *sav_exi;
2992 	nfs_fh4_fmt_t *fh_fmtp;
2993 
2994 	if (cs->vp) {
2995 		VN_RELE(cs->vp);
2996 		cs->vp = NULL;
2997 	}
2998 
2999 	if (cs->cr)
3000 		crfree(cs->cr);
3001 
3002 	cs->cr = crdup(cs->basecr);
3003 
3004 	vp = exi_public->exi_vp;
3005 	if (vp == NULL) {
3006 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3007 		return;
3008 	}
3009 
3010 	error = makefh4(&cs->fh, vp, exi_public);
3011 	if (error != 0) {
3012 		*cs->statusp = resp->status = puterrno4(error);
3013 		return;
3014 	}
3015 	sav_exi = cs->exi;
3016 	if (exi_public == exi_root) {
3017 		/*
3018 		 * No filesystem is actually shared public, so we default
3019 		 * to exi_root. In this case, we must check whether root
3020 		 * is exported.
3021 		 */
3022 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3023 
3024 		/*
3025 		 * if root filesystem is exported, the exportinfo struct that we
3026 		 * should use is what checkexport4 returns, because root_exi is
3027 		 * actually a mostly empty struct.
3028 		 */
3029 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3030 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3031 		cs->exi = ((exi != NULL) ? exi : exi_public);
3032 	} else {
3033 		/*
3034 		 * it's a properly shared filesystem
3035 		 */
3036 		cs->exi = exi_public;
3037 	}
3038 
3039 	VN_HOLD(vp);
3040 	cs->vp = vp;
3041 
3042 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3043 		VN_RELE(cs->vp);
3044 		cs->vp = NULL;
3045 		cs->exi = sav_exi;
3046 		return;
3047 	}
3048 
3049 	*cs->statusp = resp->status = NFS4_OK;
3050 }
3051 
3052 /*
3053  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3054  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3055  * or joe have restrictive search permissions, then we shouldn't let
3056  * the client get a file handle. This is easy to enforce. However, we
3057  * don't know what security flavor should be used until we resolve the
3058  * path name. Another complication is uid mapping. If root is
3059  * the user, then it will be mapped to the anonymous user by default,
3060  * but we won't know that till we've resolved the path name. And we won't
3061  * know what the anonymous user is.
3062  * Luckily, SECINFO is specified to take a full filename.
3063  * So what we will have to in rfs4_op_lookup is check that flavor of
3064  * the target object matches that of the request, and if root was the
3065  * caller, check for the root= and anon= options, and if necessary,
3066  * repeat the lookup using the right cred_t. But that's not done yet.
3067  */
3068 /* ARGSUSED */
3069 static void
3070 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3071 	struct compound_state *cs)
3072 {
3073 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3074 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3075 	nfs_fh4_fmt_t *fh_fmtp;
3076 
3077 	if (cs->vp) {
3078 		VN_RELE(cs->vp);
3079 		cs->vp = NULL;
3080 	}
3081 
3082 	if (cs->cr) {
3083 		crfree(cs->cr);
3084 		cs->cr = NULL;
3085 	}
3086 
3087 
3088 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3089 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3090 		return;
3091 	}
3092 
3093 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3094 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3095 				NULL);
3096 
3097 	if (cs->exi == NULL) {
3098 		*cs->statusp = resp->status = NFS4ERR_STALE;
3099 		return;
3100 	}
3101 
3102 	cs->cr = crdup(cs->basecr);
3103 
3104 	ASSERT(cs->cr != NULL);
3105 
3106 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3107 		*cs->statusp = resp->status;
3108 		return;
3109 	}
3110 
3111 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3112 		VN_RELE(cs->vp);
3113 		cs->vp = NULL;
3114 		return;
3115 	}
3116 
3117 	nfs_fh4_copy(&args->object, &cs->fh);
3118 	*cs->statusp = resp->status = NFS4_OK;
3119 	cs->deleg = FALSE;
3120 }
3121 
3122 /* ARGSUSED */
3123 static void
3124 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125 	struct compound_state *cs)
3126 
3127 {
3128 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3129 	int error;
3130 	fid_t fid;
3131 	struct exportinfo *exi, *sav_exi;
3132 
3133 	if (cs->vp) {
3134 		VN_RELE(cs->vp);
3135 		cs->vp = NULL;
3136 	}
3137 
3138 	if (cs->cr)
3139 		crfree(cs->cr);
3140 
3141 	cs->cr = crdup(cs->basecr);
3142 
3143 	/*
3144 	 * Using rootdir, the system root vnode,
3145 	 * get its fid.
3146 	 */
3147 	bzero(&fid, sizeof (fid));
3148 	fid.fid_len = MAXFIDSZ;
3149 	error = vop_fid_pseudo(rootdir, &fid);
3150 	if (error != 0) {
3151 		*cs->statusp = resp->status = puterrno4(error);
3152 		return;
3153 	}
3154 
3155 	/*
3156 	 * Then use the root fsid & fid it to find out if it's exported
3157 	 *
3158 	 * If the server root isn't exported directly, then
3159 	 * it should at least be a pseudo export based on
3160 	 * one or more exports further down in the server's
3161 	 * file tree.
3162 	 */
3163 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3164 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3165 		NFS4_DEBUG(rfs4_debug,
3166 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3167 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3168 		return;
3169 	}
3170 
3171 	/*
3172 	 * Now make a filehandle based on the root
3173 	 * export and root vnode.
3174 	 */
3175 	error = makefh4(&cs->fh, rootdir, exi);
3176 	if (error != 0) {
3177 		*cs->statusp = resp->status = puterrno4(error);
3178 		return;
3179 	}
3180 
3181 	sav_exi = cs->exi;
3182 	cs->exi = exi;
3183 
3184 	VN_HOLD(rootdir);
3185 	cs->vp = rootdir;
3186 
3187 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3188 		VN_RELE(rootdir);
3189 		cs->vp = NULL;
3190 		cs->exi = sav_exi;
3191 		return;
3192 	}
3193 
3194 	*cs->statusp = resp->status = NFS4_OK;
3195 	cs->deleg = FALSE;
3196 }
3197 
3198 /*
3199  * A directory entry is a valid nfsv4 entry if
3200  * - it has a non-zero ino
3201  * - it is not a dot or dotdot name
3202  * - it is visible in a pseudo export or in a real export that can
3203  *   only have a limited view.
3204  */
3205 static bool_t
3206 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3207 		int *expseudo, int check_visible)
3208 {
3209 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3210 		*expseudo = 0;
3211 		return (FALSE);
3212 	}
3213 
3214 	if (! check_visible) {
3215 		*expseudo = 0;
3216 		return (TRUE);
3217 	}
3218 
3219 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3220 }
3221 
3222 /*
3223  * set_rdattr_params sets up the variables used to manage what information
3224  * to get for each directory entry.
3225  */
3226 static nfsstat4
3227 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3228 		bitmap4 attrs, bool_t *need_to_lookup)
3229 {
3230 	uint_t	va_mask;
3231 	nfsstat4 status;
3232 	bitmap4 objbits;
3233 
3234 	status = bitmap4_to_attrmask(attrs, sargp);
3235 	if (status != NFS4_OK) {
3236 		/*
3237 		 * could not even figure attr mask
3238 		 */
3239 		return (status);
3240 	}
3241 	va_mask = sargp->vap->va_mask;
3242 
3243 	/*
3244 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3245 	 * mntdfid_set is set once here, but mounted_on_fileid is
3246 	 * set in main dirent processing loop for each dirent.
3247 	 * The mntdfid_set is a simple optimization that lets the
3248 	 * server attr code avoid work when caller is readdir.
3249 	 */
3250 	sargp->mntdfid_set = TRUE;
3251 
3252 	/*
3253 	 * Lookup entry only if client asked for any of the following:
3254 	 * a) vattr attrs
3255 	 * b) vfs attrs
3256 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3257 	 *    other than mounted_on_fileid (which we can take from dirent)
3258 	 */
3259 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3260 
3261 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3262 		*need_to_lookup = TRUE;
3263 	else
3264 		*need_to_lookup = FALSE;
3265 
3266 	if (sargp->sbp == NULL)
3267 		return (NFS4_OK);
3268 
3269 	/*
3270 	 * If filesystem attrs are requested, get them now from the
3271 	 * directory vp, as most entries will have same filesystem. The only
3272 	 * exception are mounted over entries but we handle
3273 	 * those as we go (XXX mounted over detection not yet implemented).
3274 	 */
3275 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3276 	status = bitmap4_get_sysattrs(sargp);
3277 	sargp->vap->va_mask = va_mask;
3278 
3279 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3280 		/*
3281 		 * Failed to get filesystem attributes.
3282 		 * Return a rdattr_error for each entry, but don't fail.
3283 		 * However, don't get any obj-dependent attrs.
3284 		 */
3285 		sargp->rdattr_error = status;	/* for rdattr_error */
3286 		*need_to_lookup = FALSE;
3287 		/*
3288 		 * At least get fileid for regular readdir output
3289 		 */
3290 		sargp->vap->va_mask &= AT_NODEID;
3291 		status = NFS4_OK;
3292 	}
3293 
3294 	return (status);
3295 }
3296 
3297 /*
3298  * readlink: args: CURRENT_FH.
3299  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3300  */
3301 
3302 /* ARGSUSED */
3303 static void
3304 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3305 	struct compound_state *cs)
3306 {
3307 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3308 	int error;
3309 	vnode_t *vp;
3310 	struct iovec iov;
3311 	struct vattr va;
3312 	struct uio uio;
3313 	char *data;
3314 
3315 	/* CURRENT_FH: directory */
3316 	vp = cs->vp;
3317 	if (vp == NULL) {
3318 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3319 		return;
3320 	}
3321 
3322 	if (cs->access == CS_ACCESS_DENIED) {
3323 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3324 		return;
3325 	}
3326 
3327 	if (vp->v_type == VDIR) {
3328 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3329 		return;
3330 	}
3331 
3332 	if (vp->v_type != VLNK) {
3333 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3334 		return;
3335 	}
3336 
3337 	va.va_mask = AT_MODE;
3338 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3339 	if (error) {
3340 		*cs->statusp = resp->status = puterrno4(error);
3341 		return;
3342 	}
3343 
3344 	if (MANDLOCK(vp, va.va_mode)) {
3345 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3346 		return;
3347 	}
3348 
3349 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3350 
3351 	iov.iov_base = data;
3352 	iov.iov_len = MAXPATHLEN;
3353 	uio.uio_iov = &iov;
3354 	uio.uio_iovcnt = 1;
3355 	uio.uio_segflg = UIO_SYSSPACE;
3356 	uio.uio_extflg = UIO_COPY_CACHED;
3357 	uio.uio_loffset = 0;
3358 	uio.uio_resid = MAXPATHLEN;
3359 
3360 	error = VOP_READLINK(vp, &uio, cs->cr);
3361 
3362 	if (error) {
3363 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3364 		*cs->statusp = resp->status = puterrno4(error);
3365 		return;
3366 	}
3367 
3368 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3369 
3370 	/*
3371 	 * treat link name as data
3372 	 */
3373 	(void) str_to_utf8(data, &resp->link);
3374 
3375 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3376 	*cs->statusp = resp->status = NFS4_OK;
3377 }
3378 
3379 static void
3380 rfs4_op_readlink_free(nfs_resop4 *resop)
3381 {
3382 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3383 	utf8string *symlink = &resp->link;
3384 
3385 	if (symlink->utf8string_val) {
3386 		UTF8STRING_FREE(*symlink)
3387 	}
3388 }
3389 
3390 /*
3391  * release_lockowner:
3392  *	Release any state associated with the supplied
3393  *	lockowner. Note if any lo_state is holding locks we will not
3394  *	rele that lo_state and thus the lockowner will not be destroyed.
3395  *	A client using lock after the lock owner stateid has been released
3396  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3397  *	to reissue the lock with new_lock_owner set to TRUE.
3398  *	args: lock_owner
3399  *	res:  status
3400  */
3401 /* ARGSUSED */
3402 static void
3403 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3404 	struct svc_req *req, struct compound_state *cs)
3405 {
3406 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3407 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3408 	rfs4_lockowner_t *lo;
3409 	rfs4_openowner_t *oop;
3410 	rfs4_state_t *sp;
3411 	rfs4_lo_state_t *lsp;
3412 	rfs4_client_t *cp;
3413 	bool_t create = FALSE;
3414 	locklist_t *llist;
3415 	sysid_t sysid;
3416 
3417 	/* Make sure there is a clientid around for this request */
3418 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3419 
3420 	if (cp == NULL) {
3421 		*cs->statusp = resp->status =
3422 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3423 		return;
3424 	}
3425 	rfs4_client_rele(cp);
3426 
3427 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3428 	if (lo == NULL) {
3429 		*cs->statusp = resp->status = NFS4_OK;
3430 		return;
3431 	}
3432 	ASSERT(lo->client != NULL);
3433 
3434 	/*
3435 	 * Check for EXPIRED client. If so will reap state with in a lease
3436 	 * period or on next set_clientid_confirm step
3437 	 */
3438 	if (rfs4_lease_expired(lo->client)) {
3439 		rfs4_lockowner_rele(lo);
3440 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3441 		return;
3442 	}
3443 
3444 	/*
3445 	 * If no sysid has been assigned, then no locks exist; just return.
3446 	 */
3447 	rfs4_dbe_lock(lo->client->dbe);
3448 	if (lo->client->sysidt == LM_NOSYSID) {
3449 		rfs4_lockowner_rele(lo);
3450 		rfs4_dbe_unlock(lo->client->dbe);
3451 		return;
3452 	}
3453 
3454 	sysid = lo->client->sysidt;
3455 	rfs4_dbe_unlock(lo->client->dbe);
3456 
3457 	/*
3458 	 * Mark the lockowner invalid.
3459 	 */
3460 	rfs4_dbe_hide(lo->dbe);
3461 
3462 	/*
3463 	 * sysid-pid pair should now not be used since the lockowner is
3464 	 * invalid. If the client were to instantiate the lockowner again
3465 	 * it would be assigned a new pid. Thus we can get the list of
3466 	 * current locks.
3467 	 */
3468 
3469 	llist = flk_get_active_locks(sysid, lo->pid);
3470 	/* If we are still holding locks fail */
3471 	if (llist != NULL) {
3472 
3473 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3474 
3475 		flk_free_locklist(llist);
3476 		/*
3477 		 * We need to unhide the lockowner so the client can
3478 		 * try it again. The bad thing here is if the client
3479 		 * has a logic error that took it here in the first place
3480 		 * he probably has lost accounting of the locks that it
3481 		 * is holding. So we may have dangling state until the
3482 		 * open owner state is reaped via close. One scenario
3483 		 * that could possibly occur is that the client has
3484 		 * sent the unlock request(s) in separate threads
3485 		 * and has not waited for the replies before sending the
3486 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3487 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3488 		 * reissuing the request.
3489 		 */
3490 		rfs4_dbe_unhide(lo->dbe);
3491 		rfs4_lockowner_rele(lo);
3492 		return;
3493 	}
3494 
3495 	/*
3496 	 * For the corresponding client we need to check each open
3497 	 * owner for any opens that have lockowner state associated
3498 	 * with this lockowner.
3499 	 */
3500 
3501 	rfs4_dbe_lock(lo->client->dbe);
3502 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3503 	    oop = oop->openownerlist.next->oop) {
3504 
3505 		rfs4_dbe_lock(oop->dbe);
3506 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3507 		    sp = sp->ownerstateids.next->sp) {
3508 
3509 			rfs4_dbe_lock(sp->dbe);
3510 			for (lsp = sp->lockownerlist.next->lsp;
3511 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3512 				if (lsp->locker == lo) {
3513 					rfs4_dbe_lock(lsp->dbe);
3514 					rfs4_dbe_invalidate(lsp->dbe);
3515 					rfs4_dbe_unlock(lsp->dbe);
3516 				}
3517 			}
3518 			rfs4_dbe_unlock(sp->dbe);
3519 		}
3520 		rfs4_dbe_unlock(oop->dbe);
3521 	}
3522 	rfs4_dbe_unlock(lo->client->dbe);
3523 
3524 	rfs4_lockowner_rele(lo);
3525 
3526 	*cs->statusp = resp->status = NFS4_OK;
3527 }
3528 
3529 /*
3530  * short utility function to lookup a file and recall the delegation
3531  */
3532 static rfs4_file_t *
3533 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3534 	int *lkup_error, cred_t *cr)
3535 {
3536 	vnode_t *vp;
3537 	rfs4_file_t *fp = NULL;
3538 	bool_t fcreate = FALSE;
3539 	int error;
3540 
3541 	if (vpp)
3542 		*vpp = NULL;
3543 
3544 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3545 		if (vp->v_type == VREG)
3546 			fp = rfs4_findfile(vp, NULL, &fcreate);
3547 		if (vpp)
3548 			*vpp = vp;
3549 		else
3550 			VN_RELE(vp);
3551 	}
3552 
3553 	if (lkup_error)
3554 		*lkup_error = error;
3555 
3556 	return (fp);
3557 }
3558 
3559 /*
3560  * remove: args: CURRENT_FH: directory; name.
3561  *	res: status. If success - CURRENT_FH unchanged, return change_info
3562  *		for directory.
3563  */
3564 /* ARGSUSED */
3565 static void
3566 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3567 	struct compound_state *cs)
3568 {
3569 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3570 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3571 	int error;
3572 	vnode_t *dvp, *vp;
3573 	struct vattr bdva, idva, adva;
3574 	char *nm;
3575 	uint_t len;
3576 	rfs4_file_t *fp;
3577 	int in_crit = 0;
3578 
3579 	/* CURRENT_FH: directory */
3580 	dvp = cs->vp;
3581 	if (dvp == NULL) {
3582 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3583 		return;
3584 	}
3585 
3586 	if (cs->access == CS_ACCESS_DENIED) {
3587 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3588 		return;
3589 	}
3590 
3591 	/*
3592 	 * If there is an unshared filesystem mounted on this vnode,
3593 	 * Do not allow to remove anything in this directory.
3594 	 */
3595 	if (vn_ismntpt(dvp)) {
3596 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3597 		return;
3598 	}
3599 
3600 	if (dvp->v_type != VDIR) {
3601 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3602 		return;
3603 	}
3604 
3605 	if (!utf8_dir_verify(&args->target)) {
3606 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3607 		return;
3608 	}
3609 
3610 	/*
3611 	 * Lookup the file so that we can check if it's a directory
3612 	 */
3613 	nm = utf8_to_fn(&args->target, &len, NULL);
3614 	if (nm == NULL) {
3615 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3616 		return;
3617 	}
3618 
3619 	if (len > MAXNAMELEN) {
3620 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3621 		kmem_free(nm, len);
3622 		return;
3623 	}
3624 
3625 	if (rdonly4(cs->exi, cs->vp, req)) {
3626 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3627 		kmem_free(nm, len);
3628 		return;
3629 	}
3630 
3631 	/*
3632 	 * Lookup the file to determine type and while we are see if
3633 	 * there is a file struct around and check for delegation.
3634 	 * We don't need to acquire va_seq before this lookup, if
3635 	 * it causes an update, cinfo.before will not match, which will
3636 	 * trigger a cache flush even if atomic is TRUE.
3637 	 */
3638 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3639 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3640 						NULL)) {
3641 			VN_RELE(vp);
3642 			rfs4_file_rele(fp);
3643 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3644 			kmem_free(nm, len);
3645 			return;
3646 		}
3647 	}
3648 
3649 	/* Didn't find anything to remove */
3650 	if (vp == NULL) {
3651 		*cs->statusp = resp->status = error;
3652 		kmem_free(nm, len);
3653 		return;
3654 	}
3655 
3656 	if (nbl_need_check(vp)) {
3657 		nbl_start_crit(vp, RW_READER);
3658 		in_crit = 1;
3659 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3660 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3661 			kmem_free(nm, len);
3662 			nbl_end_crit(vp);
3663 			VN_RELE(vp);
3664 			if (fp) {
3665 				rfs4_clear_dont_grant(fp);
3666 				rfs4_file_rele(fp);
3667 			}
3668 			return;
3669 		}
3670 	}
3671 
3672 	/* Get dir "before" change value */
3673 	bdva.va_mask = AT_CTIME|AT_SEQ;
3674 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3675 	if (error) {
3676 		*cs->statusp = resp->status = puterrno4(error);
3677 		kmem_free(nm, len);
3678 		return;
3679 	}
3680 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3681 
3682 	/* Actually do the REMOVE operation */
3683 	if (vp->v_type == VDIR) {
3684 		/*
3685 		 * Can't remove a directory that has a mounted-on filesystem.
3686 		 */
3687 		if (vn_ismntpt(vp)) {
3688 			error = EACCES;
3689 		} else {
3690 			/*
3691 			 * System V defines rmdir to return EEXIST,
3692 			 * not * ENOTEMPTY, if the directory is not
3693 			 * empty.  A System V NFS server needs to map
3694 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3695 			 * transmit over the wire.
3696 			 */
3697 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3698 				== EEXIST)
3699 				error = ENOTEMPTY;
3700 		}
3701 	} else {
3702 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3703 			fp != NULL) {
3704 			struct vattr va;
3705 			vnode_t *tvp;
3706 
3707 			rfs4_dbe_lock(fp->dbe);
3708 			tvp = fp->vp;
3709 			if (tvp)
3710 				VN_HOLD(tvp);
3711 			rfs4_dbe_unlock(fp->dbe);
3712 
3713 			if (tvp) {
3714 				/*
3715 				 * This is va_seq safe because we are not
3716 				 * manipulating dvp.
3717 				 */
3718 				va.va_mask = AT_NLINK;
3719 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3720 					va.va_nlink == 0) {
3721 					/* Remove state on file remove */
3722 					if (in_crit) {
3723 						nbl_end_crit(vp);
3724 						in_crit = 0;
3725 					}
3726 					rfs4_close_all_state(fp);
3727 				}
3728 				VN_RELE(tvp);
3729 			}
3730 		}
3731 	}
3732 
3733 	if (in_crit)
3734 		nbl_end_crit(vp);
3735 	VN_RELE(vp);
3736 
3737 	if (fp) {
3738 		rfs4_clear_dont_grant(fp);
3739 		rfs4_file_rele(fp);
3740 	}
3741 	kmem_free(nm, len);
3742 
3743 	if (error) {
3744 		*cs->statusp = resp->status = puterrno4(error);
3745 		return;
3746 	}
3747 
3748 	/*
3749 	 * Get the initial "after" sequence number, if it fails, set to zero
3750 	 */
3751 	idva.va_mask = AT_SEQ;
3752 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3753 		idva.va_seq = 0;
3754 
3755 	/*
3756 	 * Force modified data and metadata out to stable storage.
3757 	 */
3758 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3759 
3760 	/*
3761 	 * Get "after" change value, if it fails, simply return the
3762 	 * before value.
3763 	 */
3764 	adva.va_mask = AT_CTIME|AT_SEQ;
3765 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3766 		adva.va_ctime = bdva.va_ctime;
3767 		adva.va_seq = 0;
3768 	}
3769 
3770 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3771 
3772 	/*
3773 	 * The cinfo.atomic = TRUE only if we have
3774 	 * non-zero va_seq's, and it has incremented by exactly one
3775 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3776 	 * the VOP_FSYNC.
3777 	 */
3778 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3779 			idva.va_seq == (bdva.va_seq + 1) &&
3780 			idva.va_seq == adva.va_seq)
3781 		resp->cinfo.atomic = TRUE;
3782 	else
3783 		resp->cinfo.atomic = FALSE;
3784 
3785 	*cs->statusp = resp->status = NFS4_OK;
3786 }
3787 
3788 /*
3789  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3790  *		oldname and newname.
3791  *	res: status. If success - CURRENT_FH unchanged, return change_info
3792  *		for both from and target directories.
3793  */
3794 /* ARGSUSED */
3795 static void
3796 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3797 	struct compound_state *cs)
3798 {
3799 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3800 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3801 	int error;
3802 	vnode_t *odvp;
3803 	vnode_t *ndvp;
3804 	vnode_t *srcvp, *targvp;
3805 	struct vattr obdva, oidva, oadva;
3806 	struct vattr nbdva, nidva, nadva;
3807 	char *onm, *nnm;
3808 	uint_t olen, nlen;
3809 	rfs4_file_t *fp, *sfp;
3810 	int in_crit_src, in_crit_targ;
3811 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3812 
3813 	fp = sfp = NULL;
3814 	srcvp = targvp = NULL;
3815 	in_crit_src = in_crit_targ = 0;
3816 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3817 
3818 	/* CURRENT_FH: target directory */
3819 	ndvp = cs->vp;
3820 	if (ndvp == NULL) {
3821 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3822 		return;
3823 	}
3824 
3825 	/* SAVED_FH: from directory */
3826 	odvp = cs->saved_vp;
3827 	if (odvp == NULL) {
3828 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3829 		return;
3830 	}
3831 
3832 	if (cs->access == CS_ACCESS_DENIED) {
3833 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3834 		return;
3835 	}
3836 
3837 	/*
3838 	 * If there is an unshared filesystem mounted on this vnode,
3839 	 * do not allow to rename objects in this directory.
3840 	 */
3841 	if (vn_ismntpt(odvp)) {
3842 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3843 		return;
3844 	}
3845 
3846 	/*
3847 	 * If there is an unshared filesystem mounted on this vnode,
3848 	 * do not allow to rename to this directory.
3849 	 */
3850 	if (vn_ismntpt(ndvp)) {
3851 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3852 		return;
3853 	}
3854 
3855 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
3856 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3857 		return;
3858 	}
3859 
3860 	if (cs->saved_exi != cs->exi) {
3861 		*cs->statusp = resp->status = NFS4ERR_XDEV;
3862 		return;
3863 	}
3864 
3865 	if (!utf8_dir_verify(&args->oldname)) {
3866 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3867 		return;
3868 	}
3869 
3870 	if (!utf8_dir_verify(&args->newname)) {
3871 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3872 		return;
3873 	}
3874 
3875 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
3876 	if (onm == NULL) {
3877 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3878 		return;
3879 	}
3880 
3881 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
3882 	if (nnm == NULL) {
3883 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3884 		kmem_free(onm, olen);
3885 		return;
3886 	}
3887 
3888 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
3889 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3890 		kmem_free(onm, olen);
3891 		kmem_free(nnm, nlen);
3892 		return;
3893 	}
3894 
3895 
3896 	if (rdonly4(cs->exi, cs->vp, req)) {
3897 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3898 		kmem_free(onm, olen);
3899 		kmem_free(nnm, nlen);
3900 		return;
3901 	}
3902 
3903 	/*
3904 	 * Is the source a file and have a delegation?
3905 	 * We don't need to acquire va_seq before these lookups, if
3906 	 * it causes an update, cinfo.before will not match, which will
3907 	 * trigger a cache flush even if atomic is TRUE.
3908 	 */
3909 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
3910 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
3911 						NULL)) {
3912 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3913 			goto err_out;
3914 		}
3915 	}
3916 
3917 	if (srcvp == NULL) {
3918 		*cs->statusp = resp->status = puterrno4(error);
3919 		kmem_free(onm, olen);
3920 		kmem_free(nnm, nlen);
3921 		return;
3922 	}
3923 
3924 	sfp_rele_grant_hold = 1;
3925 
3926 	/* Does the destination exist and a file and have a delegation? */
3927 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
3928 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3929 						NULL)) {
3930 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3931 			goto err_out;
3932 		}
3933 	}
3934 	fp_rele_grant_hold = 1;
3935 
3936 
3937 	/* Check for NBMAND lock on both source and target */
3938 	if (nbl_need_check(srcvp)) {
3939 		nbl_start_crit(srcvp, RW_READER);
3940 		in_crit_src = 1;
3941 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
3942 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3943 			goto err_out;
3944 		}
3945 	}
3946 
3947 	if (targvp && nbl_need_check(targvp)) {
3948 		nbl_start_crit(targvp, RW_READER);
3949 		in_crit_targ = 1;
3950 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
3951 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3952 			goto err_out;
3953 		}
3954 	}
3955 
3956 	/* Get source "before" change value */
3957 	obdva.va_mask = AT_CTIME|AT_SEQ;
3958 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
3959 	if (!error) {
3960 		nbdva.va_mask = AT_CTIME|AT_SEQ;
3961 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
3962 	}
3963 	if (error) {
3964 		*cs->statusp = resp->status = puterrno4(error);
3965 		goto err_out;
3966 	}
3967 
3968 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
3969 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
3970 
3971 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
3972 		fp != NULL) {
3973 		struct vattr va;
3974 		vnode_t *tvp;
3975 
3976 		rfs4_dbe_lock(fp->dbe);
3977 		tvp = fp->vp;
3978 		if (tvp)
3979 			VN_HOLD(tvp);
3980 		rfs4_dbe_unlock(fp->dbe);
3981 
3982 		if (tvp) {
3983 			va.va_mask = AT_NLINK;
3984 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3985 				va.va_nlink == 0) {
3986 				/* The file is gone and so should the state */
3987 				if (in_crit_targ) {
3988 					nbl_end_crit(targvp);
3989 					in_crit_targ = 0;
3990 				}
3991 				rfs4_close_all_state(fp);
3992 			}
3993 			VN_RELE(tvp);
3994 		}
3995 	}
3996 
3997 	if (in_crit_src)
3998 		nbl_end_crit(srcvp);
3999 	if (srcvp)
4000 		VN_RELE(srcvp);
4001 	if (in_crit_targ)
4002 		nbl_end_crit(targvp);
4003 	if (targvp)
4004 		VN_RELE(targvp);
4005 
4006 	if (sfp) {
4007 		rfs4_clear_dont_grant(sfp);
4008 		rfs4_file_rele(sfp);
4009 	}
4010 	if (fp) {
4011 		rfs4_clear_dont_grant(fp);
4012 		rfs4_file_rele(fp);
4013 	}
4014 
4015 	kmem_free(onm, olen);
4016 	kmem_free(nnm, nlen);
4017 
4018 	/*
4019 	 * Get the initial "after" sequence number, if it fails, set to zero
4020 	 */
4021 	oidva.va_mask = AT_SEQ;
4022 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4023 		oidva.va_seq = 0;
4024 
4025 	nidva.va_mask = AT_SEQ;
4026 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4027 		nidva.va_seq = 0;
4028 
4029 	/*
4030 	 * Force modified data and metadata out to stable storage.
4031 	 */
4032 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4033 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4034 
4035 	if (error) {
4036 		*cs->statusp = resp->status = puterrno4(error);
4037 		return;
4038 	}
4039 
4040 	/*
4041 	 * Get "after" change values, if it fails, simply return the
4042 	 * before value.
4043 	 */
4044 	oadva.va_mask = AT_CTIME|AT_SEQ;
4045 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4046 		oadva.va_ctime = obdva.va_ctime;
4047 		oadva.va_seq = 0;
4048 	}
4049 
4050 	nadva.va_mask = AT_CTIME|AT_SEQ;
4051 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4052 		nadva.va_ctime = nbdva.va_ctime;
4053 		nadva.va_seq = 0;
4054 	}
4055 
4056 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4057 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4058 
4059 	/*
4060 	 * The cinfo.atomic = TRUE only if we have
4061 	 * non-zero va_seq's, and it has incremented by exactly one
4062 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4063 	 */
4064 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4065 			oidva.va_seq == (obdva.va_seq + 1) &&
4066 			oidva.va_seq == oadva.va_seq)
4067 		resp->source_cinfo.atomic = TRUE;
4068 	else
4069 		resp->source_cinfo.atomic = FALSE;
4070 
4071 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4072 			nidva.va_seq == (nbdva.va_seq + 1) &&
4073 			nidva.va_seq == nadva.va_seq)
4074 		resp->target_cinfo.atomic = TRUE;
4075 	else
4076 		resp->target_cinfo.atomic = FALSE;
4077 
4078 #ifdef	VOLATILE_FH_TEST
4079 	{
4080 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4081 
4082 	/*
4083 	 * Add the renamed file handle to the volatile rename list
4084 	 */
4085 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4086 		/* file handles may expire on rename */
4087 		vnode_t *vp;
4088 
4089 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4090 		/*
4091 		 * Already know that nnm will be a valid string
4092 		 */
4093 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4094 		kmem_free(nnm, nlen);
4095 		if (!error) {
4096 			add_volrnm_fh(cs->exi, vp);
4097 			VN_RELE(vp);
4098 		}
4099 	}
4100 	}
4101 #endif	/* VOLATILE_FH_TEST */
4102 
4103 	*cs->statusp = resp->status = NFS4_OK;
4104 	return;
4105 
4106 err_out:
4107 	kmem_free(onm, olen);
4108 	kmem_free(nnm, nlen);
4109 
4110 	if (in_crit_src) nbl_end_crit(srcvp);
4111 	if (in_crit_targ) nbl_end_crit(targvp);
4112 	if (targvp) VN_RELE(targvp);
4113 	if (srcvp) VN_RELE(srcvp);
4114 	if (sfp) {
4115 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4116 		rfs4_file_rele(sfp);
4117 	}
4118 	if (fp) {
4119 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4120 		rfs4_file_rele(fp);
4121 	}
4122 }
4123 
4124 /* ARGSUSED */
4125 static void
4126 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4127 	struct compound_state *cs)
4128 {
4129 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4130 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4131 	rfs4_client_t *cp;
4132 
4133 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4134 		*cs->statusp = resp->status =
4135 			rfs4_check_clientid(&args->clientid, 0);
4136 		return;
4137 	}
4138 
4139 	if (rfs4_lease_expired(cp)) {
4140 		rfs4_client_rele(cp);
4141 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4142 		return;
4143 	}
4144 
4145 	rfs4_update_lease(cp);
4146 
4147 	mutex_enter(cp->cbinfo.cb_lock);
4148 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4149 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4150 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4151 	} else {
4152 		*cs->statusp = resp->status = NFS4_OK;
4153 	}
4154 	mutex_exit(cp->cbinfo.cb_lock);
4155 
4156 	rfs4_client_rele(cp);
4157 
4158 }
4159 
4160 /* ARGSUSED */
4161 static void
4162 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4163 	struct compound_state *cs)
4164 {
4165 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4166 
4167 	/* No need to check cs->access - we are not accessing any object */
4168 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4169 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4170 		return;
4171 	}
4172 	if (cs->vp != NULL) {
4173 		VN_RELE(cs->vp);
4174 	}
4175 	cs->vp = cs->saved_vp;
4176 	cs->saved_vp = NULL;
4177 	cs->exi = cs->saved_exi;
4178 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4179 	*cs->statusp = resp->status = NFS4_OK;
4180 	cs->deleg = FALSE;
4181 }
4182 
4183 /* ARGSUSED */
4184 static void
4185 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4186 	struct compound_state *cs)
4187 {
4188 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4189 
4190 	/* No need to check cs->access - we are not accessing any object */
4191 	if (cs->vp == NULL) {
4192 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4193 		return;
4194 	}
4195 	if (cs->saved_vp != NULL) {
4196 		VN_RELE(cs->saved_vp);
4197 	}
4198 	cs->saved_vp = cs->vp;
4199 	VN_HOLD(cs->saved_vp);
4200 	cs->saved_exi = cs->exi;
4201 	/*
4202 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4203 	 * unless necessary.
4204 	 */
4205 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4206 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4207 	}
4208 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4209 	*cs->statusp = resp->status = NFS4_OK;
4210 }
4211 
4212 /*
4213  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4214  * return the bitmap of attrs that were set successfully. It is also
4215  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4216  * always be called only after rfs4_do_set_attrs().
4217  *
4218  * Verify that the attributes are same as the expected ones. sargp->vap
4219  * and sargp->sbp contain the input attributes as translated from fattr4.
4220  *
4221  * This function verifies only the attrs that correspond to a vattr or
4222  * vfsstat struct. That is because of the extra step needed to get the
4223  * corresponding system structs. Other attributes have already been set or
4224  * verified by do_rfs4_set_attrs.
4225  *
4226  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4227  */
4228 static int
4229 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4230 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4231 {
4232 	int error, ret_error = 0;
4233 	int i, k;
4234 	uint_t sva_mask = sargp->vap->va_mask;
4235 	uint_t vbit;
4236 	union nfs4_attr_u *na;
4237 	uint8_t *amap;
4238 	bool_t getsb = ntovp->vfsstat;
4239 
4240 	if (sva_mask != 0) {
4241 		/*
4242 		 * Okay to overwrite sargp->vap because we verify based
4243 		 * on the incoming values.
4244 		 */
4245 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4246 				sargp->cs->cr);
4247 		if (ret_error) {
4248 			if (resp == NULL)
4249 				return (ret_error);
4250 			/*
4251 			 * Must return bitmap of successful attrs
4252 			 */
4253 			sva_mask = 0;	/* to prevent checking vap later */
4254 		} else {
4255 			/*
4256 			 * Some file systems clobber va_mask. it is probably
4257 			 * wrong of them to do so, nonethless we practice
4258 			 * defensive coding.
4259 			 * See bug id 4276830.
4260 			 */
4261 			sargp->vap->va_mask = sva_mask;
4262 		}
4263 	}
4264 
4265 	if (getsb) {
4266 		/*
4267 		 * Now get the superblock and loop on the bitmap, as there is
4268 		 * no simple way of translating from superblock to bitmap4.
4269 		 */
4270 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4271 		if (ret_error) {
4272 			if (resp == NULL)
4273 				goto errout;
4274 			getsb = FALSE;
4275 		}
4276 	}
4277 
4278 	/*
4279 	 * Now loop and verify each attribute which getattr returned
4280 	 * whether it's the same as the input.
4281 	 */
4282 	if (resp == NULL && !getsb && (sva_mask == 0))
4283 		goto errout;
4284 
4285 	na = ntovp->na;
4286 	amap = ntovp->amap;
4287 	k = 0;
4288 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4289 		k = *amap;
4290 		ASSERT(nfs4_ntov_map[k].nval == k);
4291 		vbit = nfs4_ntov_map[k].vbit;
4292 
4293 		/*
4294 		 * If vattr attribute but VOP_GETATTR failed, or it's
4295 		 * superblock attribute but VFS_STATVFS failed, skip
4296 		 */
4297 		if (vbit) {
4298 			if ((vbit & sva_mask) == 0)
4299 				continue;
4300 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4301 			continue;
4302 		}
4303 		error = (*nfs4_ntov_map[k].sv_getit)(
4304 				NFS4ATTR_VERIT, sargp, na);
4305 		if (resp != NULL) {
4306 			if (error)
4307 				ret_error = -1;	/* not all match */
4308 			else	/* update response bitmap */
4309 				*resp |= nfs4_ntov_map[k].fbit;
4310 			continue;
4311 		}
4312 		if (error) {
4313 			ret_error = -1;	/* not all match */
4314 			break;
4315 		}
4316 	}
4317 errout:
4318 	return (ret_error);
4319 }
4320 
4321 /*
4322  * Decode the attribute to be set/verified. If the attr requires a sys op
4323  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4324  * call the sv_getit function for it, because the sys op hasn't yet been done.
4325  * Return 0 for success, error code if failed.
4326  *
4327  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4328  */
4329 static int
4330 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4331 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4332 {
4333 	int error = 0;
4334 	bool_t set_later;
4335 
4336 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4337 
4338 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4339 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4340 		/*
4341 		 * don't verify yet if a vattr or sb dependent attr,
4342 		 * because we don't have their sys values yet.
4343 		 * Will be done later.
4344 		 */
4345 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4346 			/*
4347 			 * ACLs are a special case, since setting the MODE
4348 			 * conflicts with setting the ACL.  We delay setting
4349 			 * the ACL until all other attributes have been set.
4350 			 * The ACL gets set in do_rfs4_op_setattr().
4351 			 */
4352 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4353 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4354 				    sargp, nap);
4355 				if (error) {
4356 					xdr_free(nfs4_ntov_map[k].xfunc,
4357 					    (caddr_t)nap);
4358 				}
4359 			}
4360 		}
4361 	} else {
4362 #ifdef  DEBUG
4363 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4364 			"decoding attribute %d\n", k);
4365 #endif
4366 		error = EINVAL;
4367 	}
4368 	if (!error && resp_bval && !set_later) {
4369 		*resp_bval |= nfs4_ntov_map[k].fbit;
4370 	}
4371 
4372 	return (error);
4373 }
4374 
4375 /*
4376  * Set vattr based on incoming fattr4 attrs - used by setattr.
4377  * Set response mask. Ignore any values that are not writable vattr attrs.
4378  */
4379 static nfsstat4
4380 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4381 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4382 		nfs4_attr_cmd_t cmd)
4383 {
4384 	int error = 0;
4385 	int i;
4386 	char *attrs = fattrp->attrlist4;
4387 	uint32_t attrslen = fattrp->attrlist4_len;
4388 	XDR xdr;
4389 	nfsstat4 status = NFS4_OK;
4390 	vnode_t *vp = cs->vp;
4391 	union nfs4_attr_u *na;
4392 	uint8_t *amap;
4393 
4394 #ifndef lint
4395 	/*
4396 	 * Make sure that maximum attribute number can be expressed as an
4397 	 * 8 bit quantity.
4398 	 */
4399 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4400 #endif
4401 
4402 	if (vp == NULL) {
4403 		if (resp)
4404 			*resp = 0;
4405 		return (NFS4ERR_NOFILEHANDLE);
4406 	}
4407 	if (cs->access == CS_ACCESS_DENIED) {
4408 		if (resp)
4409 			*resp = 0;
4410 		return (NFS4ERR_ACCESS);
4411 	}
4412 
4413 	sargp->op = cmd;
4414 	sargp->cs = cs;
4415 	sargp->flag = 0;	/* may be set later */
4416 	sargp->vap->va_mask = 0;
4417 	sargp->rdattr_error = NFS4_OK;
4418 	sargp->rdattr_error_req = FALSE;
4419 	/* sargp->sbp is set by the caller */
4420 
4421 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4422 
4423 	na = ntovp->na;
4424 	amap = ntovp->amap;
4425 
4426 	/*
4427 	 * The following loop iterates on the nfs4_ntov_map checking
4428 	 * if the fbit is set in the requested bitmap.
4429 	 * If set then we process the arguments using the
4430 	 * rfs4_fattr4 conversion functions to populate the setattr
4431 	 * vattr and va_mask. Any settable attrs that are not using vattr
4432 	 * will be set in this loop.
4433 	 */
4434 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4435 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4436 			continue;
4437 		}
4438 		/*
4439 		 * If setattr, must be a writable attr.
4440 		 * If verify/nverify, must be a readable attr.
4441 		 */
4442 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4443 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4444 			/*
4445 			 * Client tries to set/verify an
4446 			 * unsupported attribute, tries to set
4447 			 * a read only attr or verify a write
4448 			 * only one - error!
4449 			 */
4450 			break;
4451 		}
4452 		/*
4453 		 * Decode the attribute to set/verify
4454 		 */
4455 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4456 					&xdr, resp ? resp : NULL, na);
4457 		if (error)
4458 			break;
4459 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4460 		na++;
4461 		(ntovp->attrcnt)++;
4462 		if (nfs4_ntov_map[i].vfsstat)
4463 			ntovp->vfsstat = TRUE;
4464 	}
4465 
4466 	if (error != 0)
4467 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4468 						puterrno4(error));
4469 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4470 	return (status);
4471 }
4472 
4473 static nfsstat4
4474 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4475 		stateid4 *stateid)
4476 {
4477 	int error = 0;
4478 	struct nfs4_svgetit_arg sarg;
4479 	bool_t trunc;
4480 
4481 	nfsstat4 status = NFS4_OK;
4482 	cred_t *cr = cs->cr;
4483 	vnode_t *vp = cs->vp;
4484 	struct nfs4_ntov_table ntov;
4485 	struct statvfs64 sb;
4486 	struct vattr bva;
4487 	struct flock64 bf;
4488 	int in_crit = 0;
4489 	uint_t saved_mask = 0;
4490 	caller_context_t ct;
4491 
4492 	*resp = 0;
4493 	sarg.sbp = &sb;
4494 	nfs4_ntov_table_init(&ntov);
4495 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4496 			NFS4ATTR_SETIT);
4497 	if (status != NFS4_OK) {
4498 		/*
4499 		 * failed set attrs
4500 		 */
4501 		goto done;
4502 	}
4503 	if ((sarg.vap->va_mask == 0) &&
4504 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4505 		/*
4506 		 * no further work to be done
4507 		 */
4508 		goto done;
4509 	}
4510 
4511 	/*
4512 	 * If we got a request to set the ACL and the MODE, only
4513 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4514 	 * to change any other bits, along with setting an ACL,
4515 	 * gives NFS4ERR_INVAL.
4516 	 */
4517 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4518 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4519 		vattr_t va;
4520 
4521 		va.va_mask = AT_MODE;
4522 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4523 		if (error) {
4524 			status = puterrno4(error);
4525 			goto done;
4526 		}
4527 		if ((sarg.vap->va_mode ^ va.va_mode) &
4528 		    ~(VSUID | VSGID | VSVTX)) {
4529 			status = NFS4ERR_INVAL;
4530 			goto done;
4531 		}
4532 	}
4533 
4534 	/* Check stateid only if size has been set */
4535 	if (sarg.vap->va_mask & AT_SIZE) {
4536 		trunc = (sarg.vap->va_size == 0);
4537 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4538 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4539 		if (status != NFS4_OK)
4540 			goto done;
4541 	}
4542 
4543 	ct.cc_sysid = 0;
4544 	ct.cc_pid = 0;
4545 	ct.cc_caller_id = nfs4_srv_caller_id;
4546 
4547 	/* XXX start of possible race with delegations */
4548 
4549 	/*
4550 	 * We need to specially handle size changes because it is
4551 	 * possible for the client to create a file with read-only
4552 	 * modes, but with the file opened for writing. If the client
4553 	 * then tries to set the file size, e.g. ftruncate(3C),
4554 	 * fcntl(F_FREESP), the normal access checking done in
4555 	 * VOP_SETATTR would prevent the client from doing it even though
4556 	 * it should be allowed to do so.  To get around this, we do the
4557 	 * access checking for ourselves and use VOP_SPACE which doesn't
4558 	 * do the access checking.
4559 	 * Also the client should not be allowed to change the file
4560 	 * size if there is a conflicting non-blocking mandatory lock in
4561 	 * the region of the change.
4562 	 */
4563 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4564 		u_offset_t offset;
4565 		ssize_t length;
4566 
4567 		/*
4568 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
4569 		 * before returning, sarg.vap->va_mask is used to
4570 		 * generate the setattr reply bitmap.  We also clear
4571 		 * AT_SIZE below before calling VOP_SPACE.  For both
4572 		 * of these cases, the va_mask needs to be saved here
4573 		 * and restored after calling VOP_SETATTR.
4574 		 */
4575 		saved_mask = sarg.vap->va_mask;
4576 
4577 		/*
4578 		 * Check any possible conflict due to NBMAND locks.
4579 		 * Get into critical region before VOP_GETATTR, so the
4580 		 * size attribute is valid when checking conflicts.
4581 		 */
4582 		if (nbl_need_check(vp)) {
4583 			nbl_start_crit(vp, RW_READER);
4584 			in_crit = 1;
4585 		}
4586 
4587 		bva.va_mask = AT_UID|AT_SIZE;
4588 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4589 			status = puterrno4(error);
4590 			goto done;
4591 		}
4592 
4593 		if (in_crit) {
4594 			if (sarg.vap->va_size < bva.va_size) {
4595 				offset = sarg.vap->va_size;
4596 				length = bva.va_size - sarg.vap->va_size;
4597 			} else {
4598 				offset = bva.va_size;
4599 				length = sarg.vap->va_size - bva.va_size;
4600 			}
4601 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4602 				status = NFS4ERR_LOCKED;
4603 				goto done;
4604 			}
4605 		}
4606 
4607 		if (crgetuid(cr) == bva.va_uid) {
4608 			sarg.vap->va_mask &= ~AT_SIZE;
4609 			bf.l_type = F_WRLCK;
4610 			bf.l_whence = 0;
4611 			bf.l_start = (off64_t)sarg.vap->va_size;
4612 			bf.l_len = 0;
4613 			bf.l_sysid = 0;
4614 			bf.l_pid = 0;
4615 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4616 					(offset_t)sarg.vap->va_size, cr, &ct);
4617 		}
4618 	}
4619 
4620 	if (!error && sarg.vap->va_mask != 0)
4621 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4622 
4623 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
4624 	if (saved_mask & AT_SIZE)
4625 		sarg.vap->va_mask |= AT_SIZE;
4626 
4627 	/*
4628 	 * If an ACL was being set, it has been delayed until now,
4629 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4630 	 */
4631 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4632 		int i;
4633 
4634 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4635 			if (ntov.amap[i] == FATTR4_ACL)
4636 				break;
4637 		if (i < NFS4_MAXNUM_ATTRS) {
4638 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4639 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4640 			if (error == 0) {
4641 				*resp |= FATTR4_ACL_MASK;
4642 			} else if (error == ENOTSUP) {
4643 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4644 				status = NFS4ERR_ATTRNOTSUPP;
4645 				goto done;
4646 			}
4647 		} else {
4648 			NFS4_DEBUG(rfs4_debug,
4649 			    (CE_NOTE, "do_rfs4_op_setattr: "
4650 			    "unable to find ACL in fattr4"));
4651 			error = EINVAL;
4652 		}
4653 	}
4654 
4655 	if (error) {
4656 		status = puterrno4(error);
4657 
4658 		/*
4659 		 * Set the response bitmap when setattr failed.
4660 		 * If VOP_SETATTR partially succeeded, test by doing a
4661 		 * VOP_GETATTR on the object and comparing the data
4662 		 * to the setattr arguments.
4663 		 */
4664 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4665 	} else {
4666 		/*
4667 		 * Force modified metadata out to stable storage.
4668 		 */
4669 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4670 		/*
4671 		 * Set response bitmap
4672 		 */
4673 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
4674 	}
4675 
4676 /* Return early and already have a NFSv4 error */
4677 done:
4678 	/*
4679 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
4680 	 * conversion sets both readable and writeable NFS4 attrs
4681 	 * for AT_MTIME and AT_ATIME.  The line below masks out
4682 	 * unrequested attrs from the setattr result bitmap.  This
4683 	 * is placed after the done: label to catch the ATTRNOTSUP
4684 	 * case.
4685 	 */
4686 	*resp &= fattrp->attrmask;
4687 
4688 	if (in_crit)
4689 		nbl_end_crit(vp);
4690 
4691 	nfs4_ntov_table_free(&ntov, &sarg);
4692 
4693 	return (status);
4694 }
4695 
4696 /* ARGSUSED */
4697 static void
4698 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4699 	struct compound_state *cs)
4700 {
4701 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4702 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4703 
4704 	if (cs->vp == NULL) {
4705 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4706 		return;
4707 	}
4708 
4709 	/*
4710 	 * If there is an unshared filesystem mounted on this vnode,
4711 	 * do not allow to setattr on this vnode.
4712 	 */
4713 	if (vn_ismntpt(cs->vp)) {
4714 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4715 		return;
4716 	}
4717 
4718 	resp->attrsset = 0;
4719 
4720 	if (rdonly4(cs->exi, cs->vp, req)) {
4721 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4722 		return;
4723 	}
4724 
4725 	*cs->statusp = resp->status =
4726 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4727 			&args->stateid);
4728 }
4729 
4730 /* ARGSUSED */
4731 static void
4732 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4733 	struct compound_state *cs)
4734 {
4735 	/*
4736 	 * verify and nverify are exactly the same, except that nverify
4737 	 * succeeds when some argument changed, and verify succeeds when
4738 	 * when none changed.
4739 	 */
4740 
4741 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4742 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4743 
4744 	int error;
4745 	struct nfs4_svgetit_arg sarg;
4746 	struct statvfs64 sb;
4747 	struct nfs4_ntov_table ntov;
4748 
4749 	if (cs->vp == NULL) {
4750 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4751 		return;
4752 	}
4753 
4754 	sarg.sbp = &sb;
4755 	nfs4_ntov_table_init(&ntov);
4756 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4757 				&sarg, &ntov, NFS4ATTR_VERIT);
4758 	if (resp->status != NFS4_OK) {
4759 		/*
4760 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4761 		 * so could return -1 for "no match".
4762 		 */
4763 		if (resp->status == -1)
4764 			resp->status = NFS4ERR_NOT_SAME;
4765 		goto done;
4766 	}
4767 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4768 	switch (error) {
4769 	case 0:
4770 		resp->status = NFS4_OK;
4771 		break;
4772 	case -1:
4773 		resp->status = NFS4ERR_NOT_SAME;
4774 		break;
4775 	default:
4776 		resp->status = puterrno4(error);
4777 		break;
4778 	}
4779 done:
4780 	*cs->statusp = resp->status;
4781 	nfs4_ntov_table_free(&ntov, &sarg);
4782 }
4783 
4784 /* ARGSUSED */
4785 static void
4786 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4787 	struct compound_state *cs)
4788 {
4789 	/*
4790 	 * verify and nverify are exactly the same, except that nverify
4791 	 * succeeds when some argument changed, and verify succeeds when
4792 	 * when none changed.
4793 	 */
4794 
4795 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
4796 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
4797 
4798 	int error;
4799 	struct nfs4_svgetit_arg sarg;
4800 	struct statvfs64 sb;
4801 	struct nfs4_ntov_table ntov;
4802 
4803 	if (cs->vp == NULL) {
4804 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4805 		return;
4806 	}
4807 	sarg.sbp = &sb;
4808 	nfs4_ntov_table_init(&ntov);
4809 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4810 				&sarg, &ntov, NFS4ATTR_VERIT);
4811 	if (resp->status != NFS4_OK) {
4812 		/*
4813 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4814 		 * so could return -1 for "no match".
4815 		 */
4816 		if (resp->status == -1)
4817 			resp->status = NFS4_OK;
4818 		goto done;
4819 	}
4820 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4821 	switch (error) {
4822 	case 0:
4823 		resp->status = NFS4ERR_SAME;
4824 		break;
4825 	case -1:
4826 		resp->status = NFS4_OK;
4827 		break;
4828 	default:
4829 		resp->status = puterrno4(error);
4830 		break;
4831 	}
4832 done:
4833 	*cs->statusp = resp->status;
4834 	nfs4_ntov_table_free(&ntov, &sarg);
4835 }
4836 
4837 /*
4838  * XXX - This should live in an NFS header file.
4839  */
4840 #define	MAX_IOVECS	12
4841 
4842 /* ARGSUSED */
4843 static void
4844 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4845 	struct compound_state *cs)
4846 {
4847 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
4848 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
4849 	int error;
4850 	vnode_t *vp;
4851 	struct vattr bva;
4852 	u_offset_t rlimit;
4853 	struct uio uio;
4854 	struct iovec iov[MAX_IOVECS];
4855 	struct iovec *iovp;
4856 	int iovcnt;
4857 	int ioflag;
4858 	cred_t *savecred, *cr;
4859 	bool_t *deleg = &cs->deleg;
4860 	nfsstat4 stat;
4861 	int in_crit = 0;
4862 
4863 	vp = cs->vp;
4864 	if (vp == NULL) {
4865 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4866 		return;
4867 	}
4868 	if (cs->access == CS_ACCESS_DENIED) {
4869 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4870 		return;
4871 	}
4872 
4873 	cr = cs->cr;
4874 
4875 	/*
4876 	 * We have to enter the critical region before calling VOP_RWLOCK
4877 	 * to avoid a deadlock with ufs.
4878 	 */
4879 	if (nbl_need_check(vp)) {
4880 		nbl_start_crit(vp, RW_READER);
4881 		in_crit = 1;
4882 		if (nbl_conflict(vp, NBL_WRITE,
4883 				args->offset, args->data_len, 0)) {
4884 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
4885 			goto out;
4886 		}
4887 	}
4888 
4889 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
4890 					deleg, TRUE)) != NFS4_OK) {
4891 		*cs->statusp = resp->status = stat;
4892 		goto out;
4893 	}
4894 
4895 	bva.va_mask = AT_MODE | AT_UID;
4896 	error = VOP_GETATTR(vp, &bva, 0, cr);
4897 
4898 	/*
4899 	 * If we can't get the attributes, then we can't do the
4900 	 * right access checking.  So, we'll fail the request.
4901 	 */
4902 	if (error) {
4903 		*cs->statusp = resp->status = puterrno4(error);
4904 		goto out;
4905 	}
4906 
4907 	if (rdonly4(cs->exi, cs->vp, req)) {
4908 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4909 		goto out;
4910 	}
4911 
4912 	if (vp->v_type != VREG) {
4913 		*cs->statusp = resp->status =
4914 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
4915 		goto out;
4916 	}
4917 
4918 	if (crgetuid(cr) != bva.va_uid &&
4919 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
4920 		*cs->statusp = resp->status = puterrno4(error);
4921 		goto out;
4922 	}
4923 
4924 	if (MANDLOCK(vp, bva.va_mode)) {
4925 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4926 		goto out;
4927 	}
4928 
4929 	if (args->data_len == 0) {
4930 		*cs->statusp = resp->status = NFS4_OK;
4931 		resp->count = 0;
4932 		resp->committed = args->stable;
4933 		resp->writeverf = Write4verf;
4934 		goto out;
4935 	}
4936 
4937 	if (args->mblk != NULL) {
4938 		mblk_t *m;
4939 		uint_t bytes, round_len;
4940 
4941 		iovcnt = 0;
4942 		bytes = 0;
4943 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
4944 		for (m = args->mblk;
4945 		    m != NULL && bytes < round_len;
4946 		    m = m->b_cont) {
4947 			iovcnt++;
4948 			bytes += MBLKL(m);
4949 		}
4950 #ifdef DEBUG
4951 		/* should have ended on an mblk boundary */
4952 		if (bytes != round_len) {
4953 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
4954 			    bytes, round_len, args->data_len);
4955 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
4956 			    (void *)args->mblk, (void *)m);
4957 			ASSERT(bytes == round_len);
4958 		}
4959 #endif
4960 		if (iovcnt <= MAX_IOVECS) {
4961 			iovp = iov;
4962 		} else {
4963 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
4964 		}
4965 		mblk_to_iov(args->mblk, iovcnt, iovp);
4966 	} else {
4967 		iovcnt = 1;
4968 		iovp = iov;
4969 		iovp->iov_base = args->data_val;
4970 		iovp->iov_len = args->data_len;
4971 	}
4972 
4973 	uio.uio_iov = iovp;
4974 	uio.uio_iovcnt = iovcnt;
4975 
4976 	uio.uio_segflg = UIO_SYSSPACE;
4977 	uio.uio_extflg = UIO_COPY_DEFAULT;
4978 	uio.uio_loffset = args->offset;
4979 	uio.uio_resid = args->data_len;
4980 	uio.uio_llimit = curproc->p_fsz_ctl;
4981 	rlimit = uio.uio_llimit - args->offset;
4982 	if (rlimit < (u_offset_t)uio.uio_resid)
4983 		uio.uio_resid = (int)rlimit;
4984 
4985 	if (args->stable == UNSTABLE4)
4986 		ioflag = 0;
4987 	else if (args->stable == FILE_SYNC4)
4988 		ioflag = FSYNC;
4989 	else if (args->stable == DATA_SYNC4)
4990 		ioflag = FDSYNC;
4991 	else {
4992 		if (iovp != iov)
4993 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
4994 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4995 		goto out;
4996 	}
4997 
4998 	/*
4999 	 * We're changing creds because VM may fault and we need
5000 	 * the cred of the current thread to be used if quota
5001 	 * checking is enabled.
5002 	 */
5003 	savecred = curthread->t_cred;
5004 	curthread->t_cred = cr;
5005 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
5006 	curthread->t_cred = savecred;
5007 
5008 	if (iovp != iov)
5009 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5010 
5011 	if (error) {
5012 		*cs->statusp = resp->status = puterrno4(error);
5013 		goto out;
5014 	}
5015 
5016 	*cs->statusp = resp->status = NFS4_OK;
5017 	resp->count = args->data_len - uio.uio_resid;
5018 
5019 	if (ioflag == 0)
5020 		resp->committed = UNSTABLE4;
5021 	else
5022 		resp->committed = FILE_SYNC4;
5023 
5024 	resp->writeverf = Write4verf;
5025 
5026 out:
5027 	if (in_crit)
5028 		nbl_end_crit(vp);
5029 }
5030 
5031 
5032 /* XXX put in a header file */
5033 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5034 
5035 void
5036 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5037 	struct svc_req *req, cred_t *cr)
5038 {
5039 	uint_t i;
5040 	struct compound_state cs;
5041 
5042 	rfs4_init_compound_state(&cs);
5043 	/*
5044 	 * Form a reply tag by copying over the reqeuest tag.
5045 	 */
5046 	resp->tag.utf8string_val =
5047 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5048 	resp->tag.utf8string_len = args->tag.utf8string_len;
5049 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5050 					resp->tag.utf8string_len);
5051 
5052 	cs.statusp = &resp->status;
5053 
5054 	/*
5055 	 * XXX for now, minorversion should be zero
5056 	 */
5057 	if (args->minorversion != NFS4_MINORVERSION) {
5058 		resp->array_len = 0;
5059 		resp->array = NULL;
5060 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5061 		return;
5062 	}
5063 
5064 	resp->array_len = args->array_len;
5065 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5066 		KM_SLEEP);
5067 
5068 	ASSERT(exi == NULL);
5069 	ASSERT(cr == NULL);
5070 
5071 	cr = crget();
5072 	ASSERT(cr != NULL);
5073 
5074 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5075 		crfree(cr);
5076 		return;
5077 	}
5078 
5079 	cs.basecr = cr;
5080 
5081 	cs.req = req;
5082 
5083 	/*
5084 	 * For now, NFS4 compound processing must be protected by
5085 	 * exported_lock because it can access more than one exportinfo
5086 	 * per compound and share/unshare can now change multiple
5087 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5088 	 * per proc (excluding public exinfo), and exi_count design
5089 	 * is sufficient to protect concurrent execution of NFS2/3
5090 	 * ops along with unexport.  This lock will be removed as
5091 	 * part of the NFSv4 phase 2 namespace redesign work.
5092 	 */
5093 	rw_enter(&exported_lock, RW_READER);
5094 
5095 	/*
5096 	 * If this is the first compound we've seen, we need to start all
5097 	 * new instances' grace periods.
5098 	 */
5099 	if (rfs4_seen_first_compound == 0) {
5100 		rfs4_grace_start_new();
5101 		/*
5102 		 * This must be set after rfs4_grace_start_new(), otherwise
5103 		 * another thread could proceed past here before the former
5104 		 * is finished.
5105 		 */
5106 		rfs4_seen_first_compound = 1;
5107 	}
5108 
5109 	for (i = 0; i < args->array_len && cs.cont; i++) {
5110 		nfs_argop4 *argop;
5111 		nfs_resop4 *resop;
5112 		uint_t op;
5113 
5114 		argop = &args->array[i];
5115 		resop = &resp->array[i];
5116 		resop->resop = argop->argop;
5117 		op = (uint_t)resop->resop;
5118 
5119 		if (op < rfsv4disp_cnt) {
5120 			/*
5121 			 * Count the individual ops here; NULL and COMPOUND
5122 			 * are counted in common_dispatch()
5123 			 */
5124 			rfsproccnt_v4_ptr[op].value.ui64++;
5125 
5126 			NFS4_DEBUG(rfs4_debug > 1,
5127 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5128 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5129 			NFS4_DEBUG(rfs4_debug > 1,
5130 				(CE_NOTE, "%s returned %d",
5131 				rfs4_op_string[op], *cs.statusp));
5132 			if (*cs.statusp != NFS4_OK)
5133 				cs.cont = FALSE;
5134 		} else {
5135 			/*
5136 			 * This is effectively dead code since XDR code
5137 			 * will have already returned BADXDR if op doesn't
5138 			 * decode to legal value.  This only done for a
5139 			 * day when XDR code doesn't verify v4 opcodes.
5140 			 */
5141 			op = OP_ILLEGAL;
5142 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5143 
5144 			rfs4_op_illegal(argop, resop, req, &cs);
5145 			cs.cont = FALSE;
5146 		}
5147 
5148 		/*
5149 		 * If not at last op, and if we are to stop, then
5150 		 * compact the results array.
5151 		 */
5152 		if ((i + 1) < args->array_len && !cs.cont) {
5153 			nfs_resop4 *new_res = kmem_alloc(
5154 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5155 			bcopy(resp->array,
5156 				new_res, (i+1) * sizeof (nfs_resop4));
5157 			kmem_free(resp->array,
5158 				args->array_len * sizeof (nfs_resop4));
5159 
5160 			resp->array_len =  i + 1;
5161 			resp->array = new_res;
5162 		}
5163 	}
5164 
5165 	rw_exit(&exported_lock);
5166 
5167 	if (cs.vp)
5168 		VN_RELE(cs.vp);
5169 	if (cs.saved_vp)
5170 		VN_RELE(cs.saved_vp);
5171 	if (cs.saved_fh.nfs_fh4_val)
5172 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5173 
5174 	if (cs.basecr)
5175 		crfree(cs.basecr);
5176 	if (cs.cr)
5177 		crfree(cs.cr);
5178 }
5179 
5180 /*
5181  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5182  * XXX zero out the tag and array values. Need to investigate why the
5183  * XXX calls occur, but at least prevent the panic for now.
5184  */
5185 void
5186 rfs4_compound_free(COMPOUND4res *resp)
5187 {
5188 	uint_t i;
5189 
5190 	if (resp->tag.utf8string_val) {
5191 		UTF8STRING_FREE(resp->tag)
5192 	}
5193 
5194 	for (i = 0; i < resp->array_len; i++) {
5195 		nfs_resop4 *resop;
5196 		uint_t op;
5197 
5198 		resop = &resp->array[i];
5199 		op = (uint_t)resop->resop;
5200 		if (op < rfsv4disp_cnt) {
5201 			(*rfsv4disptab[op].dis_resfree)(resop);
5202 		}
5203 	}
5204 	if (resp->array != NULL) {
5205 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5206 	}
5207 }
5208 
5209 /*
5210  * Process the value of the compound request rpc flags, as a bit-AND
5211  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5212  */
5213 void
5214 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5215 {
5216 	int i;
5217 	int flag = RPC_ALL;
5218 
5219 	for (i = 0; flag && i < args->array_len; i++) {
5220 		uint_t op;
5221 
5222 		op = (uint_t)args->array[i].argop;
5223 
5224 		if (op < rfsv4disp_cnt)
5225 			flag &= rfsv4disptab[op].dis_flags;
5226 		else
5227 			flag = 0;
5228 	}
5229 	*flagp = flag;
5230 }
5231 
5232 nfsstat4
5233 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5234 {
5235 	nfsstat4 e;
5236 
5237 	rfs4_dbe_lock(cp->dbe);
5238 
5239 	if (cp->sysidt != LM_NOSYSID) {
5240 		*sp = cp->sysidt;
5241 		e = NFS4_OK;
5242 
5243 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5244 		*sp = cp->sysidt;
5245 		e = NFS4_OK;
5246 
5247 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5248 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5249 	} else
5250 		e = NFS4ERR_DELAY;
5251 
5252 	rfs4_dbe_unlock(cp->dbe);
5253 	return (e);
5254 }
5255 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one line description of a lock request.  str is a
 * caller supplied prefix, operation the fcntl style command and flk the
 * lock being requested.  A zero l_len is printed as all ones.
 */
static void lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *typename;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		typename = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		typename = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		typename = "F_WRLCK";
	else
		typename = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
		str, opname, typename,
		(longlong_t)flk->l_start,
		flk->l_len ? (longlong_t)flk->l_len : ~0LL,
		flk->l_pid);
}

/* Call lock_print() when the debug condition d is true. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds: LOCK_PRINT expands to nothing. */
#define	LOCK_PRINT(d, s, t, f)
#endif
5292 
5293 /*ARGSUSED*/
5294 static bool_t
5295 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5296 {
5297 	return (TRUE);
5298 }
5299 
5300 /*
5301  * Look up the pathname using the vp in cs as the directory vnode.
5302  * cs->vp will be the vnode for the file on success
5303  */
5304 
5305 static nfsstat4
5306 rfs4_lookup(component4 *component, struct svc_req *req,
5307 	    struct compound_state *cs)
5308 {
5309 	char *nm;
5310 	uint32_t len;
5311 	nfsstat4 status;
5312 
5313 	if (cs->vp == NULL) {
5314 		return (NFS4ERR_NOFILEHANDLE);
5315 	}
5316 	if (cs->vp->v_type != VDIR) {
5317 		return (NFS4ERR_NOTDIR);
5318 	}
5319 
5320 	if (!utf8_dir_verify(component))
5321 		return (NFS4ERR_INVAL);
5322 
5323 	nm = utf8_to_fn(component, &len, NULL);
5324 	if (nm == NULL) {
5325 		return (NFS4ERR_INVAL);
5326 	}
5327 
5328 	if (len > MAXNAMELEN) {
5329 		kmem_free(nm, len);
5330 		return (NFS4ERR_NAMETOOLONG);
5331 	}
5332 
5333 	status = do_rfs4_op_lookup(nm, len, req, cs);
5334 
5335 	kmem_free(nm, len);
5336 
5337 	return (status);
5338 }
5339 
5340 static nfsstat4
5341 rfs4_lookupfile(component4 *component, struct svc_req *req,
5342 		struct compound_state *cs, uint32_t access,
5343 		change_info4 *cinfo)
5344 {
5345 	nfsstat4 status;
5346 	vnode_t *dvp = cs->vp;
5347 	vattr_t bva, ava, fva;
5348 	int error;
5349 
5350 	/* Get "before" change value */
5351 	bva.va_mask = AT_CTIME|AT_SEQ;
5352 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5353 	if (error)
5354 		return (puterrno4(error));
5355 
5356 	/* rfs4_lookup may VN_RELE directory */
5357 	VN_HOLD(dvp);
5358 
5359 	status = rfs4_lookup(component, req, cs);
5360 	if (status != NFS4_OK) {
5361 		VN_RELE(dvp);
5362 		return (status);
5363 	}
5364 
5365 	/*
5366 	 * Get "after" change value, if it fails, simply return the
5367 	 * before value.
5368 	 */
5369 	ava.va_mask = AT_CTIME|AT_SEQ;
5370 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5371 		ava.va_ctime = bva.va_ctime;
5372 		ava.va_seq = 0;
5373 	}
5374 	VN_RELE(dvp);
5375 
5376 	/*
5377 	 * Validate the file is a file
5378 	 */
5379 	fva.va_mask = AT_TYPE|AT_MODE;
5380 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5381 	if (error)
5382 		return (puterrno4(error));
5383 
5384 	if (fva.va_type != VREG) {
5385 		if (fva.va_type == VDIR)
5386 			return (NFS4ERR_ISDIR);
5387 		if (fva.va_type == VLNK)
5388 			return (NFS4ERR_SYMLINK);
5389 		return (NFS4ERR_INVAL);
5390 	}
5391 
5392 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5393 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5394 
5395 	/*
5396 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5397 	 * cinfo.atomic = TRUE only if we have
5398 	 * non-zero va_seq's, and they have not changed.
5399 	 */
5400 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5401 		cinfo->atomic = TRUE;
5402 	else
5403 		cinfo->atomic = FALSE;
5404 
5405 	/* Check for mandatory locking */
5406 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5407 	return (check_open_access(access, cs, req));
5408 }
5409 
5410 static nfsstat4
5411 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5412 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5413 {
5414 	int error;
5415 	nfsstat4 status = NFS4_OK;
5416 	vattr_t va;
5417 
5418 tryagain:
5419 
5420 	/*
5421 	 * The file open mode used is VWRITE.  If the client needs
5422 	 * some other semantic, then it should do the access checking
5423 	 * itself.  It would have been nice to have the file open mode
5424 	 * passed as part of the arguments.
5425 	 */
5426 
5427 	*created = TRUE;
5428 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5429 
5430 	if (error) {
5431 		*created = FALSE;
5432 
5433 		/*
5434 		 * If we got something other than file already exists
5435 		 * then just return this error.  Otherwise, we got
5436 		 * EEXIST.  If we were doing a GUARDED create, then
5437 		 * just return this error.  Otherwise, we need to
5438 		 * make sure that this wasn't a duplicate of an
5439 		 * exclusive create request.
5440 		 *
5441 		 * The assumption is made that a non-exclusive create
5442 		 * request will never return EEXIST.
5443 		 */
5444 
5445 		if (error != EEXIST || mode == GUARDED4) {
5446 			status = puterrno4(error);
5447 			return (status);
5448 		}
5449 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5450 
5451 		if (error) {
5452 			/*
5453 			 * We couldn't find the file that we thought that
5454 			 * we just created.  So, we'll just try creating
5455 			 * it again.
5456 			 */
5457 			if (error == ENOENT)
5458 				goto tryagain;
5459 
5460 			status = puterrno4(error);
5461 			return (status);
5462 		}
5463 
5464 		if (mode == UNCHECKED4) {
5465 			/* existing object must be regular file */
5466 			if ((*vpp)->v_type != VREG) {
5467 				if ((*vpp)->v_type == VDIR)
5468 					status = NFS4ERR_ISDIR;
5469 				else if ((*vpp)->v_type == VLNK)
5470 					status = NFS4ERR_SYMLINK;
5471 				else
5472 					status = NFS4ERR_INVAL;
5473 				VN_RELE(*vpp);
5474 				return (status);
5475 			}
5476 
5477 			return (NFS4_OK);
5478 		}
5479 
5480 		/* Check for duplicate request */
5481 		ASSERT(mtime != 0);
5482 		va.va_mask = AT_MTIME;
5483 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5484 		if (!error) {
5485 			/* We found the file */
5486 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5487 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5488 				/* but its not our creation */
5489 				VN_RELE(*vpp);
5490 				return (NFS4ERR_EXIST);
5491 			}
5492 			*created = TRUE; /* retrans of create == created */
5493 			return (NFS4_OK);
5494 		}
5495 		VN_RELE(*vpp);
5496 		return (NFS4ERR_EXIST);
5497 	}
5498 
5499 	return (NFS4_OK);
5500 }
5501 
5502 static nfsstat4
5503 check_open_access(uint32_t access,
5504 		struct compound_state *cs, struct svc_req *req)
5505 {
5506 	int error;
5507 	vnode_t *vp;
5508 	bool_t readonly;
5509 	cred_t *cr = cs->cr;
5510 
5511 	/* For now we don't allow mandatory locking as per V2/V3 */
5512 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5513 		return (NFS4ERR_ACCESS);
5514 	}
5515 
5516 	vp = cs->vp;
5517 	ASSERT(cr != NULL && vp->v_type == VREG);
5518 
5519 	/*
5520 	 * If the file system is exported read only and we are trying
5521 	 * to open for write, then return NFS4ERR_ROFS
5522 	 */
5523 
5524 	readonly = rdonly4(cs->exi, cs->vp, req);
5525 
5526 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5527 		return (NFS4ERR_ROFS);
5528 
5529 	if (access & OPEN4_SHARE_ACCESS_READ) {
5530 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5531 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5532 			return (NFS4ERR_ACCESS);
5533 		}
5534 	}
5535 
5536 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5537 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5538 		if (error)
5539 			return (NFS4ERR_ACCESS);
5540 	}
5541 
5542 	return (NFS4_OK);
5543 }
5544 
5545 static nfsstat4
5546 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5547 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5548 {
5549 	struct nfs4_svgetit_arg sarg;
5550 	struct nfs4_ntov_table ntov;
5551 
5552 	bool_t ntov_table_init = FALSE;
5553 	struct statvfs64 sb;
5554 	nfsstat4 status;
5555 	vnode_t *vp;
5556 	vattr_t bva, ava, iva, cva, *vap;
5557 	vnode_t *dvp;
5558 	timespec32_t *mtime;
5559 	char *nm = NULL;
5560 	uint_t buflen;
5561 	bool_t created;
5562 	bool_t setsize = FALSE;
5563 	len_t reqsize;
5564 	int error;
5565 	bool_t trunc;
5566 	caller_context_t ct;
5567 	component4 *component;
5568 
5569 	sarg.sbp = &sb;
5570 
5571 	dvp = cs->vp;
5572 
5573 	/* Check if the file system is read only */
5574 	if (rdonly4(cs->exi, dvp, req))
5575 		return (NFS4ERR_ROFS);
5576 
5577 	/*
5578 	 * Get the last component of path name in nm. cs will reference
5579 	 * the including directory on success.
5580 	 */
5581 	component = &args->open_claim4_u.file;
5582 	if (!utf8_dir_verify(component))
5583 		return (NFS4ERR_INVAL);
5584 
5585 	nm = utf8_to_fn(component, &buflen, NULL);
5586 
5587 	if (nm == NULL)
5588 		return (NFS4ERR_RESOURCE);
5589 
5590 	if (buflen > MAXNAMELEN) {
5591 		kmem_free(nm, buflen);
5592 		return (NFS4ERR_NAMETOOLONG);
5593 	}
5594 
5595 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5596 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5597 	if (error) {
5598 		kmem_free(nm, buflen);
5599 		return (puterrno4(error));
5600 	}
5601 
5602 	if (bva.va_type != VDIR) {
5603 		kmem_free(nm, buflen);
5604 		return (NFS4ERR_NOTDIR);
5605 	}
5606 
5607 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5608 
5609 	switch (args->mode) {
5610 	case GUARDED4:
5611 		/*FALLTHROUGH*/
5612 	case UNCHECKED4:
5613 		nfs4_ntov_table_init(&ntov);
5614 		ntov_table_init = TRUE;
5615 
5616 		*attrset = 0;
5617 		status = do_rfs4_set_attrs(attrset,
5618 					&args->createhow4_u.createattrs,
5619 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5620 
5621 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5622 		    sarg.vap->va_type != VREG) {
5623 			if (sarg.vap->va_type == VDIR)
5624 				status = NFS4ERR_ISDIR;
5625 			else if (sarg.vap->va_type == VLNK)
5626 				status = NFS4ERR_SYMLINK;
5627 			else
5628 				status = NFS4ERR_INVAL;
5629 		}
5630 
5631 		if (status != NFS4_OK) {
5632 			kmem_free(nm, buflen);
5633 			nfs4_ntov_table_free(&ntov, &sarg);
5634 			*attrset = 0;
5635 			return (status);
5636 		}
5637 
5638 		vap = sarg.vap;
5639 		vap->va_type = VREG;
5640 		vap->va_mask |= AT_TYPE;
5641 
5642 		if ((vap->va_mask & AT_MODE) == 0) {
5643 			vap->va_mask |= AT_MODE;
5644 			vap->va_mode = (mode_t)0600;
5645 		}
5646 
5647 		if (vap->va_mask & AT_SIZE) {
5648 
5649 			/* Disallow create with a non-zero size */
5650 
5651 			if ((reqsize = sarg.vap->va_size) != 0) {
5652 				kmem_free(nm, buflen);
5653 				nfs4_ntov_table_free(&ntov, &sarg);
5654 				*attrset = 0;
5655 				return (NFS4ERR_INVAL);
5656 			}
5657 			setsize = TRUE;
5658 		}
5659 		break;
5660 
5661 	case EXCLUSIVE4:
5662 		/* prohibit EXCL create of named attributes */
5663 		if (dvp->v_flag & V_XATTRDIR) {
5664 			kmem_free(nm, buflen);
5665 			*attrset = 0;
5666 			return (NFS4ERR_INVAL);
5667 		}
5668 
5669 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5670 		cva.va_type = VREG;
5671 		/*
5672 		 * Ensure no time overflows. Assumes underlying
5673 		 * filesystem supports at least 32 bits.
5674 		 * Truncate nsec to usec resolution to allow valid
5675 		 * compares even if the underlying filesystem truncates.
5676 		 */
5677 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5678 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5679 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5680 		cva.va_mode = (mode_t)0;
5681 		vap = &cva;
5682 		break;
5683 	}
5684 
5685 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5686 						cs->cr, &vp, &created);
5687 	kmem_free(nm, buflen);
5688 
5689 	if (status != NFS4_OK) {
5690 		if (ntov_table_init)
5691 			nfs4_ntov_table_free(&ntov, &sarg);
5692 		*attrset = 0;
5693 		return (status);
5694 	}
5695 
5696 	trunc = (setsize && !created);
5697 
5698 	if (args->mode != EXCLUSIVE4) {
5699 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5700 
5701 		/*
5702 		 * True verification that object was created with correct
5703 		 * attrs is impossible.  The attrs could have been changed
5704 		 * immediately after object creation.  If attributes did
5705 		 * not verify, the only recourse for the server is to
5706 		 * destroy the object.  Maybe if some attrs (like gid)
5707 		 * are set incorrectly, the object should be destroyed;
5708 		 * however, seems bad as a default policy.  Do we really
5709 		 * want to destroy an object over one of the times not
5710 		 * verifying correctly?  For these reasons, the server
5711 		 * currently sets bits in attrset for createattrs
5712 		 * that were set; however, no verification is done.
5713 		 *
5714 		 * vmask_to_nmask accounts for vattr bits set on create
5715 		 *	[do_rfs4_set_attrs() only sets resp bits for
5716 		 *	 non-vattr/vfs bits.]
5717 		 * Mask off any bits we set by default so as not to return
5718 		 * more attrset bits than were requested in createattrs
5719 		 */
5720 		if (created) {
5721 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5722 			*attrset &= createmask;
5723 		} else {
5724 			/*
5725 			 * We did not create the vnode (we tried but it
5726 			 * already existed).  In this case, the only createattr
5727 			 * that the spec allows the server to set is size,
5728 			 * and even then, it can only be set if it is 0.
5729 			 */
5730 			*attrset = 0;
5731 			if (trunc)
5732 				*attrset = FATTR4_SIZE_MASK;
5733 		}
5734 	}
5735 	if (ntov_table_init)
5736 		nfs4_ntov_table_free(&ntov, &sarg);
5737 
5738 	/*
5739 	 * Get the initial "after" sequence number, if it fails,
5740 	 * set to zero, time to before.
5741 	 */
5742 	iva.va_mask = AT_CTIME|AT_SEQ;
5743 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5744 		iva.va_seq = 0;
5745 		iva.va_ctime = bva.va_ctime;
5746 	}
5747 
5748 	/*
5749 	 * create_vnode attempts to create the file exclusive,
5750 	 * if it already exists the VOP_CREATE will fail and
5751 	 * may not increase va_seq. It is atomic if
5752 	 * we haven't changed the directory, but if it has changed
5753 	 * we don't know what changed it.
5754 	 */
5755 	if (!created) {
5756 		if (bva.va_seq && iva.va_seq &&
5757 			bva.va_seq == iva.va_seq)
5758 			cinfo->atomic = TRUE;
5759 		else
5760 			cinfo->atomic = FALSE;
5761 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
5762 	} else {
5763 		/*
5764 		 * The entry was created, we need to sync the
5765 		 * directory metadata.
5766 		 */
5767 		(void) VOP_FSYNC(dvp, 0, cs->cr);
5768 
5769 		/*
5770 		 * Get "after" change value, if it fails, simply return the
5771 		 * before value.
5772 		 */
5773 		ava.va_mask = AT_CTIME|AT_SEQ;
5774 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5775 			ava.va_ctime = bva.va_ctime;
5776 			ava.va_seq = 0;
5777 		}
5778 
5779 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5780 
5781 		/*
5782 		 * The cinfo->atomic = TRUE only if we have
5783 		 * non-zero va_seq's, and it has incremented by exactly one
5784 		 * during the create_vnode and it didn't
5785 		 * change during the VOP_FSYNC.
5786 		 */
5787 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
5788 				iva.va_seq == (bva.va_seq + 1) &&
5789 				iva.va_seq == ava.va_seq)
5790 			cinfo->atomic = TRUE;
5791 		else
5792 			cinfo->atomic = FALSE;
5793 	}
5794 
5795 	/* Check for mandatory locking and that the size gets set. */
5796 	cva.va_mask = AT_MODE;
5797 	if (setsize)
5798 		cva.va_mask |= AT_SIZE;
5799 
5800 	/* Assume the worst */
5801 	cs->mandlock = TRUE;
5802 
5803 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
5804 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
5805 
5806 		/*
5807 		 * Truncate the file if necessary; this would be
5808 		 * the case for create over an existing file.
5809 		 */
5810 
5811 		if (trunc) {
5812 			int in_crit = 0;
5813 			rfs4_file_t *fp;
5814 			bool_t create = FALSE;
5815 
5816 			/*
5817 			 * We are writing over an existing file.
5818 			 * Check to see if we need to recall a delegation.
5819 			 */
5820 			rfs4_hold_deleg_policy();
5821 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
5822 				if (rfs4_check_delegated_byfp(FWRITE, fp,
5823 					(reqsize == 0), FALSE, FALSE,
5824 							&clientid)) {
5825 
5826 					rfs4_file_rele(fp);
5827 					rfs4_rele_deleg_policy();
5828 					VN_RELE(vp);
5829 					*attrset = 0;
5830 					return (NFS4ERR_DELAY);
5831 				}
5832 				rfs4_file_rele(fp);
5833 			}
5834 			rfs4_rele_deleg_policy();
5835 
5836 			if (nbl_need_check(vp)) {
5837 				in_crit = 1;
5838 
5839 				ASSERT(reqsize == 0);
5840 
5841 				nbl_start_crit(vp, RW_READER);
5842 				if (nbl_conflict(vp, NBL_WRITE, 0,
5843 						cva.va_size, 0)) {
5844 					in_crit = 0;
5845 					nbl_end_crit(vp);
5846 					VN_RELE(vp);
5847 					*attrset = 0;
5848 					return (NFS4ERR_ACCESS);
5849 				}
5850 			}
5851 			ct.cc_sysid = 0;
5852 			ct.cc_pid = 0;
5853 			ct.cc_caller_id = nfs4_srv_caller_id;
5854 
5855 			cva.va_mask = AT_SIZE;
5856 			cva.va_size = reqsize;
5857 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
5858 			if (in_crit)
5859 				nbl_end_crit(vp);
5860 		}
5861 	}
5862 
5863 	error = makefh4(&cs->fh, vp, cs->exi);
5864 
5865 	/*
5866 	 * Force modified data and metadata out to stable storage.
5867 	 */
5868 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
5869 
5870 	if (error) {
5871 		VN_RELE(vp);
5872 		*attrset = 0;
5873 		return (puterrno4(error));
5874 	}
5875 
5876 	/* if parent dir is attrdir, set namedattr fh flag */
5877 	if (dvp->v_flag & V_XATTRDIR)
5878 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
5879 
5880 	if (cs->vp)
5881 		VN_RELE(cs->vp);
5882 
5883 	cs->vp = vp;
5884 
5885 	/*
5886 	 * if we did not create the file, we will need to check
5887 	 * the access bits on the file
5888 	 */
5889 
5890 	if (!created) {
5891 		if (setsize)
5892 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
5893 		status = check_open_access(args->share_access, cs, req);
5894 		if (status != NFS4_OK)
5895 			*attrset = 0;
5896 	}
5897 	return (status);
5898 }
5899 
5900 /*ARGSUSED*/
5901 static void
5902 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
5903 		rfs4_openowner_t *oo, delegreq_t deleg,
5904 		uint32_t access, uint32_t deny,
5905 		OPEN4res *resp)
5906 {
5907 	/* XXX Currently not using req  */
5908 	rfs4_state_t *state;
5909 	rfs4_file_t *file;
5910 	bool_t screate = TRUE;
5911 	bool_t fcreate = TRUE;
5912 	uint32_t amodes;
5913 	uint32_t dmodes;
5914 	rfs4_deleg_state_t *dsp;
5915 	struct shrlock shr;
5916 	struct shr_locowner shr_loco;
5917 	sysid_t sysid;
5918 	nfsstat4 status;
5919 	int fflags = 0;
5920 	int recall = 0;
5921 	int err;
5922 
5923 	/* get the file struct and hold a lock on it during initial open */
5924 	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
5925 	if (file == NULL) {
5926 		NFS4_DEBUG(rfs4_debug,
5927 			(CE_NOTE, "rfs4_do_open: can't find file"));
5928 		resp->status = NFS4ERR_SERVERFAULT;
5929 		return;
5930 	}
5931 
5932 	state = rfs4_findstate_by_owner_file(oo, file, &screate);
5933 	if (state == NULL) {
5934 		NFS4_DEBUG(rfs4_debug,
5935 			(CE_NOTE, "rfs4_do_open: can't find state"));
5936 		resp->status = NFS4ERR_RESOURCE;
5937 		/* No need to keep any reference */
5938 		rfs4_file_rele_withunlock(file);
5939 		return;
5940 	}
5941 
5942 	/*
5943 	 * Check for conflicts in deny and access before checking for
5944 	 * conflicts in delegation.  We don't want to recall a
5945 	 * delegation based on an open that will eventually fail based
5946 	 * on shares modes.
5947 	 */
5948 
5949 	shr.s_access = (short)access;
5950 	shr.s_deny = (short)deny;
5951 	shr.s_pid = rfs4_dbe_getid(oo->dbe);
5952 
5953 	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
5954 		resp->status = status;
5955 		rfs4_file_rele(file);
5956 		/* Not a fully formed open; "close" it */
5957 		if (screate == TRUE)
5958 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5959 		rfs4_state_rele(state);
5960 		return;
5961 	}
5962 	shr.s_sysid = sysid;
5963 	shr_loco.sl_pid = shr.s_pid;
5964 	shr_loco.sl_id = shr.s_sysid;
5965 	shr.s_owner = (caddr_t)&shr_loco;
5966 	shr.s_own_len = sizeof (shr_loco);
5967 
5968 	fflags = 0;
5969 	if (access & OPEN4_SHARE_ACCESS_READ)
5970 		fflags |= FREAD;
5971 	if (access & OPEN4_SHARE_ACCESS_WRITE)
5972 		fflags |= FWRITE;
5973 
5974 	if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {
5975 
5976 		resp->status = err == EAGAIN ?
5977 			NFS4ERR_SHARE_DENIED : puterrno4(err);
5978 
5979 		rfs4_file_rele(file);
5980 		/* Not a fully formed open; "close" it */
5981 		if (screate == TRUE)
5982 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5983 		rfs4_state_rele(state);
5984 		return;
5985 	}
5986 
5987 	rfs4_dbe_lock(state->dbe);
5988 	rfs4_dbe_lock(file->dbe);
5989 
5990 	/*
5991 	 * Calculate the new deny and access mode that this open is adding to
5992 	 * the file for this open owner;
5993 	 */
5994 	dmodes = (deny & ~state->share_deny);
5995 	amodes = (access & ~state->share_access);
5996 
5997 	/*
5998 	 * Check to see if this file is delegated and if so, if a
5999 	 * recall needs to be done.
6000 	 */
6001 	if (rfs4_check_recall(state, access)) {
6002 		rfs4_dbe_unlock(file->dbe);
6003 		rfs4_dbe_unlock(state->dbe);
6004 		rfs4_recall_deleg(file, FALSE, state->owner->client);
6005 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
6006 		rfs4_dbe_lock(state->dbe);
6007 		rfs4_dbe_lock(file->dbe);
6008 		/* Let's see if the delegation was returned */
6009 		if (rfs4_check_recall(state, access)) {
6010 			rfs4_dbe_unlock(file->dbe);
6011 			rfs4_dbe_unlock(state->dbe);
6012 			rfs4_file_rele(file);
6013 			rfs4_update_lease(state->owner->client);
6014 			/* recalculate flags to match what was added */
6015 			fflags = 0;
6016 			if (amodes & OPEN4_SHARE_ACCESS_READ)
6017 				fflags |= FREAD;
6018 			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6019 				fflags |= FWRITE;
6020 			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
6021 			/* Not a fully formed open; "close" it */
6022 			if (screate == TRUE)
6023 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
6024 			rfs4_state_rele(state);
6025 			resp->status = NFS4ERR_DELAY;
6026 			return;
6027 		}
6028 	}
6029 
6030 	if (dmodes & OPEN4_SHARE_DENY_READ)
6031 		file->deny_read++;
6032 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
6033 		file->deny_write++;
6034 	file->share_deny |= deny;
6035 	state->share_deny |= deny;
6036 
6037 	if (amodes & OPEN4_SHARE_ACCESS_READ)
6038 		file->access_read++;
6039 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6040 		file->access_write++;
6041 	file->share_access |= access;
6042 	state->share_access |= access;
6043 
6044 	/*
6045 	 * Check for delegation here. if the deleg argument is not
6046 	 * DELEG_ANY, then this is a reclaim from a client and
6047 	 * we must honor the delegation requested. If necessary we can
6048 	 * set the recall flag.
6049 	 */
6050 
6051 	dsp = rfs4_grant_delegation(deleg, state, &recall);
6052 
6053 	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);
6054 
6055 	next_stateid(&state->stateid);
6056 
6057 	resp->stateid = state->stateid.stateid;
6058 
6059 	rfs4_dbe_unlock(file->dbe);
6060 	rfs4_dbe_unlock(state->dbe);
6061 
6062 	if (dsp) {
6063 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6064 		rfs4_deleg_state_rele(dsp);
6065 	}
6066 
6067 	rfs4_file_rele(file);
6068 	rfs4_state_rele(state);
6069 
6070 	resp->status = NFS4_OK;
6071 }
6072 
6073 /*ARGSUSED*/
6074 static void
6075 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6076 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6077 {
6078 	change_info4 *cinfo = &resp->cinfo;
6079 	bitmap4 *attrset = &resp->attrset;
6080 
6081 	if (args->opentype == OPEN4_NOCREATE)
6082 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6083 					req, cs, args->share_access, cinfo);
6084 	else {
6085 		/* inhibit delegation grants during exclusive create */
6086 
6087 		if (args->mode == EXCLUSIVE4)
6088 			rfs4_disable_delegation();
6089 
6090 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6091 					oo->client->clientid);
6092 	}
6093 
6094 	if (resp->status == NFS4_OK) {
6095 
6096 		/* cs->vp cs->fh now reference the desired file */
6097 
6098 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6099 						args->share_deny, resp);
6100 
6101 		/*
6102 		 * If rfs4_createfile set attrset, we must
6103 		 * clear this attrset before the response is copied.
6104 		 */
6105 		if (resp->status != NFS4_OK && resp->attrset) {
6106 			resp->attrset = 0;
6107 		}
6108 	}
6109 	else
6110 		*cs->statusp = resp->status;
6111 
6112 	if (args->mode == EXCLUSIVE4)
6113 		rfs4_enable_delegation();
6114 }
6115 
6116 /*ARGSUSED*/
6117 static void
6118 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6119 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6120 {
6121 	change_info4 *cinfo = &resp->cinfo;
6122 	vattr_t va;
6123 	vtype_t v_type = cs->vp->v_type;
6124 	int error = 0;
6125 
6126 	/* Verify that we have a regular file */
6127 	if (v_type != VREG) {
6128 		if (v_type == VDIR)
6129 			resp->status = NFS4ERR_ISDIR;
6130 		else if (v_type == VLNK)
6131 			resp->status = NFS4ERR_SYMLINK;
6132 		else
6133 			resp->status = NFS4ERR_INVAL;
6134 		return;
6135 	}
6136 
6137 	va.va_mask = AT_MODE|AT_UID;
6138 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6139 	if (error) {
6140 		resp->status = puterrno4(error);
6141 		return;
6142 	}
6143 
6144 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6145 
6146 	/*
6147 	 * Check if we have access to the file, Note the the file
6148 	 * could have originally been open UNCHECKED or GUARDED
6149 	 * with mode bits that will now fail, but there is nothing
6150 	 * we can really do about that except in the case that the
6151 	 * owner of the file is the one requesting the open.
6152 	 */
6153 	if (crgetuid(cs->cr) != va.va_uid) {
6154 		resp->status = check_open_access(args->share_access, cs, req);
6155 		if (resp->status != NFS4_OK) {
6156 			return;
6157 		}
6158 	}
6159 
6160 	/*
6161 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6162 	 */
6163 	cinfo->before = 0;
6164 	cinfo->after = 0;
6165 	cinfo->atomic = FALSE;
6166 
6167 	rfs4_do_open(cs, req, oo,
6168 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6169 		args->share_access, args->share_deny, resp);
6170 }
6171 
6172 static void
6173 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6174 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6175 {
6176 	int error;
6177 	nfsstat4 status;
6178 	stateid4 stateid =
6179 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6180 	rfs4_deleg_state_t *dsp;
6181 
6182 	/*
6183 	 * Find the state info from the stateid and confirm that the
6184 	 * file is delegated.  If the state openowner is the same as
6185 	 * the supplied openowner we're done. If not, get the file
6186 	 * info from the found state info. Use that file info to
6187 	 * create the state for this lock owner. Note solaris doen't
6188 	 * really need the pathname to find the file. We may want to
6189 	 * lookup the pathname and make sure that the vp exist and
6190 	 * matches the vp in the file structure. However it is
6191 	 * possible that the pathname nolonger exists (local process
6192 	 * unlinks the file), so this may not be that useful.
6193 	 */
6194 
6195 	status = rfs4_get_deleg_state(&stateid, &dsp);
6196 	if (status != NFS4_OK) {
6197 		resp->status = status;
6198 		return;
6199 	}
6200 
6201 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6202 
6203 	/*
6204 	 * New lock owner, create state. Since this was probably called
6205 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6206 	 */
6207 
6208 	ASSERT(cs->vp != NULL);
6209 	VN_RELE(cs->vp);
6210 	VN_HOLD(dsp->finfo->vp);
6211 	cs->vp = dsp->finfo->vp;
6212 
6213 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6214 		rfs4_deleg_state_rele(dsp);
6215 		*cs->statusp = resp->status = puterrno4(error);
6216 		return;
6217 	}
6218 
6219 	/* Mark progress for delegation returns */
6220 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6221 	rfs4_deleg_state_rele(dsp);
6222 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6223 				args->share_access, args->share_deny, resp);
6224 }
6225 
6226 /*ARGSUSED*/
6227 static void
6228 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6229 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6230 {
6231 	/*
6232 	 * Lookup the pathname, it must already exist since this file
6233 	 * was delegated.
6234 	 *
6235 	 * Find the file and state info for this vp and open owner pair.
6236 	 *	check that they are in fact delegated.
6237 	 *	check that the state access and deny modes are the same.
6238 	 *
6239 	 * Return the delgation possibly seting the recall flag.
6240 	 */
6241 	rfs4_file_t *file;
6242 	rfs4_state_t *state;
6243 	bool_t create = FALSE;
6244 	bool_t dcreate = FALSE;
6245 	rfs4_deleg_state_t *dsp;
6246 	nfsace4 *ace;
6247 
6248 
6249 	/* Note we ignore oflags */
6250 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6251 				req, cs, args->share_access, &resp->cinfo);
6252 
6253 	if (resp->status != NFS4_OK) {
6254 		return;
6255 	}
6256 
6257 	/* get the file struct and hold a lock on it during initial open */
6258 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6259 	if (file == NULL) {
6260 		NFS4_DEBUG(rfs4_debug,
6261 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6262 		resp->status = NFS4ERR_SERVERFAULT;
6263 		return;
6264 	}
6265 
6266 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6267 	if (state == NULL) {
6268 		NFS4_DEBUG(rfs4_debug,
6269 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6270 		resp->status = NFS4ERR_SERVERFAULT;
6271 		rfs4_file_rele_withunlock(file);
6272 		return;
6273 	}
6274 
6275 	rfs4_dbe_lock(state->dbe);
6276 	rfs4_dbe_lock(file->dbe);
6277 	if (args->share_access != state->share_access ||
6278 			args->share_deny != state->share_deny ||
6279 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6280 		NFS4_DEBUG(rfs4_debug,
6281 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6282 		rfs4_dbe_unlock(file->dbe);
6283 		rfs4_dbe_unlock(state->dbe);
6284 		rfs4_file_rele(file);
6285 		rfs4_state_rele(state);
6286 		resp->status = NFS4ERR_SERVERFAULT;
6287 		return;
6288 	}
6289 	rfs4_dbe_unlock(file->dbe);
6290 	rfs4_dbe_unlock(state->dbe);
6291 
6292 	dsp = rfs4_finddeleg(state, &dcreate);
6293 	if (dsp == NULL) {
6294 		rfs4_state_rele(state);
6295 		rfs4_file_rele(file);
6296 		resp->status = NFS4ERR_SERVERFAULT;
6297 		return;
6298 	}
6299 
6300 	next_stateid(&state->stateid);
6301 
6302 	resp->stateid = state->stateid.stateid;
6303 
6304 	resp->delegation.delegation_type = dsp->dtype;
6305 
6306 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6307 		open_read_delegation4 *rv =
6308 			&resp->delegation.open_delegation4_u.read;
6309 
6310 		rv->stateid = dsp->delegid.stateid;
6311 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6312 		ace = &rv->permissions;
6313 	} else {
6314 		open_write_delegation4 *rv =
6315 			&resp->delegation.open_delegation4_u.write;
6316 
6317 		rv->stateid = dsp->delegid.stateid;
6318 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6319 		ace = &rv->permissions;
6320 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6321 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6322 	}
6323 
6324 	/* XXX For now */
6325 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6326 	ace->flag = 0;
6327 	ace->access_mask = 0;
6328 	ace->who.utf8string_len = 0;
6329 	ace->who.utf8string_val = 0;
6330 
6331 	rfs4_deleg_state_rele(dsp);
6332 	rfs4_state_rele(state);
6333 	rfs4_file_rele(file);
6334 }
6335 
/*
 * Result of a sequence id check.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* 0: request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and operation as last time */
	NFS4_CHKSEQ_BAD		/* 2: anything else */
} rfs4_chkseq_t;
6341 
6342 /*
6343  * Generic function for sequence number checks.
6344  */
6345 static rfs4_chkseq_t
6346 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6347 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6348 {
6349 	/* Same sequence ids and matching operations? */
6350 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6351 		if (copyres == TRUE) {
6352 			rfs4_free_reply(resop);
6353 			rfs4_copy_reply(resop, lastop);
6354 		}
6355 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6356 			"Replayed SEQID %d\n", seqid));
6357 		return (NFS4_CHKSEQ_REPLAY);
6358 	}
6359 
6360 	/* If the incoming sequence is not the next expected then it is bad */
6361 	if (rqst_seq != seqid + 1) {
6362 		if (rqst_seq == seqid) {
6363 			NFS4_DEBUG(rfs4_debug,
6364 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6365 				"but last op was %d current op is %d\n",
6366 				lastop->resop, resop->resop));
6367 			return (NFS4_CHKSEQ_BAD);
6368 		}
6369 		NFS4_DEBUG(rfs4_debug,
6370 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6371 				rqst_seq, seqid));
6372 		return (NFS4_CHKSEQ_BAD);
6373 	}
6374 
6375 	/* Everything okay -- next expected */
6376 	return (NFS4_CHKSEQ_OKAY);
6377 }
6378 
6379 
6380 static rfs4_chkseq_t
6381 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6382 {
6383 	rfs4_chkseq_t rc;
6384 
6385 	rfs4_dbe_lock(op->dbe);
6386 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6387 	rfs4_dbe_unlock(op->dbe);
6388 
6389 	if (rc == NFS4_CHKSEQ_OKAY)
6390 		rfs4_update_lease(op->client);
6391 
6392 	return (rc);
6393 }
6394 
6395 static rfs4_chkseq_t
6396 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6397 	nfs_resop4 *resop)
6398 {
6399 	rfs4_chkseq_t rc;
6400 
6401 	rfs4_dbe_lock(op->dbe);
6402 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6403 		olo_seqid, resop, FALSE);
6404 	rfs4_dbe_unlock(op->dbe);
6405 
6406 	return (rc);
6407 }
6408 
6409 static rfs4_chkseq_t
6410 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6411 {
6412 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6413 
6414 	rfs4_dbe_lock(lp->dbe);
6415 	if (!lp->skip_seqid_check)
6416 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6417 			seqid, resop, TRUE);
6418 	rfs4_dbe_unlock(lp->dbe);
6419 
6420 	return (rc);
6421 }
6422 
6423 static void
6424 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6425 	    struct svc_req *req, struct compound_state *cs)
6426 {
6427 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6428 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6429 	open_owner4 *owner = &args->owner;
6430 	open_claim_type4 claim = args->claim;
6431 	rfs4_client_t *cp;
6432 	rfs4_openowner_t *oo;
6433 	bool_t create;
6434 	bool_t replay = FALSE;
6435 	int can_reclaim;
6436 
6437 
6438 	if (cs->vp == NULL) {
6439 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6440 		return;
6441 	}
6442 
6443 	/*
6444 	 * Need to check clientid and lease expiration first based on
6445 	 * error ordering and incrementing sequence id.
6446 	 */
6447 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6448 	if (cp == NULL) {
6449 		*cs->statusp = resp->status =
6450 			rfs4_check_clientid(&owner->clientid, 0);
6451 		return;
6452 	}
6453 
6454 	if (rfs4_lease_expired(cp)) {
6455 		rfs4_client_close(cp);
6456 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6457 		return;
6458 	}
6459 	can_reclaim = cp->can_reclaim;
6460 
6461 	/*
6462 	 * Find the open_owner for use from this point forward.  Take
6463 	 * care in updating the sequence id based on the type of error
6464 	 * being returned.
6465 	 */
6466 retry:
6467 	create = TRUE;
6468 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6469 	if (oo == NULL) {
6470 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6471 		rfs4_client_rele(cp);
6472 		return;
6473 	}
6474 
6475 	/* Hold off access to the sequence space while the open is done */
6476 	rfs4_sw_enter(&oo->oo_sw);
6477 
6478 	/*
6479 	 * If the open_owner existed before at the server, then check
6480 	 * the sequence id.
6481 	 */
6482 	if (!create && !oo->postpone_confirm) {
6483 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6484 		case NFS4_CHKSEQ_BAD:
6485 			if ((args->seqid > oo->open_seqid) &&
6486 				oo->need_confirm) {
6487 				rfs4_free_opens(oo, TRUE, FALSE);
6488 				rfs4_sw_exit(&oo->oo_sw);
6489 				rfs4_openowner_rele(oo);
6490 				goto retry;
6491 			}
6492 			resp->status = NFS4ERR_BAD_SEQID;
6493 			goto out;
6494 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6495 			replay = TRUE;
6496 			goto out;
6497 		default:
6498 			break;
6499 		}
6500 
6501 		/*
6502 		 * Sequence was ok and open owner exists
6503 		 * check to see if we have yet to see an
6504 		 * open_confirm.
6505 		 */
6506 		if (oo->need_confirm) {
6507 			rfs4_free_opens(oo, TRUE, FALSE);
6508 			rfs4_sw_exit(&oo->oo_sw);
6509 			rfs4_openowner_rele(oo);
6510 			goto retry;
6511 		}
6512 	}
6513 	/* Grace only applies to regular-type OPENs */
6514 	if (rfs4_clnt_in_grace(cp) &&
6515 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6516 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6517 		goto out;
6518 	}
6519 
6520 	/*
6521 	 * If previous state at the server existed then can_reclaim
6522 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6523 	 * client.
6524 	 */
6525 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6526 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6527 		goto out;
6528 	}
6529 
6530 
6531 	/*
6532 	 * Reject the open if the client has missed the grace period
6533 	 */
6534 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6535 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6536 		goto out;
6537 	}
6538 
6539 	/* Couple of up-front bookkeeping items */
6540 	if (oo->need_confirm) {
6541 		/*
6542 		 * If this is a reclaim OPEN then we should not ask
6543 		 * for a confirmation of the open_owner per the
6544 		 * protocol specification.
6545 		 */
6546 		if (claim == CLAIM_PREVIOUS)
6547 			oo->need_confirm = FALSE;
6548 		else
6549 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6550 	}
6551 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6552 
6553 	/*
6554 	 * If there is an unshared filesystem mounted on this vnode,
6555 	 * do not allow to open/create in this directory.
6556 	 */
6557 	if (vn_ismntpt(cs->vp)) {
6558 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6559 		goto out;
6560 	}
6561 
6562 	/*
6563 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6564 	 * deny can be READ, WRITE, BOTH, or NONE.
6565 	 * bits not defined for access/deny are invalid.
6566 	 */
6567 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6568 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6569 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6570 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6571 		goto out;
6572 	}
6573 
6574 
6575 	/*
6576 	 * make sure attrset is zero before response is built.
6577 	 */
6578 	resp->attrset = 0;
6579 
6580 	switch (claim) {
6581 	case CLAIM_NULL:
6582 		rfs4_do_opennull(cs, req, args, oo, resp);
6583 	    break;
6584 	case CLAIM_PREVIOUS:
6585 		rfs4_do_openprev(cs, req, args, oo, resp);
6586 	    break;
6587 	case CLAIM_DELEGATE_CUR:
6588 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6589 	    break;
6590 	case CLAIM_DELEGATE_PREV:
6591 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6592 	    break;
6593 	default:
6594 		resp->status = NFS4ERR_INVAL;
6595 		break;
6596 	}
6597 
6598 out:
6599 	rfs4_client_rele(cp);
6600 
6601 	/* Catch sequence id handling here to make it a little easier */
6602 	switch (resp->status) {
6603 	case NFS4ERR_BADXDR:
6604 	case NFS4ERR_BAD_SEQID:
6605 	case NFS4ERR_BAD_STATEID:
6606 	case NFS4ERR_NOFILEHANDLE:
6607 	case NFS4ERR_RESOURCE:
6608 	case NFS4ERR_STALE_CLIENTID:
6609 	case NFS4ERR_STALE_STATEID:
6610 		/*
6611 		 * The protocol states that if any of these errors are
6612 		 * being returned, the sequence id should not be
6613 		 * incremented.  Any other return requires an
6614 		 * increment.
6615 		 */
6616 		break;
6617 	default:
6618 		/* Always update the lease in this case */
6619 		rfs4_update_lease(oo->client);
6620 
6621 		/* Regular response - copy the result */
6622 		if (!replay)
6623 			rfs4_update_open_resp(oo, resop, &cs->fh);
6624 
6625 		/*
6626 		 * REPLAY case: Only if the previous response was OK
6627 		 * do we copy the filehandle.  If not OK, no
6628 		 * filehandle to copy.
6629 		 */
6630 		if (replay == TRUE &&
6631 		    resp->status == NFS4_OK &&
6632 		    oo->reply_fh.nfs_fh4_val) {
6633 			/*
6634 			 * If this is a replay, we must restore the
6635 			 * current filehandle/vp to that of what was
6636 			 * returned originally.  Try our best to do
6637 			 * it.
6638 			 */
6639 			nfs_fh4_fmt_t *fh_fmtp =
6640 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6641 
6642 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6643 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6644 
6645 			if (cs->exi == NULL) {
6646 				resp->status = NFS4ERR_STALE;
6647 				goto finish;
6648 			}
6649 
6650 			VN_RELE(cs->vp);
6651 
6652 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6653 				&resp->status);
6654 
6655 			if (cs->vp == NULL)
6656 				goto finish;
6657 
6658 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6659 		}
6660 
6661 		/*
6662 		 * If this was a replay, no need to update the
6663 		 * sequence id. If the open_owner was not created on
6664 		 * this pass, then update.  The first use of an
6665 		 * open_owner will not bump the sequence id.
6666 		 */
6667 		if (replay == FALSE && !create)
6668 			rfs4_update_open_sequence(oo);
6669 		/*
6670 		 * If the client is receiving an error and the
6671 		 * open_owner needs to be confirmed, there is no way
6672 		 * to notify the client of this fact ignoring the fact
6673 		 * that the server has no method of returning a
6674 		 * stateid to confirm.  Therefore, the server needs to
6675 		 * mark this open_owner in a way as to avoid the
6676 		 * sequence id checking the next time the client uses
6677 		 * this open_owner.
6678 		 */
6679 		if (resp->status != NFS4_OK && oo->need_confirm)
6680 			oo->postpone_confirm = TRUE;
6681 		/*
6682 		 * If OK response then clear the postpone flag and
6683 		 * reset the sequence id to keep in sync with the
6684 		 * client.
6685 		 */
6686 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6687 			oo->postpone_confirm = FALSE;
6688 			oo->open_seqid = args->seqid;
6689 		}
6690 		break;
6691 	}
6692 
6693 finish:
6694 	*cs->statusp = resp->status;
6695 
6696 	rfs4_sw_exit(&oo->oo_sw);
6697 	rfs4_openowner_rele(oo);
6698 }
6699 
6700 /*ARGSUSED*/
6701 void
6702 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6703 		    struct svc_req *req, struct compound_state *cs)
6704 {
6705 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6706 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6707 	rfs4_state_t *sp;
6708 	nfsstat4 status;
6709 
6710 	if (cs->vp == NULL) {
6711 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6712 		return;
6713 	}
6714 
6715 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6716 	if (status != NFS4_OK) {
6717 		*cs->statusp = resp->status = status;
6718 		return;
6719 	}
6720 
6721 	/* Ensure specified filehandle matches */
6722 	if (cs->vp != sp->finfo->vp) {
6723 		rfs4_state_rele(sp);
6724 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6725 		return;
6726 	}
6727 
6728 	/* hold off other access to open_owner while we tinker */
6729 	rfs4_sw_enter(&sp->owner->oo_sw);
6730 
6731 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6732 	case NFS4_CHECK_STATEID_OKAY:
6733 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6734 			resop) != 0) {
6735 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6736 			break;
6737 		}
6738 		/*
6739 		 * If it is the appropriate stateid and determined to
6740 		 * be "OKAY" then this means that the stateid does not
6741 		 * need to be confirmed and the client is in error for
6742 		 * sending an OPEN_CONFIRM.
6743 		 */
6744 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6745 		break;
6746 	case NFS4_CHECK_STATEID_OLD:
6747 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6748 		break;
6749 	case NFS4_CHECK_STATEID_BAD:
6750 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6751 		break;
6752 	case NFS4_CHECK_STATEID_EXPIRED:
6753 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6754 		break;
6755 	case NFS4_CHECK_STATEID_CLOSED:
6756 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6757 		break;
6758 	case NFS4_CHECK_STATEID_REPLAY:
6759 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6760 		case NFS4_CHKSEQ_OKAY:
6761 			/*
6762 			 * This is replayed stateid; if seqid matches
6763 			 * next expected, then client is using wrong seqid.
6764 			 */
6765 			/* fall through */
6766 		case NFS4_CHKSEQ_BAD:
6767 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6768 			break;
6769 		case NFS4_CHKSEQ_REPLAY:
6770 			/*
6771 			 * Note this case is the duplicate case so
6772 			 * resp->status is already set.
6773 			 */
6774 			*cs->statusp = resp->status;
6775 			rfs4_update_lease(sp->owner->client);
6776 			break;
6777 		}
6778 		break;
6779 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6780 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6781 			resop) != NFS4_CHKSEQ_OKAY) {
6782 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6783 			break;
6784 		}
6785 		*cs->statusp = resp->status = NFS4_OK;
6786 
6787 		next_stateid(&sp->stateid);
6788 		resp->open_stateid = sp->stateid.stateid;
6789 		sp->owner->need_confirm = FALSE;
6790 		rfs4_update_lease(sp->owner->client);
6791 		rfs4_update_open_sequence(sp->owner);
6792 		rfs4_update_open_resp(sp->owner, resop, NULL);
6793 		break;
6794 	default:
6795 		ASSERT(FALSE);
6796 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6797 		break;
6798 	}
6799 	rfs4_sw_exit(&sp->owner->oo_sw);
6800 	rfs4_state_rele(sp);
6801 }
6802 
6803 /*ARGSUSED*/
6804 void
6805 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
6806 		    struct svc_req *req, struct compound_state *cs)
6807 {
6808 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
6809 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
6810 	uint32_t access = args->share_access;
6811 	uint32_t deny = args->share_deny;
6812 	nfsstat4 status;
6813 	rfs4_state_t *sp;
6814 	rfs4_file_t *fp;
6815 
6816 	if (cs->vp == NULL) {
6817 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6818 		return;
6819 	}
6820 
6821 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6822 	if (status != NFS4_OK) {
6823 		*cs->statusp = resp->status = status;
6824 		return;
6825 	}
6826 
6827 	/* Ensure specified filehandle matches */
6828 	if (cs->vp != sp->finfo->vp) {
6829 		rfs4_state_rele(sp);
6830 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6831 		return;
6832 	}
6833 
6834 	/* hold off other access to open_owner while we tinker */
6835 	rfs4_sw_enter(&sp->owner->oo_sw);
6836 
6837 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6838 	case NFS4_CHECK_STATEID_OKAY:
6839 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6840 			resop) != NFS4_CHKSEQ_OKAY) {
6841 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6842 			goto end;
6843 		}
6844 		break;
6845 	case NFS4_CHECK_STATEID_OLD:
6846 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6847 		goto end;
6848 	case NFS4_CHECK_STATEID_BAD:
6849 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6850 		goto end;
6851 	case NFS4_CHECK_STATEID_EXPIRED:
6852 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6853 		goto end;
6854 	case NFS4_CHECK_STATEID_CLOSED:
6855 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6856 		goto end;
6857 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6858 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6859 		goto end;
6860 	case NFS4_CHECK_STATEID_REPLAY:
6861 		/* Check the sequence id for the open owner */
6862 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6863 		case NFS4_CHKSEQ_OKAY:
6864 			/*
6865 			 * This is replayed stateid; if seqid matches
6866 			 * next expected, then client is using wrong seqid.
6867 			 */
6868 			/* fall through */
6869 		case NFS4_CHKSEQ_BAD:
6870 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6871 			goto end;
6872 		case NFS4_CHKSEQ_REPLAY:
6873 			/*
6874 			 * Note this case is the duplicate case so
6875 			 * resp->status is already set.
6876 			 */
6877 			*cs->statusp = resp->status;
6878 			rfs4_update_lease(sp->owner->client);
6879 			goto end;
6880 		}
6881 		break;
6882 	default:
6883 		ASSERT(FALSE);
6884 		break;
6885 	}
6886 
6887 	rfs4_dbe_lock(sp->dbe);
6888 	/*
6889 	 * Check that the new access modes and deny modes are valid.
6890 	 * Check that no invalid bits are set.
6891 	 */
6892 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
6893 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
6894 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6895 		rfs4_update_open_sequence(sp->owner);
6896 		rfs4_dbe_unlock(sp->dbe);
6897 		goto end;
6898 	}
6899 
6900 	/*
6901 	 * The new modes must be a subset of the current modes and
6902 	 * the access must specify at least one mode. To test that
6903 	 * the new mode is a subset of the current modes we bitwise
6904 	 * AND them together and check that the result equals the new
6905 	 * mode. For example:
6906 	 * New mode, access == R and current mode, sp->share_access  == RW
6907 	 * access & sp->share_access == R == access, so the new access mode
6908 	 * is valid. Consider access == RW, sp->share_access = R
6909 	 * access & sp->share_access == R != access, so the new access mode
6910 	 * is invalid.
6911 	 */
6912 	if ((access & sp->share_access) != access ||
6913 	    (deny & sp->share_deny) != deny ||
6914 	    (access &
6915 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
6916 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6917 		rfs4_update_open_sequence(sp->owner);
6918 		rfs4_dbe_unlock(sp->dbe);
6919 		goto end;
6920 	}
6921 
6922 	/*
6923 	 * Release any share locks associated with this stateID.
6924 	 * Strictly speaking, this violates the spec because the
6925 	 * spec effectively requires that open downgrade be atomic.
6926 	 * At present, fs_shrlock does not have this capability.
6927 	 */
6928 	rfs4_dbe_unlock(sp->dbe);
6929 	rfs4_unshare(sp);
6930 	rfs4_dbe_lock(sp->dbe);
6931 
6932 	fp = sp->finfo;
6933 	rfs4_dbe_lock(fp->dbe);
6934 
6935 	/*
6936 	 * If the current mode has deny read and the new mode
6937 	 * does not, decrement the number of deny read mode bits
6938 	 * and if it goes to zero turn off the deny read bit
6939 	 * on the file.
6940 	 */
6941 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
6942 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
6943 		fp->deny_read--;
6944 		if (fp->deny_read == 0)
6945 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
6946 	}
6947 
6948 	/*
6949 	 * If the current mode has deny write and the new mode
6950 	 * does not, decrement the number of deny write mode bits
6951 	 * and if it goes to zero turn off the deny write bit
6952 	 * on the file.
6953 	 */
6954 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
6955 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
6956 		fp->deny_write--;
6957 		if (fp->deny_write == 0)
6958 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
6959 	}
6960 
6961 	/*
6962 	 * If the current mode has access read and the new mode
6963 	 * does not, decrement the number of access read mode bits
6964 	 * and if it goes to zero turn off the access read bit
6965 	 * on the file.
6966 	 */
6967 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
6968 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
6969 		fp->access_read--;
6970 		if (fp->access_read == 0)
6971 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
6972 	}
6973 
6974 	/*
6975 	 * If the current mode has access write and the new mode
6976 	 * does not, decrement the number of access write mode bits
6977 	 * and if it goes to zero turn off the access write bit
6978 	 * on the file.
6979 	 */
6980 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
6981 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
6982 		fp->access_write--;
6983 		if (fp->access_write == 0)
6984 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
6985 	}
6986 
6987 	/* Set the new access and deny modes */
6988 	sp->share_access = access;
6989 	sp->share_deny = deny;
6990 	/* Check that the file is still accessible */
6991 	ASSERT(fp->share_access);
6992 
6993 	rfs4_dbe_unlock(fp->dbe);
6994 
6995 	rfs4_dbe_unlock(sp->dbe);
6996 	if ((status = rfs4_share(sp)) != NFS4_OK) {
6997 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6998 		rfs4_update_open_sequence(sp->owner);
6999 		goto end;
7000 	}
7001 
7002 	rfs4_dbe_lock(sp->dbe);
7003 
7004 	/* Update the stateid */
7005 	next_stateid(&sp->stateid);
7006 	resp->open_stateid = sp->stateid.stateid;
7007 
7008 	rfs4_dbe_unlock(sp->dbe);
7009 
7010 	*cs->statusp = resp->status = NFS4_OK;
7011 	/* Update the lease */
7012 	rfs4_update_lease(sp->owner->client);
7013 	/* And the sequence */
7014 	rfs4_update_open_sequence(sp->owner);
7015 	rfs4_update_open_resp(sp->owner, resop, NULL);
7016 
7017 end:
7018 	rfs4_sw_exit(&sp->owner->oo_sw);
7019 	rfs4_state_rele(sp);
7020 }
7021 
7022 /*
7023  * The logic behind this function is detailed in the NFSv4 RFC in the
7024  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7025  * that section for explicit guidance to server behavior for
7026  * SETCLIENTID.
7027  */
7028 void
7029 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7030 		    struct svc_req *req, struct compound_state *cs)
7031 {
7032 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7033 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7034 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7035 	bool_t create = TRUE;
7036 	char *addr, *netid;
7037 	int len;
7038 
7039 retry:
7040 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7041 
7042 	/*
7043 	 * In search of an EXISTING client matching the incoming
7044 	 * request to establish a new client identifier at the server
7045 	 */
7046 	create = TRUE;
7047 	cp = rfs4_findclient(&args->client, &create, NULL);
7048 
7049 	/* Should never happen */
7050 	ASSERT(cp != NULL);
7051 
7052 	if (cp == NULL) {
7053 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7054 		return;
7055 	}
7056 
7057 	/*
7058 	 * Easiest case. Client identifier is newly created and is
7059 	 * unconfirmed.  Also note that for this case, no other
7060 	 * entries exist for the client identifier.  Nothing else to
7061 	 * check.  Just setup the response and respond.
7062 	 */
7063 	if (create) {
7064 		*cs->statusp = res->status = NFS4_OK;
7065 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7066 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7067 							cp->confirm_verf;
7068 		/* Setup callback information; CB_NULL confirmation later */
7069 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7070 
7071 		rfs4_client_rele(cp);
7072 		return;
7073 	}
7074 
7075 	/*
7076 	 * An existing, confirmed client may exist but it may not have
7077 	 * been active for at least one lease period.  If so, then
7078 	 * "close" the client and create a new client identifier
7079 	 */
7080 	if (rfs4_lease_expired(cp)) {
7081 		rfs4_client_close(cp);
7082 		goto retry;
7083 	}
7084 
7085 	if (cp->need_confirm == TRUE)
7086 		cp_unconfirmed = cp;
7087 	else
7088 		cp_confirmed = cp;
7089 
7090 	cp = NULL;
7091 
7092 	/*
7093 	 * We have a confirmed client, now check for an
7094 	 * unconfimred entry
7095 	 */
7096 	if (cp_confirmed) {
7097 		/* If creds don't match then client identifier is inuse */
7098 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7099 			rfs4_cbinfo_t *cbp;
7100 			/*
7101 			 * Some one else has established this client
7102 			 * id. Try and say * who they are. We will use
7103 			 * the call back address supplied by * the
7104 			 * first client.
7105 			 */
7106 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7107 
7108 			addr = netid = NULL;
7109 
7110 			cbp = &cp_confirmed->cbinfo;
7111 			if (cbp->cb_callback.cb_location.r_addr &&
7112 			    cbp->cb_callback.cb_location.r_netid) {
7113 				cb_client4 *cbcp = &cbp->cb_callback;
7114 
7115 				len = strlen(cbcp->cb_location.r_addr)+1;
7116 				addr = kmem_alloc(len, KM_SLEEP);
7117 				bcopy(cbcp->cb_location.r_addr, addr, len);
7118 				len = strlen(cbcp->cb_location.r_netid)+1;
7119 				netid = kmem_alloc(len, KM_SLEEP);
7120 				bcopy(cbcp->cb_location.r_netid, netid, len);
7121 			}
7122 
7123 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7124 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7125 
7126 			rfs4_client_rele(cp_confirmed);
7127 		}
7128 
7129 		/*
7130 		 * Confirmed, creds match, and verifier matches; must
7131 		 * be an update of the callback info
7132 		 */
7133 		if (cp_confirmed->nfs_client.verifier ==
7134 						args->client.verifier) {
7135 			/* Setup callback information */
7136 			rfs4_client_setcb(cp_confirmed, &args->callback,
7137 						args->callback_ident);
7138 
7139 			/* everything okay -- move ahead */
7140 			*cs->statusp = res->status = NFS4_OK;
7141 			res->SETCLIENTID4res_u.resok4.clientid =
7142 				cp_confirmed->clientid;
7143 
7144 			/* update the confirm_verifier and return it */
7145 			rfs4_client_scv_next(cp_confirmed);
7146 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7147 						cp_confirmed->confirm_verf;
7148 
7149 			rfs4_client_rele(cp_confirmed);
7150 			return;
7151 		}
7152 
7153 		/*
7154 		 * Creds match but the verifier doesn't.  Must search
7155 		 * for an unconfirmed client that would be replaced by
7156 		 * this request.
7157 		 */
7158 		create = FALSE;
7159 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7160 						cp_confirmed);
7161 	}
7162 
7163 	/*
7164 	 * At this point, we have taken care of the brand new client
7165 	 * struct, INUSE case, update of an existing, and confirmed
7166 	 * client struct.
7167 	 */
7168 
7169 	/*
7170 	 * check to see if things have changed while we originally
7171 	 * picked up the client struct.  If they have, then return and
7172 	 * retry the processing of this SETCLIENTID request.
7173 	 */
7174 	if (cp_unconfirmed) {
7175 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7176 		if (!cp_unconfirmed->need_confirm) {
7177 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7178 			rfs4_client_rele(cp_unconfirmed);
7179 			if (cp_confirmed)
7180 				rfs4_client_rele(cp_confirmed);
7181 			goto retry;
7182 		}
7183 		/* do away with the old unconfirmed one */
7184 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7185 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7186 		rfs4_client_rele(cp_unconfirmed);
7187 		cp_unconfirmed = NULL;
7188 	}
7189 
7190 	/*
7191 	 * This search will temporarily hide the confirmed client
7192 	 * struct while a new client struct is created as the
7193 	 * unconfirmed one.
7194 	 */
7195 	create = TRUE;
7196 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7197 
7198 	ASSERT(newcp != NULL);
7199 
7200 	if (newcp == NULL) {
7201 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7202 		rfs4_client_rele(cp_confirmed);
7203 		return;
7204 	}
7205 
7206 	/*
7207 	 * If one was not created, then a similar request must be in
7208 	 * process so release and start over with this one
7209 	 */
7210 	if (create != TRUE) {
7211 		rfs4_client_rele(newcp);
7212 		if (cp_confirmed)
7213 			rfs4_client_rele(cp_confirmed);
7214 		goto retry;
7215 	}
7216 
7217 	*cs->statusp = res->status = NFS4_OK;
7218 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7219 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7220 							newcp->confirm_verf;
7221 	/* Setup callback information; CB_NULL confirmation later */
7222 	rfs4_client_setcb(newcp, &args->callback,
7223 				args->callback_ident);
7224 
7225 	newcp->cp_confirmed = cp_confirmed;
7226 
7227 	rfs4_client_rele(newcp);
7228 }
7229 
7230 /*ARGSUSED*/
7231 void
7232 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7233 			    struct svc_req *req, struct compound_state *cs)
7234 {
7235 	SETCLIENTID_CONFIRM4args *args =
7236 		&argop->nfs_argop4_u.opsetclientid_confirm;
7237 	SETCLIENTID_CONFIRM4res *res =
7238 		&resop->nfs_resop4_u.opsetclientid_confirm;
7239 	rfs4_client_t *cp, *cptoclose = NULL;
7240 
7241 	*cs->statusp = res->status = NFS4_OK;
7242 
7243 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7244 
7245 	if (cp == NULL) {
7246 		*cs->statusp = res->status =
7247 			rfs4_check_clientid(&args->clientid, 1);
7248 		return;
7249 	}
7250 
7251 	if (!creds_ok(cp, req, cs)) {
7252 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7253 		rfs4_client_rele(cp);
7254 		return;
7255 	}
7256 
7257 	/* If the verifier doesn't match, the record doesn't match */
7258 	if (cp->confirm_verf != args->setclientid_confirm) {
7259 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7260 		rfs4_client_rele(cp);
7261 		return;
7262 	}
7263 
7264 	rfs4_dbe_lock(cp->dbe);
7265 	cp->need_confirm = FALSE;
7266 	if (cp->cp_confirmed) {
7267 		cptoclose = cp->cp_confirmed;
7268 		cptoclose->ss_remove = 1;
7269 		cp->cp_confirmed = NULL;
7270 	}
7271 
7272 	/*
7273 	 * Record clientid in stable storage
7274 	 */
7275 	rfs4_ss_clid(cp, req);
7276 
7277 	rfs4_dbe_unlock(cp->dbe);
7278 
7279 	if (cptoclose)
7280 		/* don't need to rele, client_close does it */
7281 		rfs4_client_close(cptoclose);
7282 
7283 	/* If needed, initiate CB_NULL call for callback path */
7284 	rfs4_deleg_cb_check(cp);
7285 	rfs4_update_lease(cp);
7286 
7287 	/*
7288 	 * Update the client's associated server instance, if it's changed
7289 	 * since the client was created.
7290 	 */
7291 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7292 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7293 
7294 	/*
7295 	 * Check to see if client can perform reclaims
7296 	 */
7297 	rfs4_ss_chkclid(cp);
7298 
7299 	rfs4_client_rele(cp);
7300 }
7301 
7302 
7303 /*ARGSUSED*/
7304 void
7305 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7306 	    struct svc_req *req, struct compound_state *cs)
7307 {
7308 	/* XXX Currently not using req arg */
7309 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7310 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7311 	rfs4_state_t *sp;
7312 	nfsstat4 status;
7313 
7314 	if (cs->vp == NULL) {
7315 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7316 		return;
7317 	}
7318 
7319 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7320 	if (status != NFS4_OK) {
7321 		*cs->statusp = resp->status = status;
7322 		return;
7323 	}
7324 
7325 	/* Ensure specified filehandle matches */
7326 	if (cs->vp != sp->finfo->vp) {
7327 		rfs4_state_rele(sp);
7328 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7329 		return;
7330 	}
7331 
7332 	/* hold off other access to open_owner while we tinker */
7333 	rfs4_sw_enter(&sp->owner->oo_sw);
7334 
7335 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7336 	case NFS4_CHECK_STATEID_OKAY:
7337 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7338 			resop) != NFS4_CHKSEQ_OKAY) {
7339 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7340 			goto end;
7341 		}
7342 		break;
7343 	case NFS4_CHECK_STATEID_OLD:
7344 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7345 		goto end;
7346 	case NFS4_CHECK_STATEID_BAD:
7347 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7348 		goto end;
7349 	case NFS4_CHECK_STATEID_EXPIRED:
7350 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7351 		goto end;
7352 	case NFS4_CHECK_STATEID_CLOSED:
7353 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7354 		goto end;
7355 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7356 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7357 		goto end;
7358 	case NFS4_CHECK_STATEID_REPLAY:
7359 		/* Check the sequence id for the open owner */
7360 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7361 		case NFS4_CHKSEQ_OKAY:
7362 			/*
7363 			 * This is replayed stateid; if seqid matches
7364 			 * next expected, then client is using wrong seqid.
7365 			 */
7366 			/* FALL THROUGH */
7367 		case NFS4_CHKSEQ_BAD:
7368 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7369 			goto end;
7370 		case NFS4_CHKSEQ_REPLAY:
7371 			/*
7372 			 * Note this case is the duplicate case so
7373 			 * resp->status is already set.
7374 			 */
7375 			*cs->statusp = resp->status;
7376 			rfs4_update_lease(sp->owner->client);
7377 			goto end;
7378 		}
7379 		break;
7380 	default:
7381 		ASSERT(FALSE);
7382 		break;
7383 	}
7384 
7385 	rfs4_dbe_lock(sp->dbe);
7386 
7387 	/* Update the stateid. */
7388 	next_stateid(&sp->stateid);
7389 	resp->open_stateid = sp->stateid.stateid;
7390 
7391 	rfs4_dbe_unlock(sp->dbe);
7392 
7393 	rfs4_update_lease(sp->owner->client);
7394 	rfs4_update_open_sequence(sp->owner);
7395 	rfs4_update_open_resp(sp->owner, resop, NULL);
7396 
7397 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7398 
7399 	*cs->statusp = resp->status = status;
7400 
7401 end:
7402 	rfs4_sw_exit(&sp->owner->oo_sw);
7403 	rfs4_state_rele(sp);
7404 }
7405 
7406 /*
7407  * Manage the counts on the file struct and close all file locks
7408  */
7409 /*ARGSUSED*/
7410 void
7411 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7412 	bool_t close_of_client)
7413 {
7414 	rfs4_file_t *fp = sp->finfo;
7415 	rfs4_lo_state_t *lsp;
7416 	struct shrlock shr;
7417 	struct shr_locowner shr_loco;
7418 	int fflags, s_access, s_deny;
7419 
7420 	fflags = s_access = s_deny = 0;
7421 	/*
7422 	 * Decrement the count for each access and deny bit that this
7423 	 * state has contributed to the file. If the file counts go to zero
7424 	 * clear the appropriate bit in the appropriate mask.
7425 	 */
7426 
7427 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7428 		fp->access_read--;
7429 		fflags |= FREAD;
7430 		s_access |= F_RDACC;
7431 		if (fp->access_read == 0)
7432 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7433 	}
7434 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7435 		fp->access_write--;
7436 		fflags |= FWRITE;
7437 		s_access |= F_WRACC;
7438 		if (fp->access_write == 0)
7439 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7440 	}
7441 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7442 		fp->deny_read--;
7443 		s_deny |= F_RDDNY;
7444 		if (fp->deny_read == 0)
7445 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7446 	}
7447 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7448 		fp->deny_write--;
7449 		s_deny |= F_WRDNY;
7450 		if (fp->deny_write == 0)
7451 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7452 	}
7453 
7454 	/*
7455 	 * If this call is part of the larger closing down of client
7456 	 * state then it is just easier to release all locks
7457 	 * associated with this client instead of going through each
7458 	 * individual file and cleaning locks there.
7459 	 */
7460 	if (close_of_client) {
7461 		if (sp->owner->client->unlksys_completed == FALSE &&
7462 		    sp->lockownerlist.next->lsp != NULL &&
7463 			sp->owner->client->sysidt != LM_NOSYSID) {
7464 			/* Is the PxFS kernel module loaded? */
7465 			if (lm_remove_file_locks != NULL) {
7466 				int new_sysid;
7467 
7468 				/* Encode the cluster nodeid in new sysid */
7469 				new_sysid = sp->owner->client->sysidt;
7470 				lm_set_nlmid_flk(&new_sysid);
7471 
7472 				/*
7473 				 * This PxFS routine removes file locks for a
7474 				 * client over all nodes of a cluster.
7475 				 */
7476 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7477 				    "lm_remove_file_locks(sysid=0x%x)\n",
7478 				    new_sysid));
7479 				(*lm_remove_file_locks)(new_sysid);
7480 			} else {
7481 				struct flock64 flk;
7482 
7483 				/* Release all locks for this client */
7484 				flk.l_type = F_UNLKSYS;
7485 				flk.l_whence = 0;
7486 				flk.l_start = 0;
7487 				flk.l_len = 0;
7488 				flk.l_sysid = sp->owner->client->sysidt;
7489 				flk.l_pid = 0;
7490 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7491 				    F_REMOTELOCK | FREAD | FWRITE,
7492 				    (u_offset_t)0, NULL, CRED());
7493 			}
7494 
7495 			sp->owner->client->unlksys_completed = TRUE;
7496 		}
7497 	}
7498 
7499 	/*
7500 	 * Release all locks on this file by this lock owner or at
7501 	 * least mark the locks as having been released
7502 	 */
7503 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7504 		lsp = lsp->lockownerlist.next->lsp) {
7505 
7506 		lsp->locks_cleaned = TRUE;
7507 
7508 		/* Was this already taken care of above? */
7509 		if (!close_of_client &&
7510 		    sp->owner->client->sysidt != LM_NOSYSID)
7511 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7512 				lsp->locker->client->sysidt);
7513 	}
7514 
7515 	/*
7516 	 * Release any shrlocks associated with this open state ID.
7517 	 * This must be done before the rfs4_state gets marked closed.
7518 	 */
7519 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7520 		shr.s_access = s_access;
7521 		shr.s_deny = s_deny;
7522 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7523 		shr.s_sysid = sp->owner->client->sysidt;
7524 		shr_loco.sl_pid = shr.s_pid;
7525 		shr_loco.sl_id = shr.s_sysid;
7526 		shr.s_owner = (caddr_t)&shr_loco;
7527 		shr.s_own_len = sizeof (shr_loco);
7528 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7529 	}
7530 }
7531 
7532 /*
7533  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7534  */
7535 static nfsstat4
7536 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7537 {
7538 	rfs4_lockowner_t *lo;
7539 	rfs4_client_t *cp;
7540 	uint32_t len;
7541 
7542 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7543 	if (lo != NULL) {
7544 		cp = lo->client;
7545 		if (rfs4_lease_expired(cp)) {
7546 			rfs4_lockowner_rele(lo);
7547 			rfs4_dbe_hold(cp->dbe);
7548 			rfs4_client_close(cp);
7549 			return (NFS4ERR_EXPIRED);
7550 		}
7551 		dp->owner.clientid = lo->owner.clientid;
7552 		len = lo->owner.owner_len;
7553 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7554 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7555 		dp->owner.owner_len = len;
7556 		rfs4_lockowner_rele(lo);
7557 		goto finish;
7558 	}
7559 
7560 	/*
7561 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7562 	 * of the client id contain the boot time for a NFS4 lock. So we
7563 	 * fabricate and identity by setting clientid to the sysid, and
7564 	 * the lock owner to the pid.
7565 	 */
7566 	dp->owner.clientid = flk->l_sysid;
7567 	len = sizeof (pid_t);
7568 	dp->owner.owner_len = len;
7569 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7570 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7571 finish:
7572 	dp->offset = flk->l_start;
7573 	dp->length = flk->l_len;
7574 
7575 	if (flk->l_type == F_RDLCK)
7576 		dp->locktype = READ_LT;
7577 	else if (flk->l_type == F_WRLCK)
7578 		dp->locktype = WRITE_LT;
7579 	else
7580 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7581 
7582 	return (NFS4_OK);
7583 }
7584 
7585 static int
7586 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7587 {
7588 	int error;
7589 	struct flock64 flk;
7590 	int i;
7591 	clock_t delaytime;
7592 
7593 retry:
7594 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7595 
7596 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7597 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7598 		error = VOP_FRLOCK(vp, F_SETLK,
7599 				flock, flag, (u_offset_t)0, NULL, cred);
7600 
7601 		if (error != EAGAIN && error != EACCES)
7602 			break;
7603 
7604 		if (i < rfs4_maxlock_tries - 1) {
7605 			delay(delaytime);
7606 			delaytime *= 2;
7607 		}
7608 	}
7609 
7610 	if (error == EAGAIN || error == EACCES) {
7611 		/* Get the owner of the lock */
7612 		flk = *flock;
7613 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7614 		if (VOP_FRLOCK(vp, F_GETLK,
7615 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7616 			if (flk.l_type == F_UNLCK) {
7617 				/* No longer locked, retry */
7618 				goto retry;
7619 			}
7620 			*flock = flk;
7621 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7622 				F_GETLK, &flk);
7623 		}
7624 	}
7625 
7626 	return (error);
7627 }
7628 
7629 /*ARGSUSED*/
7630 static nfsstat4
7631 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7632 	    seqid4 seqid, offset4 offset,
7633 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7634 {
7635 	nfsstat4 status;
7636 	rfs4_lockowner_t *lo = lp->locker;
7637 	rfs4_state_t *sp = lp->state;
7638 	struct flock64 flock;
7639 	int16_t ltype;
7640 	int flag;
7641 	int error;
7642 	sysid_t sysid;
7643 	LOCK4res *lres;
7644 
7645 	if (rfs4_lease_expired(lo->client)) {
7646 		return (NFS4ERR_EXPIRED);
7647 	}
7648 
7649 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7650 		return (status);
7651 
7652 	/* Check for zero length. To lock to end of file use all ones for V4 */
7653 	if (length == 0)
7654 		return (NFS4ERR_INVAL);
7655 	else if (length == (length4)(~0))
7656 		length = 0;		/* Posix to end of file  */
7657 
7658 retry:
7659 	rfs4_dbe_lock(sp->dbe);
7660 
7661 
7662 	if (resop->resop != OP_LOCKU) {
7663 		switch (locktype) {
7664 		case READ_LT:
7665 		case READW_LT:
7666 			if ((sp->share_access
7667 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7668 				rfs4_dbe_unlock(sp->dbe);
7669 
7670 				return (NFS4ERR_OPENMODE);
7671 			}
7672 			ltype = F_RDLCK;
7673 			break;
7674 		case WRITE_LT:
7675 		case WRITEW_LT:
7676 			if ((sp->share_access
7677 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7678 				rfs4_dbe_unlock(sp->dbe);
7679 
7680 				return (NFS4ERR_OPENMODE);
7681 			}
7682 			ltype = F_WRLCK;
7683 			break;
7684 		}
7685 	} else
7686 		ltype = F_UNLCK;
7687 
7688 	flock.l_type = ltype;
7689 	flock.l_whence = 0;		/* SEEK_SET */
7690 	flock.l_start = offset;
7691 	flock.l_len = length;
7692 	flock.l_sysid = sysid;
7693 	flock.l_pid = lp->locker->pid;
7694 
7695 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7696 	if (flock.l_len < 0 || flock.l_start < 0) {
7697 		rfs4_dbe_unlock(sp->dbe);
7698 		return (NFS4ERR_INVAL);
7699 	}
7700 
7701 	/*
7702 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7703 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7704 	 */
7705 	flag = (int)sp->share_access | F_REMOTELOCK;
7706 
7707 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7708 	if (error == 0) {
7709 		rfs4_dbe_lock(lp->dbe);
7710 		next_stateid(&lp->lockid);
7711 		rfs4_dbe_unlock(lp->dbe);
7712 	}
7713 
7714 	rfs4_dbe_unlock(sp->dbe);
7715 
7716 	/*
7717 	 * N.B. We map error values to nfsv4 errors. This is differrent
7718 	 * than puterrno4 routine.
7719 	 */
7720 	switch (error) {
7721 	case 0:
7722 		status = NFS4_OK;
7723 		break;
7724 	case EAGAIN:
7725 	case EACCES:		/* Old value */
7726 		/* Can only get here if op is OP_LOCK */
7727 		ASSERT(resop->resop == OP_LOCK);
7728 		lres = &resop->nfs_resop4_u.oplock;
7729 		status = NFS4ERR_DENIED;
7730 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7731 			== NFS4ERR_EXPIRED)
7732 			goto retry;
7733 		break;
7734 	case ENOLCK:
7735 		status = NFS4ERR_DELAY;
7736 		break;
7737 	case EOVERFLOW:
7738 		status = NFS4ERR_INVAL;
7739 		break;
7740 	case EINVAL:
7741 		status = NFS4ERR_NOTSUPP;
7742 		break;
7743 	default:
7744 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7745 			error);
7746 		status = NFS4ERR_SERVERFAULT;
7747 		break;
7748 	}
7749 
7750 	return (status);
7751 }
7752 
7753 /*ARGSUSED*/
7754 void
7755 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
7756 	    struct svc_req *req, struct compound_state *cs)
7757 {
7758 	/* XXX Currently not using req arg */
7759 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
7760 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
7761 	nfsstat4 status;
7762 	stateid4 *stateid;
7763 	rfs4_lockowner_t *lo;
7764 	rfs4_client_t *cp;
7765 	rfs4_state_t *sp = NULL;
7766 	rfs4_lo_state_t *lsp = NULL;
7767 	bool_t ls_sw_held = FALSE;
7768 	bool_t create = TRUE;
7769 	bool_t lcreate = TRUE;
7770 	bool_t dup_lock = FALSE;
7771 	int rc;
7772 
7773 	if (cs->vp == NULL) {
7774 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7775 		return;
7776 	}
7777 
7778 	if (args->locker.new_lock_owner) {
7779 		/* Create a new lockowner for this instance */
7780 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
7781 
7782 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
7783 
7784 		stateid = &olo->open_stateid;
7785 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
7786 		if (status != NFS4_OK) {
7787 			NFS4_DEBUG(rfs4_debug,
7788 				(CE_NOTE, "Get state failed in lock %d",
7789 				status));
7790 			*cs->statusp = resp->status = status;
7791 			return;
7792 		}
7793 
7794 		/* Ensure specified filehandle matches */
7795 		if (cs->vp != sp->finfo->vp) {
7796 			rfs4_state_rele(sp);
7797 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7798 			return;
7799 		}
7800 
7801 		/* hold off other access to open_owner while we tinker */
7802 		rfs4_sw_enter(&sp->owner->oo_sw);
7803 
7804 		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
7805 		case NFS4_CHECK_STATEID_OLD:
7806 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7807 			goto end;
7808 		case NFS4_CHECK_STATEID_BAD:
7809 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7810 			goto end;
7811 		case NFS4_CHECK_STATEID_EXPIRED:
7812 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7813 			goto end;
7814 		case NFS4_CHECK_STATEID_UNCONFIRMED:
7815 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7816 			goto end;
7817 		case NFS4_CHECK_STATEID_CLOSED:
7818 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7819 			goto end;
7820 		case NFS4_CHECK_STATEID_OKAY:
7821 		case NFS4_CHECK_STATEID_REPLAY:
7822 			switch (rfs4_check_olo_seqid(olo->open_seqid,
7823 				sp->owner, resop)) {
7824 			case NFS4_CHKSEQ_OKAY:
7825 				if (rc == NFS4_CHECK_STATEID_OKAY)
7826 					break;
7827 				/*
7828 				 * This is replayed stateid; if seqid
7829 				 * matches next expected, then client
7830 				 * is using wrong seqid.
7831 				 */
7832 				/* FALLTHROUGH */
7833 			case NFS4_CHKSEQ_BAD:
7834 				*cs->statusp = resp->status =
7835 					NFS4ERR_BAD_SEQID;
7836 				goto end;
7837 			case NFS4_CHKSEQ_REPLAY:
7838 				/* This is a duplicate LOCK request */
7839 				dup_lock = TRUE;
7840 
7841 				/*
7842 				 * For a duplicate we do not want to
7843 				 * create a new lockowner as it should
7844 				 * already exist.
7845 				 * Turn off the lockowner create flag.
7846 				 */
7847 				lcreate = FALSE;
7848 			}
7849 			break;
7850 		}
7851 
7852 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
7853 		if (lo == NULL) {
7854 			NFS4_DEBUG(rfs4_debug,
7855 				(CE_NOTE, "rfs4_op_lock: no lock owner"));
7856 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
7857 			goto end;
7858 		}
7859 
7860 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
7861 		if (lsp == NULL) {
7862 			rfs4_update_lease(sp->owner->client);
7863 			/*
7864 			 * Only update theh open_seqid if this is not
7865 			 * a duplicate request
7866 			 */
7867 			if (dup_lock == FALSE) {
7868 				rfs4_update_open_sequence(sp->owner);
7869 			}
7870 
7871 			NFS4_DEBUG(rfs4_debug,
7872 				(CE_NOTE, "rfs4_op_lock: no state"));
7873 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7874 			rfs4_update_open_resp(sp->owner, resop, NULL);
7875 			rfs4_lockowner_rele(lo);
7876 			goto end;
7877 		}
7878 
7879 		/*
7880 		 * This is the new_lock_owner branch and the client is
7881 		 * supposed to be associating a new lock_owner with
7882 		 * the open file at this point.  If we find that a
7883 		 * lock_owner/state association already exists and a
7884 		 * successful LOCK request was returned to the client,
7885 		 * an error is returned to the client since this is
7886 		 * not appropriate.  The client should be using the
7887 		 * existing lock_owner branch.
7888 		 */
7889 		if (dup_lock == FALSE && create == FALSE) {
7890 			if (lsp->lock_completed == TRUE) {
7891 				*cs->statusp =
7892 					resp->status = NFS4ERR_BAD_SEQID;
7893 				rfs4_lockowner_rele(lo);
7894 				goto end;
7895 			}
7896 		}
7897 
7898 		rfs4_update_lease(sp->owner->client);
7899 
7900 		/*
7901 		 * Only update theh open_seqid if this is not
7902 		 * a duplicate request
7903 		 */
7904 		if (dup_lock == FALSE) {
7905 			rfs4_update_open_sequence(sp->owner);
7906 		}
7907 
7908 		/*
7909 		 * If this is a duplicate lock request, just copy the
7910 		 * previously saved reply and return.
7911 		 */
7912 		if (dup_lock == TRUE) {
7913 			/* verify that lock_seqid's match */
7914 			if (lsp->seqid != olo->lock_seqid) {
7915 				NFS4_DEBUG(rfs4_debug,
7916 				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
7917 				"lsp->seqid=%d old->seqid=%d",
7918 				lsp->seqid, olo->lock_seqid));
7919 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7920 			} else {
7921 				rfs4_copy_reply(resop, lsp->reply);
7922 				/*
7923 				 * Make sure to copy the just
7924 				 * retrieved reply status into the
7925 				 * overall compound status
7926 				 */
7927 				*cs->statusp = resp->status;
7928 			}
7929 			rfs4_lockowner_rele(lo);
7930 			goto end;
7931 		}
7932 
7933 		rfs4_dbe_lock(lsp->dbe);
7934 
7935 		/* Make sure to update the lock sequence id */
7936 		lsp->seqid = olo->lock_seqid;
7937 
7938 		NFS4_DEBUG(rfs4_debug,
7939 			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));
7940 
7941 		/*
7942 		 * This is used to signify the newly created lockowner
7943 		 * stateid and its sequence number.  The checks for
7944 		 * sequence number and increment don't occur on the
7945 		 * very first lock request for a lockowner.
7946 		 */
7947 		lsp->skip_seqid_check = TRUE;
7948 
7949 		/* hold off other access to lsp while we tinker */
7950 		rfs4_sw_enter(&lsp->ls_sw);
7951 		ls_sw_held = TRUE;
7952 
7953 		rfs4_dbe_unlock(lsp->dbe);
7954 
7955 		rfs4_lockowner_rele(lo);
7956 	} else {
7957 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
7958 		/* get lsp and hold the lock on the underlying file struct */
7959 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
7960 		    != NFS4_OK) {
7961 			*cs->statusp = resp->status = status;
7962 			return;
7963 		}
7964 		create = FALSE;	/* We didn't create lsp */
7965 
7966 		/* Ensure specified filehandle matches */
7967 		if (cs->vp != lsp->state->finfo->vp) {
7968 			rfs4_lo_state_rele(lsp, TRUE);
7969 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7970 			return;
7971 		}
7972 
7973 		/* hold off other access to lsp while we tinker */
7974 		rfs4_sw_enter(&lsp->ls_sw);
7975 		ls_sw_held = TRUE;
7976 
7977 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
7978 		/*
7979 		 * The stateid looks like it was okay (expected to be
7980 		 * the next one)
7981 		 */
7982 		case NFS4_CHECK_STATEID_OKAY:
7983 			/*
7984 			 * The sequence id is now checked.  Determine
7985 			 * if this is a replay or if it is in the
7986 			 * expected (next) sequence.  In the case of a
7987 			 * replay, there are two replay conditions
7988 			 * that may occur.  The first is the normal
7989 			 * condition where a LOCK is done with a
7990 			 * NFS4_OK response and the stateid is
7991 			 * updated.  That case is handled below when
7992 			 * the stateid is identified as a REPLAY.  The
7993 			 * second is the case where an error is
7994 			 * returned, like NFS4ERR_DENIED, and the
7995 			 * sequence number is updated but the stateid
7996 			 * is not updated.  This second case is dealt
7997 			 * with here.  So it may seem odd that the
7998 			 * stateid is okay but the sequence id is a
7999 			 * replay but it is okay.
8000 			 */
8001 			switch (rfs4_check_lock_seqid(
8002 				args->locker.locker4_u.lock_owner.lock_seqid,
8003 				lsp, resop)) {
8004 			case NFS4_CHKSEQ_REPLAY:
8005 				if (resp->status != NFS4_OK) {
8006 					/*
8007 					 * Here is our replay and need
8008 					 * to verify that the last
8009 					 * response was an error.
8010 					 */
8011 					*cs->statusp = resp->status;
8012 					goto end;
8013 				}
8014 				/*
8015 				 * This is done since the sequence id
8016 				 * looked like a replay but it didn't
8017 				 * pass our check so a BAD_SEQID is
8018 				 * returned as a result.
8019 				 */
8020 				/*FALLTHROUGH*/
8021 			case NFS4_CHKSEQ_BAD:
8022 				*cs->statusp = resp->status =
8023 					NFS4ERR_BAD_SEQID;
8024 				goto end;
8025 			case NFS4_CHKSEQ_OKAY:
8026 				/* Everything looks okay move ahead */
8027 				break;
8028 			}
8029 			break;
8030 		case NFS4_CHECK_STATEID_OLD:
8031 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8032 			goto end;
8033 		case NFS4_CHECK_STATEID_BAD:
8034 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8035 			goto end;
8036 		case NFS4_CHECK_STATEID_EXPIRED:
8037 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8038 			goto end;
8039 		case NFS4_CHECK_STATEID_CLOSED:
8040 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8041 			goto end;
8042 		case NFS4_CHECK_STATEID_REPLAY:
8043 			switch (rfs4_check_lock_seqid(
8044 				args->locker.locker4_u.lock_owner.lock_seqid,
8045 				lsp, resop)) {
8046 			case NFS4_CHKSEQ_OKAY:
8047 				/*
8048 				 * This is a replayed stateid; if
8049 				 * seqid matches the next expected,
8050 				 * then client is using wrong seqid.
8051 				 */
8052 			case NFS4_CHKSEQ_BAD:
8053 				*cs->statusp = resp->status =
8054 					NFS4ERR_BAD_SEQID;
8055 				goto end;
8056 			case NFS4_CHKSEQ_REPLAY:
8057 				rfs4_update_lease(lsp->locker->client);
8058 				*cs->statusp = status = resp->status;
8059 				goto end;
8060 			}
8061 			break;
8062 		default:
8063 			ASSERT(FALSE);
8064 			break;
8065 		}
8066 
8067 		rfs4_update_lock_sequence(lsp);
8068 		rfs4_update_lease(lsp->locker->client);
8069 	}
8070 
8071 	/*
8072 	 * NFS4 only allows locking on regular files, so
8073 	 * verify type of object.
8074 	 */
8075 	if (cs->vp->v_type != VREG) {
8076 		if (cs->vp->v_type == VDIR)
8077 			status = NFS4ERR_ISDIR;
8078 		else
8079 			status = NFS4ERR_INVAL;
8080 		goto out;
8081 	}
8082 
8083 	cp = lsp->state->owner->client;
8084 
8085 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8086 		status = NFS4ERR_GRACE;
8087 		goto out;
8088 	}
8089 
8090 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
8091 		status = NFS4ERR_NO_GRACE;
8092 		goto out;
8093 	}
8094 
8095 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8096 		status = NFS4ERR_NO_GRACE;
8097 		goto out;
8098 	}
8099 
8100 	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
8101 		cs->deleg = TRUE;
8102 
8103 	status = rfs4_do_lock(lsp, args->locktype,
8104 				args->locker.locker4_u.lock_owner.lock_seqid,
8105 				args->offset,
8106 				args->length, cs->cr, resop);
8107 
8108 out:
8109 	lsp->skip_seqid_check = FALSE;
8110 
8111 	*cs->statusp = resp->status = status;
8112 
8113 	if (status == NFS4_OK) {
8114 		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
8115 		lsp->lock_completed = TRUE;
8116 	}
8117 	/*
8118 	 * Only update the "OPEN" response here if this was a new
8119 	 * lock_owner
8120 	 */
8121 	if (sp)
8122 		rfs4_update_open_resp(sp->owner, resop, NULL);
8123 
8124 	rfs4_update_lock_resp(lsp, resop);
8125 
8126 end:
8127 	if (lsp) {
8128 		if (ls_sw_held)
8129 			rfs4_sw_exit(&lsp->ls_sw);
8130 		/*
8131 		 * If an sp obtained, then the lsp does not represent
8132 		 * a lock on the file struct.
8133 		 */
8134 		if (sp != NULL)
8135 			rfs4_lo_state_rele(lsp, FALSE);
8136 		else
8137 			rfs4_lo_state_rele(lsp, TRUE);
8138 	}
8139 	if (sp) {
8140 		rfs4_sw_exit(&sp->owner->oo_sw);
8141 		rfs4_state_rele(sp);
8142 	}
8143 }
8144 
8145 /* free function for LOCK/LOCKT */
8146 static void
8147 lock_denied_free(nfs_resop4 *resop)
8148 {
8149 	LOCK4denied *dp = NULL;
8150 
8151 	switch (resop->resop) {
8152 	case OP_LOCK:
8153 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8154 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8155 		break;
8156 	case OP_LOCKT:
8157 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8158 			dp = &resop->nfs_resop4_u.oplockt.denied;
8159 		break;
8160 	default:
8161 		break;
8162 	}
8163 
8164 	if (dp)
8165 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8166 }
8167 
8168 /*ARGSUSED*/
8169 void
8170 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8171 	    struct svc_req *req, struct compound_state *cs)
8172 {
8173 	/* XXX Currently not using req arg */
8174 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8175 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8176 	nfsstat4 status;
8177 	stateid4 *stateid = &args->lock_stateid;
8178 	rfs4_lo_state_t *lsp;
8179 
8180 	if (cs->vp == NULL) {
8181 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8182 		return;
8183 	}
8184 
8185 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8186 		*cs->statusp = resp->status = status;
8187 		return;
8188 	}
8189 
8190 	/* Ensure specified filehandle matches */
8191 	if (cs->vp != lsp->state->finfo->vp) {
8192 		rfs4_lo_state_rele(lsp, TRUE);
8193 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8194 		return;
8195 	}
8196 
8197 	/* hold off other access to lsp while we tinker */
8198 	rfs4_sw_enter(&lsp->ls_sw);
8199 
8200 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8201 	case NFS4_CHECK_STATEID_OKAY:
8202 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8203 		    != NFS4_CHKSEQ_OKAY) {
8204 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8205 			goto end;
8206 		}
8207 		break;
8208 	case NFS4_CHECK_STATEID_OLD:
8209 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8210 		goto end;
8211 	case NFS4_CHECK_STATEID_BAD:
8212 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8213 		goto end;
8214 	case NFS4_CHECK_STATEID_EXPIRED:
8215 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8216 		goto end;
8217 	case NFS4_CHECK_STATEID_CLOSED:
8218 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8219 		goto end;
8220 	case NFS4_CHECK_STATEID_REPLAY:
8221 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8222 		case NFS4_CHKSEQ_OKAY:
8223 				/*
8224 				 * This is a replayed stateid; if
8225 				 * seqid matches the next expected,
8226 				 * then client is using wrong seqid.
8227 				 */
8228 		case NFS4_CHKSEQ_BAD:
8229 			*cs->statusp = resp->status =
8230 				NFS4ERR_BAD_SEQID;
8231 			goto end;
8232 		case NFS4_CHKSEQ_REPLAY:
8233 			rfs4_update_lease(lsp->locker->client);
8234 			*cs->statusp = status = resp->status;
8235 			goto end;
8236 		}
8237 		break;
8238 	default:
8239 		ASSERT(FALSE);
8240 		break;
8241 	}
8242 
8243 	rfs4_update_lock_sequence(lsp);
8244 	rfs4_update_lease(lsp->locker->client);
8245 
8246 	/*
8247 	 * NFS4 only allows locking on regular files, so
8248 	 * verify type of object.
8249 	 */
8250 	if (cs->vp->v_type != VREG) {
8251 		if (cs->vp->v_type == VDIR)
8252 			status = NFS4ERR_ISDIR;
8253 		else
8254 			status = NFS4ERR_INVAL;
8255 		goto out;
8256 	}
8257 
8258 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8259 		status = NFS4ERR_GRACE;
8260 		goto out;
8261 	}
8262 
8263 	status = rfs4_do_lock(lsp, args->locktype,
8264 			    args->seqid, args->offset,
8265 			    args->length, cs->cr, resop);
8266 
8267 out:
8268 	*cs->statusp = resp->status = status;
8269 
8270 	if (status == NFS4_OK)
8271 		resp->lock_stateid = lsp->lockid.stateid;
8272 
8273 	rfs4_update_lock_resp(lsp, resop);
8274 
8275 end:
8276 	rfs4_sw_exit(&lsp->ls_sw);
8277 	rfs4_lo_state_rele(lsp, TRUE);
8278 }
8279 
8280 /*
8281  * LOCKT is a best effort routine, the client can not be guaranteed that
8282  * the status return is still in effect by the time the reply is received.
8283  * They are numerous race conditions in this routine, but we are not required
8284  * and can not be accurate.
8285  */
8286 /*ARGSUSED*/
8287 void
8288 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8289 	    struct svc_req *req, struct compound_state *cs)
8290 {
8291 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8292 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8293 	rfs4_lockowner_t *lo;
8294 	rfs4_client_t *cp;
8295 	bool_t create = FALSE;
8296 	struct flock64 flk;
8297 	int error;
8298 	int flag = FREAD | FWRITE;
8299 	int ltype;
8300 	length4 posix_length;
8301 	sysid_t sysid;
8302 	pid_t pid;
8303 
8304 	if (cs->vp == NULL) {
8305 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8306 		return;
8307 	}
8308 
8309 	/*
8310 	 * NFS4 only allows locking on regular files, so
8311 	 * verify type of object.
8312 	 */
8313 	if (cs->vp->v_type != VREG) {
8314 		if (cs->vp->v_type == VDIR)
8315 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8316 		else
8317 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8318 		return;
8319 	}
8320 
8321 	/*
8322 	 * Check out the clientid to ensure the server knows about it
8323 	 * so that we correctly inform the client of a server reboot.
8324 	 */
8325 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8326 	    == NULL) {
8327 		*cs->statusp = resp->status =
8328 			rfs4_check_clientid(&args->owner.clientid, 0);
8329 		return;
8330 	}
8331 	if (rfs4_lease_expired(cp)) {
8332 		rfs4_client_close(cp);
8333 		/*
8334 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8335 		 * other operations do on this check so STALE_CLIENTID
8336 		 * is returned instead
8337 		 */
8338 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8339 		return;
8340 	}
8341 
8342 	if (rfs4_clnt_in_grace(cp)) {
8343 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8344 		return;
8345 	}
8346 	rfs4_client_rele(cp);
8347 
8348 	resp->status = NFS4_OK;
8349 
8350 	switch (args->locktype) {
8351 	case READ_LT:
8352 	case READW_LT:
8353 		ltype = F_RDLCK;
8354 		break;
8355 	case WRITE_LT:
8356 	case WRITEW_LT:
8357 		ltype = F_WRLCK;
8358 		break;
8359 	}
8360 
8361 	posix_length = args->length;
8362 	/* Check for zero length. To lock to end of file use all ones for V4 */
8363 	if (posix_length == 0) {
8364 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8365 		return;
8366 	} else if (posix_length == (length4)(~0)) {
8367 		posix_length = 0;	/* Posix to end of file  */
8368 	}
8369 
8370 	/* Find or create a lockowner */
8371 	lo = rfs4_findlockowner(&args->owner, &create);
8372 
8373 	if (lo) {
8374 		pid = lo->pid;
8375 		if ((resp->status =
8376 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8377 		goto out;
8378 	} else {
8379 		pid = 0;
8380 		sysid = lockt_sysid;
8381 	}
8382 retry:
8383 	flk.l_type = ltype;
8384 	flk.l_whence = 0;		/* SEEK_SET */
8385 	flk.l_start = args->offset;
8386 	flk.l_len = posix_length;
8387 	flk.l_sysid = sysid;
8388 	flk.l_pid = pid;
8389 	flag |= F_REMOTELOCK;
8390 
8391 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8392 
8393 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8394 	if (flk.l_len < 0 || flk.l_start < 0) {
8395 		resp->status = NFS4ERR_INVAL;
8396 		goto out;
8397 	}
8398 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8399 	    NULL, cs->cr);
8400 
8401 	/*
8402 	 * N.B. We map error values to nfsv4 errors. This is differrent
8403 	 * than puterrno4 routine.
8404 	 */
8405 	switch (error) {
8406 	case 0:
8407 		if (flk.l_type == F_UNLCK)
8408 			resp->status = NFS4_OK;
8409 		else {
8410 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8411 				goto retry;
8412 			resp->status = NFS4ERR_DENIED;
8413 		}
8414 		break;
8415 	case EOVERFLOW:
8416 		resp->status = NFS4ERR_INVAL;
8417 		break;
8418 	case EINVAL:
8419 		resp->status = NFS4ERR_NOTSUPP;
8420 		break;
8421 	default:
8422 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8423 			error);
8424 		resp->status = NFS4ERR_SERVERFAULT;
8425 		break;
8426 	}
8427 
8428 out:
8429 	if (lo)
8430 		rfs4_lockowner_rele(lo);
8431 	*cs->statusp = resp->status;
8432 }
8433 
8434 static int
8435 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8436 {
8437 	int err;
8438 
8439 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8440 		return (0);
8441 
8442 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8443 
8444 	NFS4_DEBUG(rfs4_shrlock_debug,
8445 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8446 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8447 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8448 		err));
8449 
8450 	return (err);
8451 }
8452 
8453 static int
8454 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8455 {
8456 	struct shrlock shr;
8457 	struct shr_locowner shr_loco;
8458 	int fflags;
8459 
8460 	fflags = shr.s_access = shr.s_deny = 0;
8461 
8462 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8463 		fflags |= FREAD;
8464 		shr.s_access |= F_RDACC;
8465 	}
8466 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8467 		fflags |= FWRITE;
8468 		shr.s_access |= F_WRACC;
8469 	}
8470 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8471 		shr.s_deny |= F_RDDNY;
8472 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8473 		shr.s_deny |= F_WRDNY;
8474 
8475 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8476 	shr.s_sysid = sp->owner->client->sysidt;
8477 	shr_loco.sl_pid = shr.s_pid;
8478 	shr_loco.sl_id = shr.s_sysid;
8479 	shr.s_owner = (caddr_t)&shr_loco;
8480 	shr.s_own_len = sizeof (shr_loco);
8481 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8482 }
8483 
8484 static int
8485 rfs4_share(rfs4_state_t *sp)
8486 {
8487 	return (rfs4_shrlock(sp, F_SHARE));
8488 }
8489 
8490 void
8491 rfs4_unshare(rfs4_state_t *sp)
8492 {
8493 	(void) rfs4_shrlock(sp, F_UNSHARE);
8494 }
8495