xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision d6bb6a8465e557cb946ef49d56ed3202f6218652)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All Rights Reserved
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/rpcsec_gss.h>
61 #include <rpc/svc.h>
62 
63 #include <nfs/nfs.h>
64 #include <nfs/export.h>
65 #include <nfs/lm.h>
66 #include <nfs/nfs4.h>
67 
68 #include <sys/strsubr.h>
69 #include <sys/strsun.h>
70 
71 #include <inet/common.h>
72 #include <inet/ip.h>
73 #include <inet/ip6.h>
74 
75 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
76 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
77 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
78 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
79 
80 /* End of Tunables */
81 
82 /*
83  * Used to bump the stateid4.seqid value and show changes in the stateid
84  */
85 #define	next_stateid(sp) (++(sp)->bits.chgseq)
86 
87 /*
88  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
89  *	This is used to return NFS4ERR_TOOSMALL when clients specify
90  *	maxcount that isn't large enough to hold the smallest possible
91  *	XDR encoded dirent.
92  *
93  *	    sizeof cookie (8 bytes) +
94  *	    sizeof name_len (4 bytes) +
95  *	    sizeof smallest (padded) name (4 bytes) +
96  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
97  *	    sizeof attrlist4_len (4 bytes) +
98  *	    sizeof next boolean (4 bytes)
99  *
100  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
101  * the smallest possible entry4 (assumes no attrs requested).
102  *	sizeof nfsstat4 (4 bytes) +
103  *	sizeof verifier4 (8 bytes) +
104  *	sizeof entry4list bool (4 bytes) +
105  *	sizeof entry4 	(36 bytes) +
106  *	sizeof eof bool  (4 bytes)
107  *
108  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
109  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
110  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
111  *	required for a given name length.  MAXNAMELEN is the maximum
112  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
113  *	macros are to allow for . and .. entries -- just a minor tweak to try
114  *	and guarantee that buffer we give to VOP_READDIR will be large enough
115  *	to hold ., .., and the largest possible solaris dirent64.
116  */
117 #define	RFS4_MINLEN_ENTRY4 36
118 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
119 #define	RFS4_MINLEN_RDDIR_BUF \
120 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
121 
122 /*
123  * It would be better to pad to 4 bytes since that's what XDR would do,
124  * but the dirents UFS gives us are already padded to 8, so just take
125  * what we're given.  Dircount is only a hint anyway.  Currently the
126  * solaris kernel is ASCII only, so there's no point in calling the
127  * UTF8 functions.
128  *
129  * dirent64: name padded to provide 8 byte struct alignment
130  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
131  *
132  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
133  *
134  */
135 #define	DIRENT64_TO_DIRCOUNT(dp) \
136 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
137 
138 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
139 
140 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
141 
142 u_longlong_t nfs4_srv_caller_id;
143 
144 verifier4	Write4verf;
145 verifier4	Readdir4verf;
146 
147 void		rfs4_init_compound_state(struct compound_state *);
148 
149 static void	nullfree(caddr_t);
150 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
151 			struct compound_state *);
152 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
153 			struct compound_state *);
154 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
155 			struct compound_state *);
156 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
157 			struct compound_state *);
158 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
159 			struct compound_state *);
160 static void	rfs4_op_create_free(nfs_resop4 *resop);
161 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
162 				    struct svc_req *, struct compound_state *);
163 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 			struct compound_state *);
165 static void	rfs4_op_getattr_free(nfs_resop4 *);
166 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 			struct compound_state *);
168 static void	rfs4_op_getfh_free(nfs_resop4 *);
169 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 			struct compound_state *);
171 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 			struct compound_state *);
173 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 			struct compound_state *);
175 static void	lock_denied_free(nfs_resop4 *);
176 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 			struct compound_state *);
178 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 			struct compound_state *);
180 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 			struct compound_state *);
182 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 			struct compound_state *);
184 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
185 				struct svc_req *req, struct compound_state *cs);
186 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 			struct compound_state *);
188 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 			struct compound_state *);
190 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
191 			struct svc_req *, struct compound_state *);
192 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
193 			struct svc_req *, struct compound_state *);
194 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 			struct compound_state *);
196 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 			struct compound_state *);
198 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 			struct compound_state *);
200 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 			struct compound_state *);
202 static void	rfs4_op_read_free(nfs_resop4 *);
203 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
204 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 			struct compound_state *);
206 static void	rfs4_op_readlink_free(nfs_resop4 *);
207 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
208 			struct svc_req *, struct compound_state *);
209 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 			struct compound_state *);
211 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 			struct compound_state *);
213 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 			struct compound_state *);
215 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 			struct compound_state *);
217 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 			struct compound_state *);
219 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 			struct compound_state *);
221 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 			struct compound_state *);
223 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 			struct compound_state *);
225 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
226 			struct svc_req *, struct compound_state *);
227 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
228 			struct svc_req *req, struct compound_state *);
229 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 			struct compound_state *);
231 static void	rfs4_op_secinfo_free(nfs_resop4 *);
232 
233 static nfsstat4 check_open_access(uint32_t,
234 				struct compound_state *, struct svc_req *);
235 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
236 static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
237 static int 	rfs4_shrlock(rfs4_state_t *, int);
238 static int	rfs4_share(rfs4_state_t *);
239 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
240 
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and torn down by nfs4_ntov_table_free().
 *
 * NOTE(review): the per-field notes below are inferred from the field
 * names only; confirm against the code that fills the table.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;	/* presumably the array of attr values */
	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* one slot per possible attr */
	int attrcnt;		/* presumably the number of entries in use */
	bool_t vfsstat;		/* presumably "vfs statistics involved" flag */
};
250 
251 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
252 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
253 				    struct nfs4_svgetit_arg *sargp);
254 
255 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
256 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
257 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
258 
259 fem_t	*deleg_rdops;
260 fem_t	*deleg_wrops;
261 
262 rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
263 kmutex_t	rfs4_servinst_lock;		/* protects linked list */
264 int		rfs4_seen_first_compound;	/* set first time we see one */
265 
266 #ifdef DEBUG
267 int	rfs4_servinst_debug = 0;
268 #endif
269 
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes how a single NFSv4 operation is handled:
 * the routine that services it, the routine that releases whatever the
 * service routine allocated for the result, and a set of flag bits
 * (the table below uses RPC_IDEMPOTENT, RPC_PUBLICFH_OK and RPC_ALL).
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
};
279 
/*
 * Dispatch table, one entry per operation number.  The comment above
 * each entry gives the operation number that corresponds to its position
 * in the array.  Positions 0-2 are not real operations and are routed to
 * rfs4_op_illegal; OP_DELEGPURGE is routed to rfs4_op_inval.  Entries
 * whose results need no cleanup use nullfree as the free routine.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_inval, nullfree, 0},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0},
};
405 
406 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
407 
408 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
409 
410 #ifdef DEBUG
411 
412 int rfs4_fillone_debug = 0;
413 int rfs4_shrlock_debug = 0;
414 int rfs4_no_stub_access = 1;
415 int rfs4_rddir_debug = 0;
416 
/*
 * Printable operation names for debug output, in the same order as the
 * dispatch table (operation numbers 0 through 39), followed by one extra
 * trailing entry for the illegal operation.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclientid_confirm",	/* was misspelled "setclient_confirm" */
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
460 #endif
461 
462 void rfs4_ss_chkclid(rfs4_client_t *);
463 
464 #ifdef	nextdp
465 #undef nextdp
466 #endif
467 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
468 
/*
 * Operation template handed to fem_create("deleg_rdops", ...) in
 * rfs4_srvrinit().  Each pair names a vnode operation and the deleg_*
 * routine installed for it; the list ends with a NULL, NULL pair.
 * NOTE(review): judging by the names this is the monitor set for read
 * delegations -- confirm against the deleg_* implementations.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN, deleg_rdopen,
	VOPNAME_WRITE, deleg_write,
	VOPNAME_SETATTR, deleg_setattr,
	VOPNAME_RWLOCK, deleg_rd_rwlock,
	VOPNAME_SPACE, deleg_space,
	VOPNAME_SETSECATTR, deleg_setsecattr,
	VOPNAME_VNEVENT, deleg_vnevent,
	NULL, NULL
};
/*
 * Operation template handed to fem_create("deleg_wrops", ...) in
 * rfs4_srvrinit().  Same layout as the read template, with an additional
 * VOPNAME_READ entry and write-specific open/rwlock routines; the list
 * ends with a NULL, NULL pair.
 * NOTE(review): judging by the names this is the monitor set for write
 * delegations -- confirm against the deleg_* implementations.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN, deleg_wropen,
	VOPNAME_READ, deleg_read,
	VOPNAME_WRITE, deleg_write,
	VOPNAME_SETATTR, deleg_setattr,
	VOPNAME_RWLOCK, deleg_wr_rwlock,
	VOPNAME_SPACE, deleg_space,
	VOPNAME_SETSECATTR, deleg_setsecattr,
	VOPNAME_VNEVENT, deleg_vnevent,
	NULL, NULL
};
490 
491 int
492 rfs4_srvrinit(void)
493 {
494 	timespec32_t verf;
495 	int error;
496 	extern void rfs4_attr_init();
497 	extern krwlock_t rfs4_deleg_policy_lock;
498 
499 	/*
500 	 * The following algorithm attempts to find a unique verifier
501 	 * to be used as the write verifier returned from the server
502 	 * to the client.  It is important that this verifier change
503 	 * whenever the server reboots.  Of secondary importance, it
504 	 * is important for the verifier to be unique between two
505 	 * different servers.
506 	 *
507 	 * Thus, an attempt is made to use the system hostid and the
508 	 * current time in seconds when the nfssrv kernel module is
509 	 * loaded.  It is assumed that an NFS server will not be able
510 	 * to boot and then to reboot in less than a second.  If the
511 	 * hostid has not been set, then the current high resolution
512 	 * time is used.  This will ensure different verifiers each
513 	 * time the server reboots and minimize the chances that two
514 	 * different servers will have the same verifier.
515 	 * XXX - this is broken on LP64 kernels.
516 	 */
517 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
518 	if (verf.tv_sec != 0) {
519 		verf.tv_nsec = gethrestime_sec();
520 	} else {
521 		timespec_t tverf;
522 
523 		gethrestime(&tverf);
524 		verf.tv_sec = (time_t)tverf.tv_sec;
525 		verf.tv_nsec = tverf.tv_nsec;
526 	}
527 
528 	Write4verf = *(uint64_t *)&verf;
529 
530 	rfs4_attr_init();
531 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
532 
533 	/* Used to manage create/destroy of server state */
534 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
535 
536 	/* Used to manage access to server instance linked list */
537 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
538 
539 	/* Used to manage access to rfs4_deleg_policy */
540 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
541 
542 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
543 	if (error != 0) {
544 		rfs4_disable_delegation();
545 	} else {
546 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
547 				&deleg_wrops);
548 		if (error != 0) {
549 			rfs4_disable_delegation();
550 			fem_free(deleg_rdops);
551 		}
552 	}
553 
554 	nfs4_srv_caller_id = fs_new_caller_id();
555 
556 	lockt_sysid = lm_alloc_sysidt();
557 
558 	return (0);
559 }
560 
561 void
562 rfs4_srvrfini(void)
563 {
564 	extern krwlock_t rfs4_deleg_policy_lock;
565 
566 	if (lockt_sysid != LM_NOSYSID) {
567 		lm_free_sysidt(lockt_sysid);
568 		lockt_sysid = LM_NOSYSID;
569 	}
570 
571 	mutex_destroy(&rfs4_deleg_lock);
572 	mutex_destroy(&rfs4_state_lock);
573 	rw_destroy(&rfs4_deleg_policy_lock);
574 
575 	fem_free(deleg_rdops);
576 	fem_free(deleg_wrops);
577 }
578 
579 void
580 rfs4_init_compound_state(struct compound_state *cs)
581 {
582 	bzero(cs, sizeof (*cs));
583 	cs->cont = TRUE;
584 	cs->access = CS_ACCESS_DENIED;
585 	cs->deleg = FALSE;
586 	cs->mandlock = FALSE;
587 	cs->fh.nfs_fh4_val = cs->fhbuf;
588 }
589 
590 void
591 rfs4_grace_start(rfs4_servinst_t *sip)
592 {
593 	time_t now = gethrestime_sec();
594 
595 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
596 	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
597 
598 	rw_enter(&sip->rwlock, RW_WRITER);
599 	sip->start_time = now;
600 	sip->grace_period = rfs4_grace_period;
601 	rw_exit(&sip->rwlock);
602 }
603 
604 /*
605  * returns true if the instance's grace period has never been started
606  */
607 int
608 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
609 {
610 	time_t start_time;
611 
612 	rw_enter(&sip->rwlock, RW_READER);
613 	start_time = sip->start_time;
614 	rw_exit(&sip->rwlock);
615 
616 	return (start_time == 0);
617 }
618 
619 /*
620  * Indicates if server instance is within the
621  * grace period.
622  */
623 int
624 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
625 {
626 	time_t grace_expiry;
627 
628 	rw_enter(&sip->rwlock, RW_READER);
629 	grace_expiry = sip->start_time + sip->grace_period;
630 	rw_exit(&sip->rwlock);
631 
632 	return (gethrestime_sec() < grace_expiry);
633 }
634 
635 int
636 rfs4_clnt_in_grace(rfs4_client_t *cp)
637 {
638 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
639 
640 	return (rfs4_servinst_in_grace(cp->server_instance));
641 }
642 
643 /*
644  * reset all currently active grace periods
645  */
646 void
647 rfs4_grace_reset_all(void)
648 {
649 #ifdef DEBUG
650 	int n = 0;
651 #endif
652 	rfs4_servinst_t *sip;
653 
654 	mutex_enter(&rfs4_servinst_lock);
655 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
656 		if (rfs4_servinst_in_grace(sip)) {
657 			rfs4_grace_start(sip);
658 #ifdef DEBUG
659 			n++;
660 #endif
661 		}
662 	}
663 	mutex_exit(&rfs4_servinst_lock);
664 
665 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
666 	    "rfs4_grace_reset_all: reset %d instances", n));
667 }
668 
669 /*
670  * start any new instances' grace periods
671  */
672 void
673 rfs4_grace_start_new(void)
674 {
675 #ifdef DEBUG
676 	int n = 0;
677 #endif
678 	rfs4_servinst_t *sip;
679 
680 	mutex_enter(&rfs4_servinst_lock);
681 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
682 		if (rfs4_servinst_grace_new(sip))
683 			rfs4_grace_start(sip);
684 #ifdef DEBUG
685 		n++;
686 #endif
687 	}
688 	mutex_exit(&rfs4_servinst_lock);
689 
690 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
691 	    "rfs4_grace_start_new: started %d new instances", n));
692 }
693 
694 /*
695  * Create a new server instance, and make it the currently active instance.
696  * Note that starting the grace period too early will reduce the clients'
697  * recovery window.
698  */
699 void
700 rfs4_servinst_create(int start_grace)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
705 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
706 
707 	sip->start_time = (time_t)0;
708 	sip->grace_period = (time_t)0;
709 	sip->next = NULL;
710 	sip->prev = NULL;
711 
712 	mutex_enter(&rfs4_servinst_lock);
713 	if (rfs4_cur_servinst == NULL) {
714 		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
715 		    "rfs4_servinst_create: creating first instance"));
716 	} else {
717 		/* add to linked list */
718 		sip->prev = rfs4_cur_servinst;
719 		rfs4_cur_servinst->next = sip;
720 	}
721 	if (start_grace)
722 		rfs4_grace_start(sip);
723 	/* make the new instance "current" */
724 	rfs4_cur_servinst = sip;
725 	mutex_exit(&rfs4_servinst_lock);
726 
727 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
728 	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
729 	    (void *)sip, start_grace));
730 }
731 
732 /*
733  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
734  * all instances directly.
735  */
736 void
737 rfs4_servinst_destroy_all(void)
738 {
739 	rfs4_servinst_t *sip, *prev, *current;
740 #ifdef DEBUG
741 	int n = 0;
742 #endif
743 
744 	mutex_enter(&rfs4_servinst_lock);
745 	ASSERT(rfs4_cur_servinst != NULL);
746 	current = rfs4_cur_servinst;
747 	rfs4_cur_servinst = NULL;
748 	for (sip = current; sip != NULL; sip = prev) {
749 		prev = sip->prev;
750 		rw_destroy(&sip->rwlock);
751 		kmem_free(sip, sizeof (rfs4_servinst_t));
752 #ifdef DEBUG
753 		n++;
754 #endif
755 	}
756 	mutex_exit(&rfs4_servinst_lock);
757 
758 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
759 	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
760 }
761 
762 /*
763  * Assign the current server instance to a client_t.
764  * Should be called with cp->dbe held.
765  */
766 void
767 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
768 {
769 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
770 
771 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
772 	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
773 	    (void *)cp->server_instance, (void *)sip));
774 
775 	/*
776 	 * The lock ensures that if the current instance is in the process
777 	 * of changing, we will see the new one.
778 	 */
779 	mutex_enter(&rfs4_servinst_lock);
780 	cp->server_instance = sip;
781 	mutex_exit(&rfs4_servinst_lock);
782 }
783 
784 rfs4_servinst_t *
785 rfs4_servinst(rfs4_client_t *cp)
786 {
787 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
788 
789 	return (cp->server_instance);
790 }
791 
792 /* ARGSUSED */
793 static void
794 nullfree(caddr_t resop)
795 {
796 }
797 
798 /*
799  * This is a fall-through for invalid or not implemented (yet) ops
800  */
801 /* ARGSUSED */
802 static void
803 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
804 	struct compound_state *cs)
805 {
806 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
807 }
808 
809 /*
810  * Check if the security flavor, nfsnum, is in the flavor_list.
811  */
812 bool_t
813 in_flavor_list(int nfsnum, int *flavor_list, int count)
814 {
815 	int i;
816 
817 	for (i = 0; i < count; i++) {
818 		if (nfsnum == flavor_list[i])
819 			return (TRUE);
820 	}
821 	return (FALSE);
822 }
823 
824 /*
825  * Used by rfs4_op_secinfo to get the security information from the
826  * export structure associated with the component.
827  */
828 /* ARGSUSED */
829 static nfsstat4
830 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
831 {
832 	int error, different_export = 0;
833 	vnode_t *dvp, *vp, *tvp;
834 	struct exportinfo *exi = NULL;
835 	fid_t fid;
836 	uint_t count, i;
837 	secinfo4 *resok_val;
838 	struct secinfo *secp;
839 	bool_t did_traverse;
840 	int dotdot, walk;
841 
842 	dvp = cs->vp;
843 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
844 
845 	/*
846 	 * If dotdotting, then need to check whether it's above the
847 	 * root of a filesystem, or above an export point.
848 	 */
849 	if (dotdot) {
850 
851 		/*
852 		 * If dotdotting at the root of a filesystem, then
853 		 * need to traverse back to the mounted-on filesystem
854 		 * and do the dotdot lookup there.
855 		 */
856 		if (cs->vp->v_flag & VROOT) {
857 
858 			/*
859 			 * If at the system root, then can
860 			 * go up no further.
861 			 */
862 			if (VN_CMP(dvp, rootdir))
863 				return (puterrno4(ENOENT));
864 
865 			/*
866 			 * Traverse back to the mounted-on filesystem
867 			 */
868 			dvp = untraverse(cs->vp);
869 
870 			/*
871 			 * Set the different_export flag so we remember
872 			 * to pick up a new exportinfo entry for
873 			 * this new filesystem.
874 			 */
875 			different_export = 1;
876 		} else {
877 
878 			/*
879 			 * If dotdotting above an export point then set
880 			 * the different_export to get new export info.
881 			 */
882 			different_export = nfs_exported(cs->exi, cs->vp);
883 		}
884 	}
885 
886 	/*
887 	 * Get the vnode for the component "nm".
888 	 */
889 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
890 	if (error)
891 		return (puterrno4(error));
892 
893 	/*
894 	 * If the vnode is in a pseudo filesystem, or if the security flavor
895 	 * used in the request is valid but not an explicitly shared flavor,
896 	 * or the access bit indicates that this is a limited access,
897 	 * check whether this vnode is visible.
898 	 */
899 	if (!different_export &&
900 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
901 	    cs->access & CS_ACCESS_LIMITED)) {
902 		if (! nfs_visible(cs->exi, vp, &different_export)) {
903 			VN_RELE(vp);
904 			return (puterrno4(ENOENT));
905 		}
906 	}
907 
908 	/*
909 	 * If it's a mountpoint, then traverse it.
910 	 */
911 	if (vn_ismntpt(vp)) {
912 		tvp = vp;
913 		if ((error = traverse(&tvp)) != 0) {
914 			VN_RELE(vp);
915 			return (puterrno4(error));
916 		}
917 		/* remember that we had to traverse mountpoint */
918 		did_traverse = TRUE;
919 		vp = tvp;
920 		different_export = 1;
921 	} else if (vp->v_vfsp != dvp->v_vfsp) {
922 		/*
923 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
924 		 * then vp is probably an LOFS object.  We don't need the
925 		 * realvp, we just need to know that we might have crossed
926 		 * a server fs boundary and need to call checkexport4.
927 		 * (LOFS lookup hides server fs mountpoints, and actually calls
928 		 * traverse)
929 		 */
930 		different_export = 1;
931 		did_traverse = FALSE;
932 	}
933 
934 	/*
935 	 * Get the export information for it.
936 	 */
937 	if (different_export) {
938 
939 		bzero(&fid, sizeof (fid));
940 		fid.fid_len = MAXFIDSZ;
941 		error = vop_fid_pseudo(vp, &fid);
942 		if (error) {
943 			VN_RELE(vp);
944 			return (puterrno4(error));
945 		}
946 
947 		if (dotdot)
948 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
949 		else
950 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
951 
952 		if (exi == NULL) {
953 			if (did_traverse == TRUE) {
954 				/*
955 				 * If this vnode is a mounted-on vnode,
956 				 * but the mounted-on file system is not
957 				 * exported, send back the secinfo for
958 				 * the exported node that the mounted-on
959 				 * vnode lives in.
960 				 */
961 				exi = cs->exi;
962 			} else {
963 				VN_RELE(vp);
964 				return (puterrno4(EACCES));
965 			}
966 		}
967 	} else {
968 		exi = cs->exi;
969 	}
970 	ASSERT(exi != NULL);
971 
972 
973 	/*
974 	 * Create the secinfo result based on the security information
975 	 * from the exportinfo structure (exi).
976 	 *
977 	 * Return all flavors for a pseudo node.
978 	 * For a real export node, return the flavor that the client
979 	 * has access with.
980 	 */
981 	ASSERT(RW_LOCK_HELD(&exported_lock));
982 	if (PSEUDO(exi)) {
983 		count = exi->exi_export.ex_seccnt; /* total sec count */
984 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
985 		secp = exi->exi_export.ex_secinfo;
986 
987 		for (i = 0; i < count; i++) {
988 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
989 		    if (resok_val[i].flavor == RPCSEC_GSS) {
990 			rpcsec_gss_info *info;
991 
992 			info = &resok_val[i].flavor_info;
993 			info->qop = secp[i].s_secinfo.sc_qop;
994 			info->service =
995 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
996 
997 			/* get oid opaque data */
998 			info->oid.sec_oid4_len =
999 				secp[i].s_secinfo.sc_gss_mech_type->length;
1000 			info->oid.sec_oid4_val =
1001 				kmem_alloc(
1002 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1003 				    KM_SLEEP);
1004 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1005 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1006 		    }
1007 		}
1008 		resp->SECINFO4resok_len = count;
1009 		resp->SECINFO4resok_val = resok_val;
1010 	} else {
1011 		int ret_cnt = 0, k = 0;
1012 		int *flavor_list;
1013 
1014 		count = exi->exi_export.ex_seccnt; /* total sec count */
1015 		secp = exi->exi_export.ex_secinfo;
1016 
1017 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1018 		/* find out which flavors to return */
1019 		for (i = 0; i < count; i ++) {
1020 			int access, flavor, perm;
1021 
1022 			flavor = secp[i].s_secinfo.sc_nfsnum;
1023 			perm = secp[i].s_flags;
1024 
1025 			access = nfsauth4_secinfo_access(exi, cs->req,
1026 						flavor, perm);
1027 
1028 			if (! (access & NFSAUTH_DENIED) &&
1029 			    ! (access & NFSAUTH_WRONGSEC)) {
1030 				flavor_list[ret_cnt] = flavor;
1031 				ret_cnt++;
1032 			}
1033 		}
1034 
1035 		/* Create the returning SECINFO value */
1036 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1037 
1038 		for (i = 0; i < count; i++) {
1039 		/* If the flavor is in the flavor list, fill in resok_val. */
1040 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1041 						flavor_list, ret_cnt)) {
1042 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1043 			if (resok_val[k].flavor == RPCSEC_GSS) {
1044 			    rpcsec_gss_info *info;
1045 
1046 			    info = &resok_val[k].flavor_info;
1047 			    info->qop = secp[i].s_secinfo.sc_qop;
1048 			    info->service =
1049 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1050 
1051 			    /* get oid opaque data */
1052 			    info->oid.sec_oid4_len =
1053 				secp[i].s_secinfo.sc_gss_mech_type->length;
1054 			    info->oid.sec_oid4_val =
1055 				kmem_alloc(
1056 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1057 				    KM_SLEEP);
1058 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1059 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1060 			}
1061 			k++;
1062 		    }
1063 		    if (k >= ret_cnt)
1064 			break;
1065 		}
1066 		resp->SECINFO4resok_len = ret_cnt;
1067 		resp->SECINFO4resok_val = resok_val;
1068 		kmem_free(flavor_list, count * sizeof (int));
1069 	}
1070 
1071 	VN_RELE(vp);
1072 	return (NFS4_OK);
1073 }
1074 
1075 /*
1076  * SECINFO (Operation 33): Obtain required security information on
1077  * the component name in the format of (security-mechanism-oid, qop, service)
1078  * triplets.
1079  */
1080 /* ARGSUSED */
1081 static void
1082 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1083 	struct compound_state *cs)
1084 {
1085 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1086 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1087 	uint_t len;
1088 	char *nm;
1089 
1090 	/*
1091 	 * Current file handle (cfh) should have been set before getting
1092 	 * into this function. If not, return error.
1093 	 */
1094 	if (cs->vp == NULL) {
1095 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1096 		return;
1097 	}
1098 
1099 	if (cs->vp->v_type != VDIR) {
1100 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1101 		return;
1102 	}
1103 
1104 	/*
1105 	 * Verify the component name. If failed, error out, but
1106 	 * do not error out if the component name is a "..".
1107 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1108 	 */
1109 	if (!utf8_dir_verify(utfnm)) {
1110 		if (utfnm->utf8string_len != 2 ||
1111 				utfnm->utf8string_val[0] != '.' ||
1112 				utfnm->utf8string_val[1] != '.') {
1113 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1114 			return;
1115 		}
1116 	}
1117 
1118 	nm = utf8_to_str(utfnm, &len, NULL);
1119 	if (nm == NULL) {
1120 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1121 		return;
1122 	}
1123 
1124 	if (len > MAXNAMELEN) {
1125 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1126 		kmem_free(nm, len);
1127 		return;
1128 	}
1129 
1130 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1131 
1132 	kmem_free(nm, len);
1133 }
1134 
1135 /*
1136  * Free SECINFO result.
1137  */
1138 /* ARGSUSED */
1139 static void
1140 rfs4_op_secinfo_free(nfs_resop4 *resop)
1141 {
1142 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1143 	int count, i;
1144 	secinfo4 *resok_val;
1145 
1146 	/* If this is not an Ok result, nothing to free. */
1147 	if (resp->status != NFS4_OK) {
1148 		return;
1149 	}
1150 
1151 	count = resp->SECINFO4resok_len;
1152 	resok_val = resp->SECINFO4resok_val;
1153 
1154 	for (i = 0; i < count; i++) {
1155 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1156 		rpcsec_gss_info *info;
1157 
1158 		info = &resok_val[i].flavor_info;
1159 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1160 	    }
1161 	}
1162 	kmem_free(resok_val, count * sizeof (secinfo4));
1163 	resp->SECINFO4resok_len = 0;
1164 	resp->SECINFO4resok_val = NULL;
1165 }
1166 
1167 /* ARGSUSED */
1168 static void
1169 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1170 	struct compound_state *cs)
1171 {
1172 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1173 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1174 	int error;
1175 	vnode_t *vp;
1176 	struct vattr va;
1177 	int checkwriteperm;
1178 	cred_t *cr = cs->cr;
1179 
1180 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1181 	if (cs->access == CS_ACCESS_DENIED) {
1182 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1183 		return;
1184 	}
1185 #endif
1186 	if (cs->vp == NULL) {
1187 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1188 		return;
1189 	}
1190 
1191 	ASSERT(cr != NULL);
1192 
1193 	vp = cs->vp;
1194 
1195 	/*
1196 	 * If the file system is exported read only, it is not appropriate
1197 	 * to check write permissions for regular files and directories.
1198 	 * Special files are interpreted by the client, so the underlying
1199 	 * permissions are sent back to the client for interpretation.
1200 	 */
1201 	if (rdonly4(cs->exi, cs->vp, req) &&
1202 		(vp->v_type == VREG || vp->v_type == VDIR))
1203 		checkwriteperm = 0;
1204 	else
1205 		checkwriteperm = 1;
1206 
1207 	/*
1208 	 * XXX
1209 	 * We need the mode so that we can correctly determine access
1210 	 * permissions relative to a mandatory lock file.  Access to
1211 	 * mandatory lock files is denied on the server, so it might
1212 	 * as well be reflected to the server during the open.
1213 	 */
1214 	va.va_mask = AT_MODE;
1215 	error = VOP_GETATTR(vp, &va, 0, cr);
1216 	if (error) {
1217 		*cs->statusp = resp->status = puterrno4(error);
1218 		return;
1219 	}
1220 
1221 	resp->access = 0;
1222 	resp->supported = 0;
1223 
1224 	if (args->access & ACCESS4_READ) {
1225 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1226 		if (!error && !MANDLOCK(vp, va.va_mode))
1227 			resp->access |= ACCESS4_READ;
1228 		resp->supported |= ACCESS4_READ;
1229 	}
1230 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1231 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1232 		if (!error)
1233 			resp->access |= ACCESS4_LOOKUP;
1234 		resp->supported |= ACCESS4_LOOKUP;
1235 	}
1236 	if (checkwriteperm &&
1237 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1238 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1239 		if (!error && !MANDLOCK(vp, va.va_mode))
1240 			resp->access |=
1241 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1242 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1243 	}
1244 
1245 	if (checkwriteperm &&
1246 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1247 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1248 		if (!error)
1249 			resp->access |= ACCESS4_DELETE;
1250 		resp->supported |= ACCESS4_DELETE;
1251 	}
1252 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1253 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1254 		if (!error && !MANDLOCK(vp, va.va_mode))
1255 			resp->access |= ACCESS4_EXECUTE;
1256 		resp->supported |= ACCESS4_EXECUTE;
1257 	}
1258 
1259 	*cs->statusp = resp->status = NFS4_OK;
1260 }
1261 
1262 /* ARGSUSED */
1263 static void
1264 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1265 	struct compound_state *cs)
1266 {
1267 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1268 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1269 	int error;
1270 	vnode_t *vp = cs->vp;
1271 	cred_t *cr = cs->cr;
1272 	vattr_t va;
1273 
1274 	if (vp == NULL) {
1275 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1276 		return;
1277 	}
1278 	if (cs->access == CS_ACCESS_DENIED) {
1279 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1280 		return;
1281 	}
1282 
1283 	if (args->offset + args->count < args->offset) {
1284 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1285 		return;
1286 	}
1287 
1288 	va.va_mask = AT_UID;
1289 	error = VOP_GETATTR(vp, &va, 0, cr);
1290 
1291 	/*
1292 	 * If we can't get the attributes, then we can't do the
1293 	 * right access checking.  So, we'll fail the request.
1294 	 */
1295 	if (error) {
1296 		*cs->statusp = resp->status = puterrno4(error);
1297 		return;
1298 	}
1299 	if (rdonly4(cs->exi, cs->vp, req)) {
1300 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1301 		return;
1302 	}
1303 
1304 	if (vp->v_type != VREG) {
1305 		if (vp->v_type == VDIR)
1306 			resp->status = NFS4ERR_ISDIR;
1307 		else
1308 			resp->status = NFS4ERR_INVAL;
1309 		*cs->statusp = resp->status;
1310 		return;
1311 	}
1312 
1313 	if (crgetuid(cr) != va.va_uid &&
1314 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1315 		*cs->statusp = resp->status = puterrno4(error);
1316 		return;
1317 	}
1318 
1319 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1320 	if (!error)
1321 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1322 
1323 	if (error) {
1324 		*cs->statusp = resp->status = puterrno4(error);
1325 		return;
1326 	}
1327 
1328 	*cs->statusp = resp->status = NFS4_OK;
1329 	resp->writeverf = Write4verf;
1330 }
1331 
1332 /*
1333  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1334  * was completed. It does the nfsv4 create for special files.
1335  */
1336 /* ARGSUSED */
1337 static vnode_t *
1338 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1339 	struct compound_state *cs, vattr_t *vap, char *nm)
1340 {
1341 	int error;
1342 	cred_t *cr = cs->cr;
1343 	vnode_t *dvp = cs->vp;
1344 	vnode_t *vp = NULL;
1345 	int mode;
1346 	enum vcexcl excl;
1347 
1348 	switch (args->type) {
1349 	case NF4CHR:
1350 	case NF4BLK:
1351 		if (secpolicy_sys_devices(cr) != 0) {
1352 			*cs->statusp = resp->status = NFS4ERR_PERM;
1353 			return (NULL);
1354 		}
1355 		if (args->type == NF4CHR)
1356 			vap->va_type = VCHR;
1357 		else
1358 			vap->va_type = VBLK;
1359 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1360 					args->ftype4_u.devdata.specdata2);
1361 		vap->va_mask |= AT_RDEV;
1362 		break;
1363 	case NF4SOCK:
1364 		vap->va_type = VSOCK;
1365 		break;
1366 	case NF4FIFO:
1367 		vap->va_type = VFIFO;
1368 		break;
1369 	default:
1370 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1371 		return (NULL);
1372 	}
1373 
1374 	/*
1375 	 * Must specify the mode.
1376 	 */
1377 	if (!(vap->va_mask & AT_MODE)) {
1378 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1379 		return (NULL);
1380 	}
1381 
1382 	excl = EXCL;
1383 
1384 	mode = 0;
1385 
1386 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1387 	if (error) {
1388 		*cs->statusp = resp->status = puterrno4(error);
1389 		return (NULL);
1390 	}
1391 	return (vp);
1392 }
1393 
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * Creates a directory (VOP_MKDIR), a symbolic link (VOP_SYMLINK followed
 * by VOP_LOOKUP) or a special file (do_rfs4_op_mknod()) named by
 * args->objname inside the current filehandle's directory (cs->vp).
 *
 * On success cs->fh is rebuilt with makefh4() for the new object, the old
 * cs->vp is released and replaced by the new vnode, and resp->cinfo and
 * resp->attrset are filled in.  On failure the error is stored in both
 * resp->status and *cs->statusp; most failure paths also reset
 * resp->attrset to 0.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * Directory attribute snapshots: bva = before the create,
	 * iva = right after the create, iva2 = re-check after VOP_LOOKUP
	 * (symlink case only), ava = after the final directory fsync.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		return;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}
	if (!utf8_dir_verify(&args->objname)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		return;
	}

	/*
	 * Name of newly created object
	 *
	 * nm is allocated here and must be released with
	 * kmem_free(nm, len) on every exit path below.
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		return;
	}

	resp->attrset = 0;

	/*
	 * Process createattrs.  From nfs4_ntov_table_init() onward every
	 * exit path pairs it with nfs4_ntov_table_free().
	 * NOTE(review): presumably do_rfs4_set_attrs() decodes createattrs
	 * into sarg.vap and records the decoded values in ntov -- confirm
	 * against its definition.
	 */
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
					&args->createattrs, cs, &sarg,
					&ntov, NFS4ATTR_SETIT);

	/* A request that produced no vattr bits at all is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bva, 0, cr);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}
	/*
	 * NOTE(review): no trailing semicolon on this invocation, unlike
	 * the "after" one further down; presumably the macro expands to a
	 * complete statement -- confirm against its definition.
	 */
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set default initial values for attributes when not specified
	 * in createattrs.
	 */
	if ((vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if ((vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
		/* lnm was already checked non-NULL above. */
		if (lnm != NULL)
			kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		/*
		 * VOP_SYMLINK does not hand back a vnode here, so look
		 * the new link up by name.
		 */
		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
						iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 *
		 * do_rfs4_op_mknod() reports failure by returning NULL
		 * after setting resp->status itself; "error" is not
		 * touched in this case and still holds 0 from the
		 * successful "before" VOP_GETATTR above.
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);

		if (vp == NULL) {
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		break;
	}
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr);

	/*
	 * NOTE(review): on the success path this function has not
	 * assigned resp->status before this test; it presumably relies
	 * on the result structure arriving zero-filled and on NFS4_OK
	 * being 0 -- confirm against the caller that allocates resop.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/*
	 * Build the new object's filehandle directly into cs->fh.  On
	 * failure the status is recorded here and the function bails out
	 * after the fsync below.
	 */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
			iva.va_seq == (bva.va_seq + 1) &&
			iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/* syncval is FNODSYNC for special files and 0 otherwise. */
	(void) VOP_FSYNC(vp, syncval, cr);

	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		return;
	}
	/* Swap the compound's current vnode over to the new object. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
}
1719 
1720 
1721 /*ARGSUSED*/
1722 static void
1723 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1724 	struct compound_state *cs)
1725 {
1726 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1727 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1728 	rfs4_deleg_state_t *dsp;
1729 	nfsstat4 status;
1730 
1731 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1732 	resp->status = *cs->statusp = status;
1733 	if (status != NFS4_OK)
1734 		return;
1735 
1736 	/* Ensure specified filehandle matches */
1737 	if (cs->vp != dsp->finfo->vp) {
1738 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1739 	} else
1740 		rfs4_return_deleg(dsp, FALSE);
1741 
1742 	rfs4_update_lease(dsp->client);
1743 
1744 	rfs4_deleg_state_rele(dsp);
1745 }
1746 
1747 /*
1748  * Check to see if a given "flavor" is an explicitly shared flavor.
1749  * The assumption of this routine is the "flavor" is already a valid
1750  * flavor in the secinfo list of "exi".
1751  *
1752  *	e.g.
1753  *		# share -o sec=flavor1 /export
1754  *		# share -o sec=flavor2 /export/home
1755  *
1756  *		flavor2 is not an explicitly shared flavor for /export,
1757  *		however it is in the secinfo list for /export thru the
1758  *		server namespace setup.
1759  */
1760 int
1761 is_exported_sec(int flavor, struct exportinfo *exi)
1762 {
1763 	int	i;
1764 	struct secinfo *sp;
1765 
1766 	sp = exi->exi_export.ex_secinfo;
1767 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1768 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1769 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1770 			return (SEC_REF_EXPORTED(&sp[i]));
1771 		}
1772 	}
1773 
1774 	/* Should not reach this point based on the assumption */
1775 	return (0);
1776 }
1777 
1778 /*
1779  * Check if the security flavor used in the request matches what is
1780  * required at the export point or at the root pseudo node (exi_root).
1781  *
1782  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1783  *
1784  */
1785 static int
1786 secinfo_match_or_authnone(struct compound_state *cs)
1787 {
1788 	int	i;
1789 	struct secinfo *sp;
1790 
1791 	/*
1792 	 * Check cs->nfsflavor (from the request) against
1793 	 * the current export data in cs->exi.
1794 	 */
1795 	sp = cs->exi->exi_export.ex_secinfo;
1796 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1797 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1798 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1799 			return (1);
1800 	}
1801 
1802 	return (0);
1803 }
1804 
1805 /*
1806  * Check the access authority for the client and return the correct error.
1807  */
1808 nfsstat4
1809 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1810 {
1811 	int	authres;
1812 
1813 	/*
1814 	 * First, check if the security flavor used in the request
1815 	 * are among the flavors set in the server namespace.
1816 	 */
1817 	if (!secinfo_match_or_authnone(cs)) {
1818 		*cs->statusp = NFS4ERR_WRONGSEC;
1819 		return (*cs->statusp);
1820 	}
1821 
1822 	authres = checkauth4(cs, req);
1823 
1824 	if (authres > 0) {
1825 		*cs->statusp = NFS4_OK;
1826 		if (! (cs->access & CS_ACCESS_LIMITED))
1827 			cs->access = CS_ACCESS_OK;
1828 	} else if (authres == 0) {
1829 		*cs->statusp = NFS4ERR_ACCESS;
1830 	} else if (authres == -2) {
1831 		*cs->statusp = NFS4ERR_WRONGSEC;
1832 	} else {
1833 		*cs->statusp = NFS4ERR_DELAY;
1834 	}
1835 	return (*cs->statusp);
1836 }
1837 
1838 /*
1839  * bitmap4_to_attrmask is called by getattr and readdir.
1840  * It sets up the vattr mask and determines whether vfsstat call is needed
1841  * based on the input bitmap.
1842  * Returns nfsv4 status.
1843  */
1844 static nfsstat4
1845 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1846 {
1847 	int i;
1848 	uint_t	va_mask;
1849 	struct statvfs64 *sbp = sargp->sbp;
1850 
1851 	sargp->sbp = NULL;
1852 	sargp->flag = 0;
1853 	sargp->rdattr_error = NFS4_OK;
1854 	sargp->mntdfid_set = FALSE;
1855 	if (sargp->cs->vp)
1856 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1857 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1858 	else
1859 		sargp->xattr = 0;
1860 
1861 	/*
1862 	 * Set rdattr_error_req to true if return error per
1863 	 * failed entry rather than fail the readdir.
1864 	 */
1865 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1866 		sargp->rdattr_error_req = 1;
1867 	else
1868 		sargp->rdattr_error_req = 0;
1869 
1870 	/*
1871 	 * generate the va_mask
1872 	 * Handle the easy cases first
1873 	 */
1874 	switch (breq) {
1875 	case NFS4_NTOV_ATTR_MASK:
1876 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1877 		return (NFS4_OK);
1878 
1879 	case NFS4_FS_ATTR_MASK:
1880 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1881 		sargp->sbp = sbp;
1882 		return (NFS4_OK);
1883 
1884 	case NFS4_NTOV_ATTR_CACHE_MASK:
1885 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1886 		return (NFS4_OK);
1887 
1888 	case FATTR4_LEASE_TIME_MASK:
1889 		sargp->vap->va_mask = 0;
1890 		return (NFS4_OK);
1891 
1892 	default:
1893 		va_mask = 0;
1894 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1895 			if ((breq & nfs4_ntov_map[i].fbit) &&
1896 							nfs4_ntov_map[i].vbit)
1897 				va_mask |= nfs4_ntov_map[i].vbit;
1898 		}
1899 
1900 		/*
1901 		 * Check is vfsstat is needed
1902 		 */
1903 		if (breq & NFS4_FS_ATTR_MASK)
1904 			sargp->sbp = sbp;
1905 
1906 		sargp->vap->va_mask = va_mask;
1907 		return (NFS4_OK);
1908 	}
1909 	/* NOTREACHED */
1910 }
1911 
1912 /*
1913  * bitmap4_get_sysattrs is called by getattr and readdir.
1914  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
1915  * Returns nfsv4 status.
1916  */
1917 static nfsstat4
1918 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
1919 {
1920 	int error;
1921 	struct compound_state *cs = sargp->cs;
1922 	vnode_t *vp = cs->vp;
1923 
1924 	if (sargp->sbp != NULL) {
1925 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
1926 			sargp->sbp = NULL;	/* to identify error */
1927 			return (puterrno4(error));
1928 		}
1929 	}
1930 
1931 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
1932 }
1933 
1934 static void
1935 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
1936 {
1937 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
1938 			KM_SLEEP);
1939 	ntovp->attrcnt = 0;
1940 	ntovp->vfsstat = FALSE;
1941 }
1942 
1943 static void
1944 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
1945 	struct nfs4_svgetit_arg *sargp)
1946 {
1947 	int i;
1948 	union nfs4_attr_u *na;
1949 	uint8_t *amap;
1950 
1951 	/*
1952 	 * XXX Should do the same checks for whether the bit is set
1953 	 */
1954 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
1955 		i < ntovp->attrcnt; i++, na++, amap++) {
1956 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
1957 			NFS4ATTR_FREEIT, sargp, na);
1958 	}
1959 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
1960 		/*
1961 		 * xdr_free for getattr will be done later
1962 		 */
1963 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
1964 			i < ntovp->attrcnt; i++, na++, amap++) {
1965 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
1966 		}
1967 	}
1968 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
1969 }
1970 
1971 /*
1972  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
1973  */
1974 static nfsstat4
1975 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
1976 	struct nfs4_svgetit_arg *sargp)
1977 {
1978 	int error = 0;
1979 	int i, k;
1980 	struct nfs4_ntov_table ntov;
1981 	XDR xdr;
1982 	ulong_t xdr_size;
1983 	char *xdr_attrs;
1984 	nfsstat4 status = NFS4_OK;
1985 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
1986 	union nfs4_attr_u *na;
1987 	uint8_t *amap;
1988 
1989 	sargp->op = NFS4ATTR_GETIT;
1990 	sargp->flag = 0;
1991 
1992 	fattrp->attrmask = 0;
1993 	/* if no bits requested, then return empty fattr4 */
1994 	if (breq == 0) {
1995 		fattrp->attrlist4_len = 0;
1996 		fattrp->attrlist4 = NULL;
1997 		return (NFS4_OK);
1998 	}
1999 
2000 	/*
2001 	 * return NFS4ERR_INVAL when client requests write-only attrs
2002 	 */
2003 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2004 		return (NFS4ERR_INVAL);
2005 
2006 	nfs4_ntov_table_init(&ntov);
2007 	na = ntov.na;
2008 	amap = ntov.amap;
2009 
2010 	/*
2011 	 * Now loop to get or verify the attrs
2012 	 */
2013 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2014 		if (breq & nfs4_ntov_map[i].fbit) {
2015 			if ((*nfs4_ntov_map[i].sv_getit)(
2016 				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2017 
2018 				error = (*nfs4_ntov_map[i].sv_getit)(
2019 						NFS4ATTR_GETIT, sargp, na);
2020 
2021 				/*
2022 				 * Possible error values:
2023 				 * >0 if sv_getit failed to
2024 				 * get the attr; 0 if succeeded;
2025 				 * <0 if rdattr_error and the
2026 				 * attribute cannot be returned.
2027 				 */
2028 				if (error && !(sargp->rdattr_error_req))
2029 					goto done;
2030 				/*
2031 				 * If error then just for entry
2032 				 */
2033 				if (error == 0) {
2034 					fattrp->attrmask |=
2035 						nfs4_ntov_map[i].fbit;
2036 					*amap++ =
2037 						(uint8_t)nfs4_ntov_map[i].nval;
2038 					na++;
2039 					(ntov.attrcnt)++;
2040 				} else if ((error > 0) &&
2041 					(sargp->rdattr_error == NFS4_OK)) {
2042 					sargp->rdattr_error = puterrno4(error);
2043 				}
2044 				error = 0;
2045 			}
2046 		}
2047 	}
2048 
2049 	/*
2050 	 * If rdattr_error was set after the return value for it was assigned,
2051 	 * update it.
2052 	 */
2053 	if (prev_rdattr_error != sargp->rdattr_error) {
2054 		na = ntov.na;
2055 		amap = ntov.amap;
2056 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2057 			k = *amap;
2058 			if (k < FATTR4_RDATTR_ERROR) {
2059 				continue;
2060 			}
2061 			if ((k == FATTR4_RDATTR_ERROR) &&
2062 			    ((*nfs4_ntov_map[k].sv_getit)(
2063 				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2064 
2065 				(void) (*nfs4_ntov_map[k].sv_getit)(
2066 						NFS4ATTR_GETIT, sargp, na);
2067 			}
2068 			break;
2069 		}
2070 	}
2071 
2072 	xdr_size = 0;
2073 	na = ntov.na;
2074 	amap = ntov.amap;
2075 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2076 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2077 	}
2078 
2079 	fattrp->attrlist4_len = xdr_size;
2080 	if (xdr_size) {
2081 		/* freed by rfs4_op_getattr_free() */
2082 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2083 
2084 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2085 
2086 		na = ntov.na;
2087 		amap = ntov.amap;
2088 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2089 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2090 				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
2091 					"encode of attribute %d failed\n",
2092 					*amap);
2093 				status = NFS4ERR_SERVERFAULT;
2094 				break;
2095 			}
2096 		}
2097 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2098 	} else {
2099 		fattrp->attrlist4 = NULL;
2100 	}
2101 done:
2102 
2103 	nfs4_ntov_table_free(&ntov, sargp);
2104 
2105 	if (error != 0)
2106 		status = puterrno4(error);
2107 
2108 	return (status);
2109 }
2110 
2111 /* ARGSUSED */
2112 static void
2113 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2114 	struct compound_state *cs)
2115 {
2116 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2117 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2118 	struct nfs4_svgetit_arg sarg;
2119 	struct statvfs64 sb;
2120 	nfsstat4 status;
2121 
2122 	if (cs->vp == NULL) {
2123 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2124 		return;
2125 	}
2126 
2127 	if (cs->access == CS_ACCESS_DENIED) {
2128 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2129 		return;
2130 	}
2131 
2132 	sarg.sbp = &sb;
2133 	sarg.cs = cs;
2134 
2135 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2136 	if (status == NFS4_OK) {
2137 		status = bitmap4_get_sysattrs(&sarg);
2138 		if (status == NFS4_OK)
2139 			status = do_rfs4_op_getattr(args->attr_request,
2140 				&resp->obj_attributes, &sarg);
2141 	}
2142 	*cs->statusp = resp->status = status;
2143 }
2144 
2145 static void
2146 rfs4_op_getattr_free(nfs_resop4 *resop)
2147 {
2148 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2149 
2150 	nfs4_fattr4_free(&resp->obj_attributes);
2151 }
2152 
2153 /* ARGSUSED */
2154 static void
2155 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156 	struct compound_state *cs)
2157 {
2158 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2159 
2160 	if (cs->vp == NULL) {
2161 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2162 		return;
2163 	}
2164 	if (cs->access == CS_ACCESS_DENIED) {
2165 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2166 		return;
2167 	}
2168 
2169 	resp->object.nfs_fh4_val =
2170 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2171 	nfs_fh4_copy(&cs->fh, &resp->object);
2172 	*cs->statusp = resp->status = NFS4_OK;
2173 }
2174 
2175 static void
2176 rfs4_op_getfh_free(nfs_resop4 *resop)
2177 {
2178 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2179 
2180 	if (resp->status == NFS4_OK &&
2181 	    resp->object.nfs_fh4_val != NULL) {
2182 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2183 		resp->object.nfs_fh4_val = NULL;
2184 		resp->object.nfs_fh4_len = 0;
2185 	}
2186 }
2187 
2188 /*
2189  * illegal: args: void
2190  *	    res : status (NFS4ERR_OP_ILLEGAL)
2191  */
2192 /* ARGSUSED */
2193 static void
2194 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2195 	struct svc_req *req, struct compound_state *cs)
2196 {
2197 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2198 
2199 	resop->resop = OP_ILLEGAL;
2200 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2201 }
2202 
2203 /*
2204  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2205  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2206  */
2207 /* ARGSUSED */
2208 static void
2209 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2210 	struct compound_state *cs)
2211 {
2212 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2213 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2214 	int error;
2215 	vnode_t *vp;
2216 	vnode_t *dvp;
2217 	struct vattr bdva, idva, adva;
2218 	char *nm;
2219 	uint_t  len;
2220 
2221 	/* SAVED_FH: source object */
2222 	vp = cs->saved_vp;
2223 	if (vp == NULL) {
2224 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2225 		return;
2226 	}
2227 
2228 	/* CURRENT_FH: target directory */
2229 	dvp = cs->vp;
2230 	if (dvp == NULL) {
2231 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2232 		return;
2233 	}
2234 
2235 	/*
2236 	 * If there is a non-shared filesystem mounted on this vnode,
2237 	 * do not allow to link any file in this directory.
2238 	 */
2239 	if (vn_ismntpt(dvp)) {
2240 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2241 		return;
2242 	}
2243 
2244 	if (cs->access == CS_ACCESS_DENIED) {
2245 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2246 		return;
2247 	}
2248 
2249 	/* Check source object's type validity */
2250 	if (vp->v_type == VDIR) {
2251 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2252 		return;
2253 	}
2254 
2255 	/* Check target directory's type */
2256 	if (dvp->v_type != VDIR) {
2257 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2258 		return;
2259 	}
2260 
2261 	if (cs->saved_exi != cs->exi) {
2262 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2263 		return;
2264 	}
2265 
2266 	if (!utf8_dir_verify(&args->newname)) {
2267 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2268 		return;
2269 	}
2270 
2271 	nm = utf8_to_fn(&args->newname, &len, NULL);
2272 	if (nm == NULL) {
2273 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2274 		return;
2275 	}
2276 
2277 	if (len > MAXNAMELEN) {
2278 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2279 		kmem_free(nm, len);
2280 		return;
2281 	}
2282 
2283 	if (rdonly4(cs->exi, cs->vp, req)) {
2284 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2285 		kmem_free(nm, len);
2286 		return;
2287 	}
2288 
2289 	/* Get "before" change value */
2290 	bdva.va_mask = AT_CTIME|AT_SEQ;
2291 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2292 	if (error) {
2293 		*cs->statusp = resp->status = puterrno4(error);
2294 		kmem_free(nm, len);
2295 		return;
2296 	}
2297 
2298 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2299 
2300 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2301 
2302 	kmem_free(nm, len);
2303 
2304 	/*
2305 	 * Get the initial "after" sequence number, if it fails, set to zero
2306 	 */
2307 	idva.va_mask = AT_SEQ;
2308 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2309 		idva.va_seq = 0;
2310 
2311 	/*
2312 	 * Force modified data and metadata out to stable storage.
2313 	 */
2314 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2315 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2316 
2317 	if (error) {
2318 		*cs->statusp = resp->status = puterrno4(error);
2319 		return;
2320 	}
2321 
2322 	/*
2323 	 * Get "after" change value, if it fails, simply return the
2324 	 * before value.
2325 	 */
2326 	adva.va_mask = AT_CTIME|AT_SEQ;
2327 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2328 		adva.va_ctime = bdva.va_ctime;
2329 		adva.va_seq = 0;
2330 	}
2331 
2332 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2333 
2334 	/*
2335 	 * The cinfo.atomic = TRUE only if we have
2336 	 * non-zero va_seq's, and it has incremented by exactly one
2337 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2338 	 */
2339 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2340 			idva.va_seq == (bdva.va_seq + 1) &&
2341 			idva.va_seq == adva.va_seq)
2342 		resp->cinfo.atomic = TRUE;
2343 	else
2344 		resp->cinfo.atomic = FALSE;
2345 
2346 	*cs->statusp = resp->status = NFS4_OK;
2347 }
2348 
2349 /*
2350  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2351  */
2352 
2353 /* ARGSUSED */
2354 static nfsstat4
2355 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2356 	struct compound_state *cs)
2357 {
2358 	int error;
2359 	int different_export = 0;
2360 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2361 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2362 	nfsstat4 stat;
2363 	fid_t fid;
2364 	int attrdir, dotdot, walk;
2365 	bool_t is_newvp = FALSE;
2366 
2367 	if (cs->vp->v_flag & V_XATTRDIR) {
2368 		attrdir = 1;
2369 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2370 	} else {
2371 		attrdir = 0;
2372 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2373 	}
2374 
2375 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2376 
2377 	/*
2378 	 * If dotdotting, then need to check whether it's
2379 	 * above the root of a filesystem, or above an
2380 	 * export point.
2381 	 */
2382 	if (dotdot) {
2383 
2384 		/*
2385 		 * If dotdotting at the root of a filesystem, then
2386 		 * need to traverse back to the mounted-on filesystem
2387 		 * and do the dotdot lookup there.
2388 		 */
2389 		if (cs->vp->v_flag & VROOT) {
2390 
2391 			/*
2392 			 * If at the system root, then can
2393 			 * go up no further.
2394 			 */
2395 			if (VN_CMP(cs->vp, rootdir))
2396 				return (puterrno4(ENOENT));
2397 
2398 			/*
2399 			 * Traverse back to the mounted-on filesystem
2400 			 */
2401 			cs->vp = untraverse(cs->vp);
2402 
2403 			/*
2404 			 * Set the different_export flag so we remember
2405 			 * to pick up a new exportinfo entry for
2406 			 * this new filesystem.
2407 			 */
2408 			different_export = 1;
2409 		} else {
2410 
2411 			/*
2412 			 * If dotdotting above an export point then set
2413 			 * the different_export to get new export info.
2414 			 */
2415 			different_export = nfs_exported(cs->exi, cs->vp);
2416 		}
2417 	}
2418 
2419 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2420 	if (error)
2421 		return (puterrno4(error));
2422 
2423 	/*
2424 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2425 	 *
2426 	 * XXX if the vnode is a symlink and it is not visible in
2427 	 * a pseudo filesystem, return ENOENT (not following symlink).
2428 	 * V4 client can not mount such symlink. This is a regression
2429 	 * from V2/V3.
2430 	 *
2431 	 * In the same exported filesystem, if the security flavor used
2432 	 * is not an explicitly shared flavor, limit the view to the visible
2433 	 * list entries only. This is not a WRONGSEC case because it's already
2434 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2435 	 */
2436 	if (!different_export &&
2437 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2438 	    cs->access & CS_ACCESS_LIMITED)) {
2439 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2440 			VN_RELE(vp);
2441 			return (puterrno4(ENOENT));
2442 		}
2443 	}
2444 
2445 	/*
2446 	 * If it's a mountpoint, then traverse it.
2447 	 */
2448 	if (vn_ismntpt(vp)) {
2449 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2450 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2451 
2452 		/*
2453 		 * hold pre_tvp to counteract rele by traverse.  We will
2454 		 * need pre_tvp below if checkexport4 fails
2455 		 */
2456 		VN_HOLD(pre_tvp);
2457 		tvp = vp;
2458 		if ((error = traverse(&tvp)) != 0) {
2459 			VN_RELE(vp);
2460 			VN_RELE(pre_tvp);
2461 			return (puterrno4(error));
2462 		}
2463 		vp = tvp;
2464 		different_export = 1;
2465 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2466 		/*
2467 		 * The vfsp comparison is to handle the case where
2468 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2469 		 * and NFS is unaware of local fs transistions because
2470 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2471 		 * the dir and the obj returned by lookup will have different
2472 		 * vfs ptrs.
2473 		 */
2474 		different_export = 1;
2475 	}
2476 
2477 	if (different_export) {
2478 
2479 		bzero(&fid, sizeof (fid));
2480 		fid.fid_len = MAXFIDSZ;
2481 		error = vop_fid_pseudo(vp, &fid);
2482 		if (error) {
2483 			VN_RELE(vp);
2484 			if (pre_tvp)
2485 				VN_RELE(pre_tvp);
2486 			return (puterrno4(error));
2487 		}
2488 
2489 		if (dotdot)
2490 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2491 		else
2492 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2493 
2494 		if (exi == NULL) {
2495 			if (pre_tvp) {
2496 				/*
2497 				 * If this vnode is a mounted-on vnode,
2498 				 * but the mounted-on file system is not
2499 				 * exported, send back the filehandle for
2500 				 * the mounted-on vnode, not the root of
2501 				 * the mounted-on file system.
2502 				 */
2503 				VN_RELE(vp);
2504 				vp = pre_tvp;
2505 				exi = pre_exi;
2506 			} else {
2507 				VN_RELE(vp);
2508 				return (puterrno4(EACCES));
2509 			}
2510 		} else if (pre_tvp) {
2511 			/* we're done with pre_tvp now. release extra hold */
2512 			VN_RELE(pre_tvp);
2513 		}
2514 
2515 		cs->exi = exi;
2516 
2517 		/*
2518 		 * Now we do a checkauth4. The reason is that
2519 		 * this client/user may not have access to the new
2520 		 * exported file system, and if he does,
2521 		 * the client/user may be mapped to a different uid.
2522 		 *
2523 		 * We start with a new cr, because the checkauth4 done
2524 		 * in the PUT*FH operation over wrote the cred's uid,
2525 		 * gid, etc, and we want the real thing before calling
2526 		 * checkauth4()
2527 		 */
2528 		crfree(cs->cr);
2529 		cs->cr = crdup(cs->basecr);
2530 
2531 		if (cs->vp)
2532 			oldvp = cs->vp;
2533 		cs->vp = vp;
2534 		is_newvp = TRUE;
2535 
2536 		stat = call_checkauth4(cs, req);
2537 		if (stat != NFS4_OK) {
2538 			VN_RELE(cs->vp);
2539 			cs->vp = oldvp;
2540 			return (stat);
2541 		}
2542 	}
2543 
2544 	error = makefh4(&cs->fh, vp, cs->exi);
2545 
2546 	if (error) {
2547 		if (is_newvp) {
2548 			VN_RELE(cs->vp);
2549 			cs->vp = oldvp;
2550 		} else
2551 			VN_RELE(vp);
2552 		return (puterrno4(error));
2553 	}
2554 
2555 	if (!is_newvp) {
2556 		if (cs->vp)
2557 			VN_RELE(cs->vp);
2558 		cs->vp = vp;
2559 	} else if (oldvp)
2560 		VN_RELE(oldvp);
2561 
2562 	/*
2563 	 * if did lookup on attrdir and didn't lookup .., set named
2564 	 * attr fh flag
2565 	 */
2566 	if (attrdir && ! dotdot)
2567 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2568 
2569 	/* Assume false for now, open proc will set this */
2570 	cs->mandlock = FALSE;
2571 
2572 	return (NFS4_OK);
2573 }
2574 
2575 /* ARGSUSED */
2576 static void
2577 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2578 	struct compound_state *cs)
2579 {
2580 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2581 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2582 	char *nm;
2583 	uint_t len;
2584 
2585 	if (cs->vp == NULL) {
2586 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2587 		return;
2588 	}
2589 
2590 	if (cs->vp->v_type == VLNK) {
2591 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2592 		return;
2593 	}
2594 
2595 	if (cs->vp->v_type != VDIR) {
2596 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2597 		return;
2598 	}
2599 
2600 	if (!utf8_dir_verify(&args->objname)) {
2601 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2602 		return;
2603 	}
2604 
2605 	nm = utf8_to_str(&args->objname, &len, NULL);
2606 	if (nm == NULL) {
2607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2608 		return;
2609 	}
2610 
2611 	if (len > MAXNAMELEN) {
2612 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2613 		kmem_free(nm, len);
2614 		return;
2615 	}
2616 
2617 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2618 
2619 	kmem_free(nm, len);
2620 }
2621 
2622 /* ARGSUSED */
2623 static void
2624 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2625 	struct compound_state *cs)
2626 {
2627 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2628 
2629 	if (cs->vp == NULL) {
2630 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2631 		return;
2632 	}
2633 
2634 	if (cs->vp->v_type != VDIR) {
2635 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2636 		return;
2637 	}
2638 
2639 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2640 
2641 	/*
2642 	 * From NFSV4 Specification, LOOKUPP should not check for
2643 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2644 	 */
2645 	if (resp->status == NFS4ERR_WRONGSEC) {
2646 		*cs->statusp = resp->status = NFS4_OK;
2647 	}
2648 }
2649 
2650 
2651 /*ARGSUSED2*/
2652 static void
2653 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2654 	struct compound_state *cs)
2655 {
2656 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2657 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2658 	vnode_t		*avp = NULL;
2659 	int		lookup_flags = LOOKUP_XATTR, error;
2660 	int		exp_ro = 0;
2661 
2662 	if (cs->vp == NULL) {
2663 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2664 		return;
2665 	}
2666 
2667 	/*
2668 	 * Make a couple of checks made by copen()
2669 	 *
2670 	 * Check to make sure underlying fs supports xattrs.  This
2671 	 * is required because solaris filesystem implementations
2672 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2673 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2674 	 * pathconf cmd or if fs supports cmd but doesn't claim
2675 	 * support for xattr, return NOTSUPP.  It would be better
2676 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2677 	 * that cmd is not available to VOP_PATHCONF interface
2678 	 * (it's only implemented inside pathconf syscall)...
2679 	 *
2680 	 * Verify permission to put attributes on files (access
2681 	 * checks from copen).
2682 	 */
2683 
2684 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2685 		error = ENOTSUP;
2686 		goto error_out;
2687 	}
2688 
2689 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2690 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2691 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2692 		error = EACCES;
2693 		goto error_out;
2694 	}
2695 
2696 	/*
2697 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2698 	 * the file system is exported read-only -- regardless of
2699 	 * createdir flag.  Otherwise the attrdir would be created
2700 	 * (assuming server fs isn't mounted readonly locally).  If
2701 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2702 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2703 	 * because specfs has no VOP_LOOKUP op, so the macro would
2704 	 * return ENOSYS.  EINVAL is returned by all (current)
2705 	 * Solaris file system implementations when any of their
2706 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2707 	 * Returning NOTSUPP is more appropriate in this case
2708 	 * because the object will never be able to have an attrdir.
2709 	 */
2710 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2711 		lookup_flags |= CREATE_XATTR_DIR;
2712 
2713 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2714 
2715 	if (error) {
2716 		if (error == ENOENT && args->createdir && exp_ro)
2717 			error = EROFS;
2718 		else if (error == EINVAL || error == ENOSYS)
2719 			error = ENOTSUP;
2720 		goto error_out;
2721 	}
2722 
2723 	ASSERT(avp->v_flag & V_XATTRDIR);
2724 
2725 	error = makefh4(&cs->fh, avp, cs->exi);
2726 
2727 	if (error) {
2728 		VN_RELE(avp);
2729 		goto error_out;
2730 	}
2731 
2732 	VN_RELE(cs->vp);
2733 	cs->vp = avp;
2734 
2735 	/*
2736 	 * There is no requirement for an attrdir fh flag
2737 	 * because the attrdir has a vnode flag to distinguish
2738 	 * it from regular (non-xattr) directories.  The
2739 	 * FH4_ATTRDIR flag is set for future sanity checks.
2740 	 */
2741 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2742 	*cs->statusp = resp->status = NFS4_OK;
2743 	return;
2744 
2745 error_out:
2746 
2747 	*cs->statusp = resp->status = puterrno4(error);
2748 }
2749 
2750 static int
2751 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2752 {
2753 	int error;
2754 	int i;
2755 	clock_t delaytime;
2756 	caller_context_t ct;
2757 
2758 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2759 
2760 	/*
2761 	 * Don't block on mandatory locks. If this routine returns
2762 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2763 	 */
2764 	uio->uio_fmode = FNONBLOCK;
2765 
2766 	ct.cc_sysid = 0;
2767 	ct.cc_pid = 0;
2768 	ct.cc_caller_id = nfs4_srv_caller_id;
2769 
2770 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2771 
2772 
2773 		if (direction == FREAD) {
2774 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2775 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2776 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2777 		} else {
2778 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2779 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2780 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2781 		}
2782 
2783 		if (error != EAGAIN)
2784 			break;
2785 
2786 		if (i < rfs4_maxlock_tries - 1) {
2787 			delay(delaytime);
2788 			delaytime *= 2;
2789 		}
2790 	}
2791 
2792 	return (error);
2793 }
2794 
2795 /* ARGSUSED */
2796 static void
2797 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2798 	struct compound_state *cs)
2799 {
2800 	READ4args *args = &argop->nfs_argop4_u.opread;
2801 	READ4res *resp = &resop->nfs_resop4_u.opread;
2802 	int error;
2803 	int verror;
2804 	vnode_t *vp;
2805 	struct vattr va;
2806 	struct iovec iov;
2807 	struct uio uio;
2808 	u_offset_t offset;
2809 	bool_t *deleg = &cs->deleg;
2810 	nfsstat4 stat;
2811 	int in_crit = 0;
2812 	mblk_t *mp;
2813 	int alloc_err = 0;
2814 
2815 	vp = cs->vp;
2816 	if (vp == NULL) {
2817 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2818 		return;
2819 	}
2820 	if (cs->access == CS_ACCESS_DENIED) {
2821 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2822 		return;
2823 	}
2824 
2825 	/*
2826 	 * Enter the critical region before calling VOP_RWLOCK
2827 	 * to avoid a deadlock with write requests.
2828 	 */
2829 	if (nbl_need_check(vp)) {
2830 		nbl_start_crit(vp, RW_READER);
2831 		in_crit = 1;
2832 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2833 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2834 			goto out;
2835 		}
2836 	}
2837 
2838 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2839 					deleg, TRUE)) != NFS4_OK) {
2840 		*cs->statusp = resp->status = stat;
2841 		goto out;
2842 	}
2843 
2844 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2845 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2846 
2847 	/*
2848 	 * If we can't get the attributes, then we can't do the
2849 	 * right access checking.  So, we'll fail the request.
2850 	 */
2851 	if (verror) {
2852 		*cs->statusp = resp->status = puterrno4(verror);
2853 		goto out;
2854 	}
2855 
2856 	if (vp->v_type != VREG) {
2857 		*cs->statusp = resp->status =
2858 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
2859 		goto out;
2860 	}
2861 
2862 	if (crgetuid(cs->cr) != va.va_uid &&
2863 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
2864 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
2865 		*cs->statusp = resp->status = puterrno4(error);
2866 		goto out;
2867 	}
2868 
2869 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
2870 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2871 		goto out;
2872 	}
2873 
2874 	offset = args->offset;
2875 	if (offset >= va.va_size) {
2876 		*cs->statusp = resp->status = NFS4_OK;
2877 		resp->eof = TRUE;
2878 		resp->data_len = 0;
2879 		resp->data_val = NULL;
2880 		resp->mblk = NULL;
2881 		*cs->statusp = resp->status = NFS4_OK;
2882 		goto out;
2883 	}
2884 
2885 	if (args->count == 0) {
2886 		*cs->statusp = resp->status = NFS4_OK;
2887 		resp->eof = FALSE;
2888 		resp->data_len = 0;
2889 		resp->data_val = NULL;
2890 		resp->mblk = NULL;
2891 		goto out;
2892 	}
2893 
2894 	/*
2895 	 * Do not allocate memory more than maximum allowed
2896 	 * transfer size
2897 	 */
2898 	if (args->count > rfs4_tsize(req))
2899 		args->count = rfs4_tsize(req);
2900 
2901 	/*
2902 	 * mp will contain the data to be sent out in the read reply.
2903 	 * It will be freed after the reply has been sent.
2904 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
2905 	 * so that the call to xdrmblk_putmblk() never fails.
2906 	 * If the first alloc of the requested size fails, then
2907 	 * decrease the size to something more reasonable and wait
2908 	 * for the allocation to occur.
2909 	 */
2910 	mp = allocb(RNDUP(args->count), BPRI_MED);
2911 	if (mp == NULL) {
2912 		if (args->count > MAXBSIZE)
2913 			args->count = MAXBSIZE;
2914 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
2915 				STR_NOSIG, &alloc_err);
2916 	}
2917 	ASSERT(mp != NULL);
2918 	ASSERT(alloc_err == 0);
2919 
2920 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
2921 	iov.iov_len = args->count;
2922 	uio.uio_iov = &iov;
2923 	uio.uio_iovcnt = 1;
2924 	uio.uio_segflg = UIO_SYSSPACE;
2925 	uio.uio_extflg = UIO_COPY_CACHED;
2926 	uio.uio_loffset = args->offset;
2927 	uio.uio_resid = args->count;
2928 
2929 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
2930 
2931 	va.va_mask = AT_SIZE;
2932 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2933 
2934 	if (error) {
2935 		freeb(mp);
2936 		*cs->statusp = resp->status = puterrno4(error);
2937 		goto out;
2938 	}
2939 
2940 	*cs->statusp = resp->status = NFS4_OK;
2941 
2942 	ASSERT(uio.uio_resid >= 0);
2943 	resp->data_len = args->count - uio.uio_resid;
2944 	resp->data_val = (char *)mp->b_datap->db_base;
2945 	resp->mblk = mp;
2946 
2947 	if (!verror && offset + resp->data_len == va.va_size)
2948 		resp->eof = TRUE;
2949 	else
2950 		resp->eof = FALSE;
2951 
2952 out:
2953 	if (in_crit)
2954 		nbl_end_crit(vp);
2955 }
2956 
2957 static void
2958 rfs4_op_read_free(nfs_resop4 *resop)
2959 {
2960 	READ4res *resp = &resop->nfs_resop4_u.opread;
2961 
2962 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2963 		freeb(resp->mblk);
2964 		resp->mblk = NULL;
2965 		resp->data_val = NULL;
2966 		resp->data_len = 0;
2967 	}
2968 }
2969 
2970 static void
2971 rfs4_op_readdir_free(nfs_resop4 *resop)
2972 {
2973 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
2974 
2975 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2976 		freeb(resp->mblk);
2977 		resp->mblk = NULL;
2978 		resp->data_len = 0;
2979 	}
2980 }
2981 
2982 
2983 /* ARGSUSED */
2984 static void
2985 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2986 	struct compound_state *cs)
2987 {
2988 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
2989 	int error;
2990 	vnode_t *vp;
2991 	struct exportinfo *exi, *sav_exi;
2992 	nfs_fh4_fmt_t *fh_fmtp;
2993 
2994 	if (cs->vp) {
2995 		VN_RELE(cs->vp);
2996 		cs->vp = NULL;
2997 	}
2998 
2999 	if (cs->cr)
3000 		crfree(cs->cr);
3001 
3002 	cs->cr = crdup(cs->basecr);
3003 
3004 	vp = exi_public->exi_vp;
3005 	if (vp == NULL) {
3006 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3007 		return;
3008 	}
3009 
3010 	error = makefh4(&cs->fh, vp, exi_public);
3011 	if (error != 0) {
3012 		*cs->statusp = resp->status = puterrno4(error);
3013 		return;
3014 	}
3015 	sav_exi = cs->exi;
3016 	if (exi_public == exi_root) {
3017 		/*
3018 		 * No filesystem is actually shared public, so we default
3019 		 * to exi_root. In this case, we must check whether root
3020 		 * is exported.
3021 		 */
3022 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3023 
3024 		/*
3025 		 * if root filesystem is exported, the exportinfo struct that we
3026 		 * should use is what checkexport4 returns, because root_exi is
3027 		 * actually a mostly empty struct.
3028 		 */
3029 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3030 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3031 		cs->exi = ((exi != NULL) ? exi : exi_public);
3032 	} else {
3033 		/*
3034 		 * it's a properly shared filesystem
3035 		 */
3036 		cs->exi = exi_public;
3037 	}
3038 
3039 	VN_HOLD(vp);
3040 	cs->vp = vp;
3041 
3042 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3043 		VN_RELE(cs->vp);
3044 		cs->vp = NULL;
3045 		cs->exi = sav_exi;
3046 		return;
3047 	}
3048 
3049 	*cs->statusp = resp->status = NFS4_OK;
3050 }
3051 
3052 /*
3053  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3054  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3055  * or joe have restrictive search permissions, then we shouldn't let
3056  * the client get a file handle. This is easy to enforce. However, we
3057  * don't know what security flavor should be used until we resolve the
3058  * path name. Another complication is uid mapping. If root is
3059  * the user, then it will be mapped to the anonymous user by default,
3060  * but we won't know that till we've resolved the path name. And we won't
3061  * know what the anonymous user is.
3062  * Luckily, SECINFO is specified to take a full filename.
3063  * So what we will have to in rfs4_op_lookup is check that flavor of
3064  * the target object matches that of the request, and if root was the
3065  * caller, check for the root= and anon= options, and if necessary,
3066  * repeat the lookup using the right cred_t. But that's not done yet.
3067  */
3068 /* ARGSUSED */
3069 static void
3070 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3071 	struct compound_state *cs)
3072 {
3073 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3074 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3075 	nfs_fh4_fmt_t *fh_fmtp;
3076 
3077 	if (cs->vp) {
3078 		VN_RELE(cs->vp);
3079 		cs->vp = NULL;
3080 	}
3081 
3082 	if (cs->cr) {
3083 		crfree(cs->cr);
3084 		cs->cr = NULL;
3085 	}
3086 
3087 
3088 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3089 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3090 		return;
3091 	}
3092 
3093 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3094 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3095 				NULL);
3096 
3097 	if (cs->exi == NULL) {
3098 		*cs->statusp = resp->status = NFS4ERR_STALE;
3099 		return;
3100 	}
3101 
3102 	cs->cr = crdup(cs->basecr);
3103 
3104 	ASSERT(cs->cr != NULL);
3105 
3106 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3107 		*cs->statusp = resp->status;
3108 		return;
3109 	}
3110 
3111 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3112 		VN_RELE(cs->vp);
3113 		cs->vp = NULL;
3114 		return;
3115 	}
3116 
3117 	nfs_fh4_copy(&args->object, &cs->fh);
3118 	*cs->statusp = resp->status = NFS4_OK;
3119 	cs->deleg = FALSE;
3120 }
3121 
3122 /* ARGSUSED */
3123 static void
3124 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125 	struct compound_state *cs)
3126 
3127 {
3128 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3129 	int error;
3130 	fid_t fid;
3131 	struct exportinfo *exi, *sav_exi;
3132 
3133 	if (cs->vp) {
3134 		VN_RELE(cs->vp);
3135 		cs->vp = NULL;
3136 	}
3137 
3138 	if (cs->cr)
3139 		crfree(cs->cr);
3140 
3141 	cs->cr = crdup(cs->basecr);
3142 
3143 	/*
3144 	 * Using rootdir, the system root vnode,
3145 	 * get its fid.
3146 	 */
3147 	bzero(&fid, sizeof (fid));
3148 	fid.fid_len = MAXFIDSZ;
3149 	error = vop_fid_pseudo(rootdir, &fid);
3150 	if (error != 0) {
3151 		*cs->statusp = resp->status = puterrno4(error);
3152 		return;
3153 	}
3154 
3155 	/*
3156 	 * Then use the root fsid & fid it to find out if it's exported
3157 	 *
3158 	 * If the server root isn't exported directly, then
3159 	 * it should at least be a pseudo export based on
3160 	 * one or more exports further down in the server's
3161 	 * file tree.
3162 	 */
3163 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3164 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3165 		NFS4_DEBUG(rfs4_debug,
3166 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3167 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3168 		return;
3169 	}
3170 
3171 	/*
3172 	 * Now make a filehandle based on the root
3173 	 * export and root vnode.
3174 	 */
3175 	error = makefh4(&cs->fh, rootdir, exi);
3176 	if (error != 0) {
3177 		*cs->statusp = resp->status = puterrno4(error);
3178 		return;
3179 	}
3180 
3181 	sav_exi = cs->exi;
3182 	cs->exi = exi;
3183 
3184 	VN_HOLD(rootdir);
3185 	cs->vp = rootdir;
3186 
3187 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3188 		VN_RELE(rootdir);
3189 		cs->vp = NULL;
3190 		cs->exi = sav_exi;
3191 		return;
3192 	}
3193 
3194 	*cs->statusp = resp->status = NFS4_OK;
3195 	cs->deleg = FALSE;
3196 }
3197 
3198 /*
3199  * A directory entry is a valid nfsv4 entry if
3200  * - it has a non-zero ino
3201  * - it is not a dot or dotdot name
3202  * - it is visible in a pseudo export or in a real export that can
3203  *   only have a limited view.
3204  */
3205 static bool_t
3206 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3207 		int *expseudo, int check_visible)
3208 {
3209 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3210 		*expseudo = 0;
3211 		return (FALSE);
3212 	}
3213 
3214 	if (! check_visible) {
3215 		*expseudo = 0;
3216 		return (TRUE);
3217 	}
3218 
3219 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3220 }
3221 
3222 /*
3223  * set_rdattr_params sets up the variables used to manage what information
3224  * to get for each directory entry.
3225  */
3226 static nfsstat4
3227 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3228 		bitmap4 attrs, bool_t *need_to_lookup)
3229 {
3230 	uint_t	va_mask;
3231 	nfsstat4 status;
3232 	bitmap4 objbits;
3233 
3234 	status = bitmap4_to_attrmask(attrs, sargp);
3235 	if (status != NFS4_OK) {
3236 		/*
3237 		 * could not even figure attr mask
3238 		 */
3239 		return (status);
3240 	}
3241 	va_mask = sargp->vap->va_mask;
3242 
3243 	/*
3244 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3245 	 * mntdfid_set is set once here, but mounted_on_fileid is
3246 	 * set in main dirent processing loop for each dirent.
3247 	 * The mntdfid_set is a simple optimization that lets the
3248 	 * server attr code avoid work when caller is readdir.
3249 	 */
3250 	sargp->mntdfid_set = TRUE;
3251 
3252 	/*
3253 	 * Lookup entry only if client asked for any of the following:
3254 	 * a) vattr attrs
3255 	 * b) vfs attrs
3256 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3257 	 *    other than mounted_on_fileid (which we can take from dirent)
3258 	 */
3259 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3260 
3261 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3262 		*need_to_lookup = TRUE;
3263 	else
3264 		*need_to_lookup = FALSE;
3265 
3266 	if (sargp->sbp == NULL)
3267 		return (NFS4_OK);
3268 
3269 	/*
3270 	 * If filesystem attrs are requested, get them now from the
3271 	 * directory vp, as most entries will have same filesystem. The only
3272 	 * exception are mounted over entries but we handle
3273 	 * those as we go (XXX mounted over detection not yet implemented).
3274 	 */
3275 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3276 	status = bitmap4_get_sysattrs(sargp);
3277 	sargp->vap->va_mask = va_mask;
3278 
3279 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3280 		/*
3281 		 * Failed to get filesystem attributes.
3282 		 * Return a rdattr_error for each entry, but don't fail.
3283 		 * However, don't get any obj-dependent attrs.
3284 		 */
3285 		sargp->rdattr_error = status;	/* for rdattr_error */
3286 		*need_to_lookup = FALSE;
3287 		/*
3288 		 * At least get fileid for regular readdir output
3289 		 */
3290 		sargp->vap->va_mask &= AT_NODEID;
3291 		status = NFS4_OK;
3292 	}
3293 
3294 	return (status);
3295 }
3296 
3297 /*
3298  * readlink: args: CURRENT_FH.
3299  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3300  */
3301 
3302 /* ARGSUSED */
3303 static void
3304 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3305 	struct compound_state *cs)
3306 {
3307 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3308 	int error;
3309 	vnode_t *vp;
3310 	struct iovec iov;
3311 	struct vattr va;
3312 	struct uio uio;
3313 	char *data;
3314 
3315 	/* CURRENT_FH: directory */
3316 	vp = cs->vp;
3317 	if (vp == NULL) {
3318 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3319 		return;
3320 	}
3321 
3322 	if (cs->access == CS_ACCESS_DENIED) {
3323 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3324 		return;
3325 	}
3326 
3327 	if (vp->v_type == VDIR) {
3328 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3329 		return;
3330 	}
3331 
3332 	if (vp->v_type != VLNK) {
3333 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3334 		return;
3335 	}
3336 
3337 	va.va_mask = AT_MODE;
3338 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3339 	if (error) {
3340 		*cs->statusp = resp->status = puterrno4(error);
3341 		return;
3342 	}
3343 
3344 	if (MANDLOCK(vp, va.va_mode)) {
3345 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3346 		return;
3347 	}
3348 
3349 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3350 
3351 	iov.iov_base = data;
3352 	iov.iov_len = MAXPATHLEN;
3353 	uio.uio_iov = &iov;
3354 	uio.uio_iovcnt = 1;
3355 	uio.uio_segflg = UIO_SYSSPACE;
3356 	uio.uio_extflg = UIO_COPY_CACHED;
3357 	uio.uio_loffset = 0;
3358 	uio.uio_resid = MAXPATHLEN;
3359 
3360 	error = VOP_READLINK(vp, &uio, cs->cr);
3361 
3362 	if (error) {
3363 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3364 		*cs->statusp = resp->status = puterrno4(error);
3365 		return;
3366 	}
3367 
3368 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3369 
3370 	/*
3371 	 * treat link name as data
3372 	 */
3373 	(void) str_to_utf8(data, &resp->link);
3374 
3375 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3376 	*cs->statusp = resp->status = NFS4_OK;
3377 }
3378 
3379 static void
3380 rfs4_op_readlink_free(nfs_resop4 *resop)
3381 {
3382 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3383 	utf8string *symlink = &resp->link;
3384 
3385 	if (symlink->utf8string_val) {
3386 		UTF8STRING_FREE(*symlink)
3387 	}
3388 }
3389 
3390 /*
3391  * release_lockowner:
3392  *	Release any state associated with the supplied
3393  *	lockowner. Note if any lo_state is holding locks we will not
3394  *	rele that lo_state and thus the lockowner will not be destroyed.
3395  *	A client using lock after the lock owner stateid has been released
3396  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3397  *	to reissue the lock with new_lock_owner set to TRUE.
3398  *	args: lock_owner
3399  *	res:  status
3400  */
3401 /* ARGSUSED */
3402 static void
3403 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3404 	struct svc_req *req, struct compound_state *cs)
3405 {
3406 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3407 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3408 	rfs4_lockowner_t *lo;
3409 	rfs4_openowner_t *oop;
3410 	rfs4_state_t *sp;
3411 	rfs4_lo_state_t *lsp;
3412 	rfs4_client_t *cp;
3413 	bool_t create = FALSE;
3414 	locklist_t *llist;
3415 	sysid_t sysid;
3416 
3417 	/* Make sure there is a clientid around for this request */
3418 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3419 
3420 	if (cp == NULL) {
3421 		*cs->statusp = resp->status =
3422 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3423 		return;
3424 	}
3425 	rfs4_client_rele(cp);
3426 
3427 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3428 	if (lo == NULL) {
3429 		*cs->statusp = resp->status = NFS4_OK;
3430 		return;
3431 	}
3432 	ASSERT(lo->client != NULL);
3433 
3434 	/*
3435 	 * Check for EXPIRED client. If so will reap state with in a lease
3436 	 * period or on next set_clientid_confirm step
3437 	 */
3438 	if (rfs4_lease_expired(lo->client)) {
3439 		rfs4_lockowner_rele(lo);
3440 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3441 		return;
3442 	}
3443 
3444 	/*
3445 	 * If no sysid has been assigned, then no locks exist; just return.
3446 	 */
3447 	rfs4_dbe_lock(lo->client->dbe);
3448 	if (lo->client->sysidt == LM_NOSYSID) {
3449 		rfs4_lockowner_rele(lo);
3450 		rfs4_dbe_unlock(lo->client->dbe);
3451 		return;
3452 	}
3453 
3454 	sysid = lo->client->sysidt;
3455 	rfs4_dbe_unlock(lo->client->dbe);
3456 
3457 	/*
3458 	 * Mark the lockowner invalid.
3459 	 */
3460 	rfs4_dbe_hide(lo->dbe);
3461 
3462 	/*
3463 	 * sysid-pid pair should now not be used since the lockowner is
3464 	 * invalid. If the client were to instantiate the lockowner again
3465 	 * it would be assigned a new pid. Thus we can get the list of
3466 	 * current locks.
3467 	 */
3468 
3469 	llist = flk_get_active_locks(sysid, lo->pid);
3470 	/* If we are still holding locks fail */
3471 	if (llist != NULL) {
3472 
3473 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3474 
3475 		flk_free_locklist(llist);
3476 		/*
3477 		 * We need to unhide the lockowner so the client can
3478 		 * try it again. The bad thing here is if the client
3479 		 * has a logic error that took it here in the first place
3480 		 * he probably has lost accounting of the locks that it
3481 		 * is holding. So we may have dangling state until the
3482 		 * open owner state is reaped via close. One scenario
3483 		 * that could possibly occur is that the client has
3484 		 * sent the unlock request(s) in separate threads
3485 		 * and has not waited for the replies before sending the
3486 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3487 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3488 		 * reissuing the request.
3489 		 */
3490 		rfs4_dbe_unhide(lo->dbe);
3491 		rfs4_lockowner_rele(lo);
3492 		return;
3493 	}
3494 
3495 	/*
3496 	 * For the corresponding client we need to check each open
3497 	 * owner for any opens that have lockowner state associated
3498 	 * with this lockowner.
3499 	 */
3500 
3501 	rfs4_dbe_lock(lo->client->dbe);
3502 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3503 	    oop = oop->openownerlist.next->oop) {
3504 
3505 		rfs4_dbe_lock(oop->dbe);
3506 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3507 		    sp = sp->ownerstateids.next->sp) {
3508 
3509 			rfs4_dbe_lock(sp->dbe);
3510 			for (lsp = sp->lockownerlist.next->lsp;
3511 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3512 				if (lsp->locker == lo) {
3513 					rfs4_dbe_lock(lsp->dbe);
3514 					rfs4_dbe_invalidate(lsp->dbe);
3515 					rfs4_dbe_unlock(lsp->dbe);
3516 				}
3517 			}
3518 			rfs4_dbe_unlock(sp->dbe);
3519 		}
3520 		rfs4_dbe_unlock(oop->dbe);
3521 	}
3522 	rfs4_dbe_unlock(lo->client->dbe);
3523 
3524 	rfs4_lockowner_rele(lo);
3525 
3526 	*cs->statusp = resp->status = NFS4_OK;
3527 }
3528 
3529 /*
3530  * short utility function to lookup a file and recall the delegation
3531  */
3532 static rfs4_file_t *
3533 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3534 	int *lkup_error, cred_t *cr)
3535 {
3536 	vnode_t *vp;
3537 	rfs4_file_t *fp = NULL;
3538 	bool_t fcreate = FALSE;
3539 	int error;
3540 
3541 	if (vpp)
3542 		*vpp = NULL;
3543 
3544 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3545 		if (vp->v_type == VREG)
3546 			fp = rfs4_findfile(vp, NULL, &fcreate);
3547 		if (vpp)
3548 			*vpp = vp;
3549 		else
3550 			VN_RELE(vp);
3551 	}
3552 
3553 	if (lkup_error)
3554 		*lkup_error = error;
3555 
3556 	return (fp);
3557 }
3558 
3559 /*
3560  * remove: args: CURRENT_FH: directory; name.
3561  *	res: status. If success - CURRENT_FH unchanged, return change_info
3562  *		for directory.
3563  */
3564 /* ARGSUSED */
3565 static void
3566 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3567 	struct compound_state *cs)
3568 {
3569 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3570 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3571 	int error;
3572 	vnode_t *dvp, *vp;
3573 	struct vattr bdva, idva, adva;
3574 	char *nm;
3575 	uint_t len;
3576 	rfs4_file_t *fp;
3577 	int in_crit = 0;
3578 
3579 	/* CURRENT_FH: directory */
3580 	dvp = cs->vp;
3581 	if (dvp == NULL) {
3582 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3583 		return;
3584 	}
3585 
3586 	if (cs->access == CS_ACCESS_DENIED) {
3587 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3588 		return;
3589 	}
3590 
3591 	/*
3592 	 * If there is an unshared filesystem mounted on this vnode,
3593 	 * Do not allow to remove anything in this directory.
3594 	 */
3595 	if (vn_ismntpt(dvp)) {
3596 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3597 		return;
3598 	}
3599 
3600 	if (dvp->v_type != VDIR) {
3601 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3602 		return;
3603 	}
3604 
3605 	if (!utf8_dir_verify(&args->target)) {
3606 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3607 		return;
3608 	}
3609 
3610 	/*
3611 	 * Lookup the file so that we can check if it's a directory
3612 	 */
3613 	nm = utf8_to_fn(&args->target, &len, NULL);
3614 	if (nm == NULL) {
3615 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3616 		return;
3617 	}
3618 
3619 	if (len > MAXNAMELEN) {
3620 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3621 		kmem_free(nm, len);
3622 		return;
3623 	}
3624 
3625 	if (rdonly4(cs->exi, cs->vp, req)) {
3626 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3627 		kmem_free(nm, len);
3628 		return;
3629 	}
3630 
3631 	/*
3632 	 * Lookup the file to determine type and while we are see if
3633 	 * there is a file struct around and check for delegation.
3634 	 * We don't need to acquire va_seq before this lookup, if
3635 	 * it causes an update, cinfo.before will not match, which will
3636 	 * trigger a cache flush even if atomic is TRUE.
3637 	 */
3638 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3639 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3640 						NULL)) {
3641 			VN_RELE(vp);
3642 			rfs4_file_rele(fp);
3643 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3644 			kmem_free(nm, len);
3645 			return;
3646 		}
3647 	}
3648 
3649 	/* Didn't find anything to remove */
3650 	if (vp == NULL) {
3651 		*cs->statusp = resp->status = error;
3652 		kmem_free(nm, len);
3653 		return;
3654 	}
3655 
3656 	if (nbl_need_check(vp)) {
3657 		nbl_start_crit(vp, RW_READER);
3658 		in_crit = 1;
3659 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3660 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3661 			kmem_free(nm, len);
3662 			nbl_end_crit(vp);
3663 			VN_RELE(vp);
3664 			if (fp) {
3665 				rfs4_clear_dont_grant(fp);
3666 				rfs4_file_rele(fp);
3667 			}
3668 			return;
3669 		}
3670 	}
3671 
3672 	/* Get dir "before" change value */
3673 	bdva.va_mask = AT_CTIME|AT_SEQ;
3674 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3675 	if (error) {
3676 		*cs->statusp = resp->status = puterrno4(error);
3677 		kmem_free(nm, len);
3678 		return;
3679 	}
3680 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3681 
3682 	/* Actually do the REMOVE operation */
3683 	if (vp->v_type == VDIR) {
3684 		/*
3685 		 * Can't remove a directory that has a mounted-on filesystem.
3686 		 */
3687 		if (vn_ismntpt(vp)) {
3688 			error = EACCES;
3689 		} else {
3690 			/*
3691 			 * System V defines rmdir to return EEXIST,
3692 			 * not * ENOTEMPTY, if the directory is not
3693 			 * empty.  A System V NFS server needs to map
3694 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3695 			 * transmit over the wire.
3696 			 */
3697 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3698 				== EEXIST)
3699 				error = ENOTEMPTY;
3700 		}
3701 	} else {
3702 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3703 			fp != NULL) {
3704 			struct vattr va;
3705 			vnode_t *tvp;
3706 
3707 			rfs4_dbe_lock(fp->dbe);
3708 			tvp = fp->vp;
3709 			if (tvp)
3710 				VN_HOLD(tvp);
3711 			rfs4_dbe_unlock(fp->dbe);
3712 
3713 			if (tvp) {
3714 				/*
3715 				 * This is va_seq safe because we are not
3716 				 * manipulating dvp.
3717 				 */
3718 				va.va_mask = AT_NLINK;
3719 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3720 					va.va_nlink == 0) {
3721 					/* Remove state on file remove */
3722 					if (in_crit) {
3723 						nbl_end_crit(vp);
3724 						in_crit = 0;
3725 					}
3726 					rfs4_close_all_state(fp);
3727 				}
3728 				VN_RELE(tvp);
3729 			}
3730 		}
3731 	}
3732 
3733 	if (in_crit)
3734 		nbl_end_crit(vp);
3735 	VN_RELE(vp);
3736 
3737 	if (fp) {
3738 		rfs4_clear_dont_grant(fp);
3739 		rfs4_file_rele(fp);
3740 	}
3741 	kmem_free(nm, len);
3742 
3743 	if (error) {
3744 		*cs->statusp = resp->status = puterrno4(error);
3745 		return;
3746 	}
3747 
3748 	/*
3749 	 * Get the initial "after" sequence number, if it fails, set to zero
3750 	 */
3751 	idva.va_mask = AT_SEQ;
3752 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3753 		idva.va_seq = 0;
3754 
3755 	/*
3756 	 * Force modified data and metadata out to stable storage.
3757 	 */
3758 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3759 
3760 	/*
3761 	 * Get "after" change value, if it fails, simply return the
3762 	 * before value.
3763 	 */
3764 	adva.va_mask = AT_CTIME|AT_SEQ;
3765 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3766 		adva.va_ctime = bdva.va_ctime;
3767 		adva.va_seq = 0;
3768 	}
3769 
3770 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3771 
3772 	/*
3773 	 * The cinfo.atomic = TRUE only if we have
3774 	 * non-zero va_seq's, and it has incremented by exactly one
3775 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3776 	 * the VOP_FSYNC.
3777 	 */
3778 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3779 			idva.va_seq == (bdva.va_seq + 1) &&
3780 			idva.va_seq == adva.va_seq)
3781 		resp->cinfo.atomic = TRUE;
3782 	else
3783 		resp->cinfo.atomic = FALSE;
3784 
3785 	*cs->statusp = resp->status = NFS4_OK;
3786 }
3787 
3788 /*
3789  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3790  *		oldname and newname.
3791  *	res: status. If success - CURRENT_FH unchanged, return change_info
3792  *		for both from and target directories.
3793  */
3794 /* ARGSUSED */
3795 static void
3796 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3797 	struct compound_state *cs)
3798 {
3799 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3800 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3801 	int error;
3802 	vnode_t *odvp;
3803 	vnode_t *ndvp;
3804 	vnode_t *srcvp, *targvp;
3805 	struct vattr obdva, oidva, oadva;
3806 	struct vattr nbdva, nidva, nadva;
3807 	char *onm, *nnm;
3808 	uint_t olen, nlen;
3809 	rfs4_file_t *fp, *sfp;
3810 	int in_crit_src, in_crit_targ;
3811 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3812 
3813 	fp = sfp = NULL;
3814 	srcvp = targvp = NULL;
3815 	in_crit_src = in_crit_targ = 0;
3816 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3817 
3818 	/* CURRENT_FH: target directory */
3819 	ndvp = cs->vp;
3820 	if (ndvp == NULL) {
3821 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3822 		return;
3823 	}
3824 
3825 	/* SAVED_FH: from directory */
3826 	odvp = cs->saved_vp;
3827 	if (odvp == NULL) {
3828 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3829 		return;
3830 	}
3831 
3832 	if (cs->access == CS_ACCESS_DENIED) {
3833 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3834 		return;
3835 	}
3836 
3837 	/*
3838 	 * If there is an unshared filesystem mounted on this vnode,
3839 	 * do not allow to rename objects in this directory.
3840 	 */
3841 	if (vn_ismntpt(odvp)) {
3842 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3843 		return;
3844 	}
3845 
3846 	/*
3847 	 * If there is an unshared filesystem mounted on this vnode,
3848 	 * do not allow to rename to this directory.
3849 	 */
3850 	if (vn_ismntpt(ndvp)) {
3851 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3852 		return;
3853 	}
3854 
3855 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
3856 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3857 		return;
3858 	}
3859 
3860 	if (cs->saved_exi != cs->exi) {
3861 		*cs->statusp = resp->status = NFS4ERR_XDEV;
3862 		return;
3863 	}
3864 
3865 	if (!utf8_dir_verify(&args->oldname)) {
3866 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3867 		return;
3868 	}
3869 
3870 	if (!utf8_dir_verify(&args->newname)) {
3871 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3872 		return;
3873 	}
3874 
3875 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
3876 	if (onm == NULL) {
3877 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3878 		return;
3879 	}
3880 
3881 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
3882 	if (nnm == NULL) {
3883 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3884 		kmem_free(onm, olen);
3885 		return;
3886 	}
3887 
3888 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
3889 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3890 		kmem_free(onm, olen);
3891 		kmem_free(nnm, nlen);
3892 		return;
3893 	}
3894 
3895 
3896 	if (rdonly4(cs->exi, cs->vp, req)) {
3897 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3898 		kmem_free(onm, olen);
3899 		kmem_free(nnm, nlen);
3900 		return;
3901 	}
3902 
3903 	/*
3904 	 * Is the source a file and have a delegation?
3905 	 * We don't need to acquire va_seq before these lookups, if
3906 	 * it causes an update, cinfo.before will not match, which will
3907 	 * trigger a cache flush even if atomic is TRUE.
3908 	 */
3909 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
3910 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
3911 						NULL)) {
3912 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3913 			goto err_out;
3914 		}
3915 	}
3916 
3917 	if (srcvp == NULL) {
3918 		*cs->statusp = resp->status = puterrno4(error);
3919 		kmem_free(onm, olen);
3920 		kmem_free(nnm, nlen);
3921 		return;
3922 	}
3923 
3924 	sfp_rele_grant_hold = 1;
3925 
3926 	/* Does the destination exist and a file and have a delegation? */
3927 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
3928 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3929 						NULL)) {
3930 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3931 			goto err_out;
3932 		}
3933 	}
3934 	fp_rele_grant_hold = 1;
3935 
3936 
3937 	/* Check for NBMAND lock on both source and target */
3938 	if (nbl_need_check(srcvp)) {
3939 		nbl_start_crit(srcvp, RW_READER);
3940 		in_crit_src = 1;
3941 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
3942 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3943 			goto err_out;
3944 		}
3945 	}
3946 
3947 	if (targvp && nbl_need_check(targvp)) {
3948 		nbl_start_crit(targvp, RW_READER);
3949 		in_crit_targ = 1;
3950 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
3951 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3952 			goto err_out;
3953 		}
3954 	}
3955 
3956 	/* Get source "before" change value */
3957 	obdva.va_mask = AT_CTIME|AT_SEQ;
3958 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
3959 	if (!error) {
3960 		nbdva.va_mask = AT_CTIME|AT_SEQ;
3961 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
3962 	}
3963 	if (error) {
3964 		*cs->statusp = resp->status = puterrno4(error);
3965 		goto err_out;
3966 	}
3967 
3968 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
3969 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
3970 
3971 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
3972 		fp != NULL) {
3973 		struct vattr va;
3974 		vnode_t *tvp;
3975 
3976 		rfs4_dbe_lock(fp->dbe);
3977 		tvp = fp->vp;
3978 		if (tvp)
3979 			VN_HOLD(tvp);
3980 		rfs4_dbe_unlock(fp->dbe);
3981 
3982 		if (tvp) {
3983 			va.va_mask = AT_NLINK;
3984 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3985 				va.va_nlink == 0) {
3986 				/* The file is gone and so should the state */
3987 				if (in_crit_targ) {
3988 					nbl_end_crit(targvp);
3989 					in_crit_targ = 0;
3990 				}
3991 				rfs4_close_all_state(fp);
3992 			}
3993 			VN_RELE(tvp);
3994 		}
3995 	}
3996 	if (error == 0) {
3997 		char *tmp;
3998 
3999 		/* fix the path name for the renamed file */
4000 		mutex_enter(&srcvp->v_lock);
4001 		tmp = srcvp->v_path;
4002 		srcvp->v_path = NULL;
4003 		mutex_exit(&srcvp->v_lock);
4004 		vn_setpath(rootdir, ndvp, srcvp, nnm, nlen - 1);
4005 		if (tmp != NULL)
4006 			kmem_free(tmp, strlen(tmp) + 1);
4007 	}
4008 
4009 	if (in_crit_src)
4010 		nbl_end_crit(srcvp);
4011 	if (srcvp)
4012 		VN_RELE(srcvp);
4013 	if (in_crit_targ)
4014 		nbl_end_crit(targvp);
4015 	if (targvp)
4016 		VN_RELE(targvp);
4017 
4018 	if (sfp) {
4019 		rfs4_clear_dont_grant(sfp);
4020 		rfs4_file_rele(sfp);
4021 	}
4022 	if (fp) {
4023 		rfs4_clear_dont_grant(fp);
4024 		rfs4_file_rele(fp);
4025 	}
4026 
4027 	kmem_free(onm, olen);
4028 	kmem_free(nnm, nlen);
4029 
4030 	/*
4031 	 * Get the initial "after" sequence number, if it fails, set to zero
4032 	 */
4033 	oidva.va_mask = AT_SEQ;
4034 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4035 		oidva.va_seq = 0;
4036 
4037 	nidva.va_mask = AT_SEQ;
4038 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4039 		nidva.va_seq = 0;
4040 
4041 	/*
4042 	 * Force modified data and metadata out to stable storage.
4043 	 */
4044 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4045 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4046 
4047 	if (error) {
4048 		*cs->statusp = resp->status = puterrno4(error);
4049 		return;
4050 	}
4051 
4052 	/*
4053 	 * Get "after" change values, if it fails, simply return the
4054 	 * before value.
4055 	 */
4056 	oadva.va_mask = AT_CTIME|AT_SEQ;
4057 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4058 		oadva.va_ctime = obdva.va_ctime;
4059 		oadva.va_seq = 0;
4060 	}
4061 
4062 	nadva.va_mask = AT_CTIME|AT_SEQ;
4063 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4064 		nadva.va_ctime = nbdva.va_ctime;
4065 		nadva.va_seq = 0;
4066 	}
4067 
4068 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4069 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4070 
4071 	/*
4072 	 * The cinfo.atomic = TRUE only if we have
4073 	 * non-zero va_seq's, and it has incremented by exactly one
4074 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4075 	 */
4076 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4077 			oidva.va_seq == (obdva.va_seq + 1) &&
4078 			oidva.va_seq == oadva.va_seq)
4079 		resp->source_cinfo.atomic = TRUE;
4080 	else
4081 		resp->source_cinfo.atomic = FALSE;
4082 
4083 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4084 			nidva.va_seq == (nbdva.va_seq + 1) &&
4085 			nidva.va_seq == nadva.va_seq)
4086 		resp->target_cinfo.atomic = TRUE;
4087 	else
4088 		resp->target_cinfo.atomic = FALSE;
4089 
4090 #ifdef	VOLATILE_FH_TEST
4091 	{
4092 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4093 
4094 	/*
4095 	 * Add the renamed file handle to the volatile rename list
4096 	 */
4097 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4098 		/* file handles may expire on rename */
4099 		vnode_t *vp;
4100 
4101 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4102 		/*
4103 		 * Already know that nnm will be a valid string
4104 		 */
4105 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4106 		kmem_free(nnm, nlen);
4107 		if (!error) {
4108 			add_volrnm_fh(cs->exi, vp);
4109 			VN_RELE(vp);
4110 		}
4111 	}
4112 	}
4113 #endif	/* VOLATILE_FH_TEST */
4114 
4115 	*cs->statusp = resp->status = NFS4_OK;
4116 	return;
4117 
4118 err_out:
4119 	kmem_free(onm, olen);
4120 	kmem_free(nnm, nlen);
4121 
4122 	if (in_crit_src) nbl_end_crit(srcvp);
4123 	if (in_crit_targ) nbl_end_crit(targvp);
4124 	if (targvp) VN_RELE(targvp);
4125 	if (srcvp) VN_RELE(srcvp);
4126 	if (sfp) {
4127 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4128 		rfs4_file_rele(sfp);
4129 	}
4130 	if (fp) {
4131 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4132 		rfs4_file_rele(fp);
4133 	}
4134 }
4135 
4136 /* ARGSUSED */
4137 static void
4138 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4139 	struct compound_state *cs)
4140 {
4141 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4142 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4143 	rfs4_client_t *cp;
4144 
4145 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4146 		*cs->statusp = resp->status =
4147 			rfs4_check_clientid(&args->clientid, 0);
4148 		return;
4149 	}
4150 
4151 	if (rfs4_lease_expired(cp)) {
4152 		rfs4_client_rele(cp);
4153 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4154 		return;
4155 	}
4156 
4157 	rfs4_update_lease(cp);
4158 
4159 	mutex_enter(cp->cbinfo.cb_lock);
4160 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4161 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4162 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4163 	} else {
4164 		*cs->statusp = resp->status = NFS4_OK;
4165 	}
4166 	mutex_exit(cp->cbinfo.cb_lock);
4167 
4168 	rfs4_client_rele(cp);
4169 
4170 }
4171 
4172 /* ARGSUSED */
4173 static void
4174 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4175 	struct compound_state *cs)
4176 {
4177 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4178 
4179 	/* No need to check cs->access - we are not accessing any object */
4180 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4181 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4182 		return;
4183 	}
4184 	if (cs->vp != NULL) {
4185 		VN_RELE(cs->vp);
4186 	}
4187 	cs->vp = cs->saved_vp;
4188 	cs->saved_vp = NULL;
4189 	cs->exi = cs->saved_exi;
4190 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4191 	*cs->statusp = resp->status = NFS4_OK;
4192 	cs->deleg = FALSE;
4193 }
4194 
4195 /* ARGSUSED */
4196 static void
4197 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4198 	struct compound_state *cs)
4199 {
4200 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4201 
4202 	/* No need to check cs->access - we are not accessing any object */
4203 	if (cs->vp == NULL) {
4204 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4205 		return;
4206 	}
4207 	if (cs->saved_vp != NULL) {
4208 		VN_RELE(cs->saved_vp);
4209 	}
4210 	cs->saved_vp = cs->vp;
4211 	VN_HOLD(cs->saved_vp);
4212 	cs->saved_exi = cs->exi;
4213 	/*
4214 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4215 	 * unless necessary.
4216 	 */
4217 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4218 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4219 	}
4220 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4221 	*cs->statusp = resp->status = NFS4_OK;
4222 }
4223 
4224 /*
4225  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4226  * return the bitmap of attrs that were set successfully. It is also
4227  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4228  * always be called only after rfs4_do_set_attrs().
4229  *
4230  * Verify that the attributes are same as the expected ones. sargp->vap
4231  * and sargp->sbp contain the input attributes as translated from fattr4.
4232  *
4233  * This function verifies only the attrs that correspond to a vattr or
4234  * vfsstat struct. That is because of the extra step needed to get the
4235  * corresponding system structs. Other attributes have already been set or
4236  * verified by do_rfs4_set_attrs.
4237  *
4238  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4239  */
4240 static int
4241 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4242 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4243 {
4244 	int error, ret_error = 0;
4245 	int i, k;
4246 	uint_t sva_mask = sargp->vap->va_mask;
4247 	uint_t vbit;
4248 	union nfs4_attr_u *na;
4249 	uint8_t *amap;
4250 	bool_t getsb = ntovp->vfsstat;
4251 
4252 	if (sva_mask != 0) {
4253 		/*
4254 		 * Okay to overwrite sargp->vap because we verify based
4255 		 * on the incoming values.
4256 		 */
4257 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4258 				sargp->cs->cr);
4259 		if (ret_error) {
4260 			if (resp == NULL)
4261 				return (ret_error);
4262 			/*
4263 			 * Must return bitmap of successful attrs
4264 			 */
4265 			sva_mask = 0;	/* to prevent checking vap later */
4266 		} else {
4267 			/*
4268 			 * Some file systems clobber va_mask. it is probably
4269 			 * wrong of them to do so, nonethless we practice
4270 			 * defensive coding.
4271 			 * See bug id 4276830.
4272 			 */
4273 			sargp->vap->va_mask = sva_mask;
4274 		}
4275 	}
4276 
4277 	if (getsb) {
4278 		/*
4279 		 * Now get the superblock and loop on the bitmap, as there is
4280 		 * no simple way of translating from superblock to bitmap4.
4281 		 */
4282 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4283 		if (ret_error) {
4284 			if (resp == NULL)
4285 				goto errout;
4286 			getsb = FALSE;
4287 		}
4288 	}
4289 
4290 	/*
4291 	 * Now loop and verify each attribute which getattr returned
4292 	 * whether it's the same as the input.
4293 	 */
4294 	if (resp == NULL && !getsb && (sva_mask == 0))
4295 		goto errout;
4296 
4297 	na = ntovp->na;
4298 	amap = ntovp->amap;
4299 	k = 0;
4300 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4301 		k = *amap;
4302 		ASSERT(nfs4_ntov_map[k].nval == k);
4303 		vbit = nfs4_ntov_map[k].vbit;
4304 
4305 		/*
4306 		 * If vattr attribute but VOP_GETATTR failed, or it's
4307 		 * superblock attribute but VFS_STATVFS failed, skip
4308 		 */
4309 		if (vbit) {
4310 			if ((vbit & sva_mask) == 0)
4311 				continue;
4312 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4313 			continue;
4314 		}
4315 		error = (*nfs4_ntov_map[k].sv_getit)(
4316 				NFS4ATTR_VERIT, sargp, na);
4317 		if (resp != NULL) {
4318 			if (error)
4319 				ret_error = -1;	/* not all match */
4320 			else	/* update response bitmap */
4321 				*resp |= nfs4_ntov_map[k].fbit;
4322 			continue;
4323 		}
4324 		if (error) {
4325 			ret_error = -1;	/* not all match */
4326 			break;
4327 		}
4328 	}
4329 errout:
4330 	return (ret_error);
4331 }
4332 
4333 /*
4334  * Decode the attribute to be set/verified. If the attr requires a sys op
4335  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4336  * call the sv_getit function for it, because the sys op hasn't yet been done.
4337  * Return 0 for success, error code if failed.
4338  *
4339  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4340  */
4341 static int
4342 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4343 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4344 {
4345 	int error = 0;
4346 	bool_t set_later;
4347 
4348 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4349 
4350 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4351 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4352 		/*
4353 		 * don't verify yet if a vattr or sb dependent attr,
4354 		 * because we don't have their sys values yet.
4355 		 * Will be done later.
4356 		 */
4357 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4358 			/*
4359 			 * ACLs are a special case, since setting the MODE
4360 			 * conflicts with setting the ACL.  We delay setting
4361 			 * the ACL until all other attributes have been set.
4362 			 * The ACL gets set in do_rfs4_op_setattr().
4363 			 */
4364 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4365 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4366 				    sargp, nap);
4367 				if (error) {
4368 					xdr_free(nfs4_ntov_map[k].xfunc,
4369 					    (caddr_t)nap);
4370 				}
4371 			}
4372 		}
4373 	} else {
4374 #ifdef  DEBUG
4375 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4376 			"decoding attribute %d\n", k);
4377 #endif
4378 		error = EINVAL;
4379 	}
4380 	if (!error && resp_bval && !set_later) {
4381 		*resp_bval |= nfs4_ntov_map[k].fbit;
4382 	}
4383 
4384 	return (error);
4385 }
4386 
4387 /*
4388  * Set vattr based on incoming fattr4 attrs - used by setattr.
4389  * Set response mask. Ignore any values that are not writable vattr attrs.
4390  */
4391 static nfsstat4
4392 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4393 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4394 		nfs4_attr_cmd_t cmd)
4395 {
4396 	int error = 0;
4397 	int i;
4398 	char *attrs = fattrp->attrlist4;
4399 	uint32_t attrslen = fattrp->attrlist4_len;
4400 	XDR xdr;
4401 	nfsstat4 status = NFS4_OK;
4402 	vnode_t *vp = cs->vp;
4403 	union nfs4_attr_u *na;
4404 	uint8_t *amap;
4405 
4406 #ifndef lint
4407 	/*
4408 	 * Make sure that maximum attribute number can be expressed as an
4409 	 * 8 bit quantity.
4410 	 */
4411 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4412 #endif
4413 
4414 	if (vp == NULL) {
4415 		if (resp)
4416 			*resp = 0;
4417 		return (NFS4ERR_NOFILEHANDLE);
4418 	}
4419 	if (cs->access == CS_ACCESS_DENIED) {
4420 		if (resp)
4421 			*resp = 0;
4422 		return (NFS4ERR_ACCESS);
4423 	}
4424 
4425 	sargp->op = cmd;
4426 	sargp->cs = cs;
4427 	sargp->flag = 0;	/* may be set later */
4428 	sargp->vap->va_mask = 0;
4429 	sargp->rdattr_error = NFS4_OK;
4430 	sargp->rdattr_error_req = FALSE;
4431 	/* sargp->sbp is set by the caller */
4432 
4433 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4434 
4435 	na = ntovp->na;
4436 	amap = ntovp->amap;
4437 
4438 	/*
4439 	 * The following loop iterates on the nfs4_ntov_map checking
4440 	 * if the fbit is set in the requested bitmap.
4441 	 * If set then we process the arguments using the
4442 	 * rfs4_fattr4 conversion functions to populate the setattr
4443 	 * vattr and va_mask. Any settable attrs that are not using vattr
4444 	 * will be set in this loop.
4445 	 */
4446 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4447 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4448 			continue;
4449 		}
4450 		/*
4451 		 * If setattr, must be a writable attr.
4452 		 * If verify/nverify, must be a readable attr.
4453 		 */
4454 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4455 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4456 			/*
4457 			 * Client tries to set/verify an
4458 			 * unsupported attribute, tries to set
4459 			 * a read only attr or verify a write
4460 			 * only one - error!
4461 			 */
4462 			break;
4463 		}
4464 		/*
4465 		 * Decode the attribute to set/verify
4466 		 */
4467 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4468 					&xdr, resp ? resp : NULL, na);
4469 		if (error)
4470 			break;
4471 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4472 		na++;
4473 		(ntovp->attrcnt)++;
4474 		if (nfs4_ntov_map[i].vfsstat)
4475 			ntovp->vfsstat = TRUE;
4476 	}
4477 
4478 	if (error != 0)
4479 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4480 						puterrno4(error));
4481 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4482 	return (status);
4483 }
4484 
4485 static nfsstat4
4486 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4487 		stateid4 *stateid)
4488 {
4489 	int error = 0;
4490 	struct nfs4_svgetit_arg sarg;
4491 	bool_t trunc;
4492 
4493 	nfsstat4 status = NFS4_OK;
4494 	cred_t *cr = cs->cr;
4495 	vnode_t *vp = cs->vp;
4496 	struct nfs4_ntov_table ntov;
4497 	struct statvfs64 sb;
4498 	struct vattr bva;
4499 	struct flock64 bf;
4500 	int in_crit = 0;
4501 	uint_t saved_mask = 0;
4502 	caller_context_t ct;
4503 
4504 	*resp = 0;
4505 	sarg.sbp = &sb;
4506 	nfs4_ntov_table_init(&ntov);
4507 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4508 			NFS4ATTR_SETIT);
4509 	if (status != NFS4_OK) {
4510 		/*
4511 		 * failed set attrs
4512 		 */
4513 		goto done;
4514 	}
4515 	if ((sarg.vap->va_mask == 0) &&
4516 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4517 		/*
4518 		 * no further work to be done
4519 		 */
4520 		goto done;
4521 	}
4522 
4523 	/*
4524 	 * If we got a request to set the ACL and the MODE, only
4525 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4526 	 * to change any other bits, along with setting an ACL,
4527 	 * gives NFS4ERR_INVAL.
4528 	 */
4529 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4530 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4531 		vattr_t va;
4532 
4533 		va.va_mask = AT_MODE;
4534 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4535 		if (error) {
4536 			status = puterrno4(error);
4537 			goto done;
4538 		}
4539 		if ((sarg.vap->va_mode ^ va.va_mode) &
4540 		    ~(VSUID | VSGID | VSVTX)) {
4541 			status = NFS4ERR_INVAL;
4542 			goto done;
4543 		}
4544 	}
4545 
4546 	/* Check stateid only if size has been set */
4547 	if (sarg.vap->va_mask & AT_SIZE) {
4548 		trunc = (sarg.vap->va_size == 0);
4549 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4550 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4551 		if (status != NFS4_OK)
4552 			goto done;
4553 	}
4554 
4555 	ct.cc_sysid = 0;
4556 	ct.cc_pid = 0;
4557 	ct.cc_caller_id = nfs4_srv_caller_id;
4558 
4559 	/* XXX start of possible race with delegations */
4560 
4561 	/*
4562 	 * We need to specially handle size changes because it is
4563 	 * possible for the client to create a file with read-only
4564 	 * modes, but with the file opened for writing. If the client
4565 	 * then tries to set the file size, e.g. ftruncate(3C),
4566 	 * fcntl(F_FREESP), the normal access checking done in
4567 	 * VOP_SETATTR would prevent the client from doing it even though
4568 	 * it should be allowed to do so.  To get around this, we do the
4569 	 * access checking for ourselves and use VOP_SPACE which doesn't
4570 	 * do the access checking.
4571 	 * Also the client should not be allowed to change the file
4572 	 * size if there is a conflicting non-blocking mandatory lock in
4573 	 * the region of the change.
4574 	 */
4575 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4576 		u_offset_t offset;
4577 		ssize_t length;
4578 
4579 		/*
4580 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
4581 		 * before returning, sarg.vap->va_mask is used to
4582 		 * generate the setattr reply bitmap.  We also clear
4583 		 * AT_SIZE below before calling VOP_SPACE.  For both
4584 		 * of these cases, the va_mask needs to be saved here
4585 		 * and restored after calling VOP_SETATTR.
4586 		 */
4587 		saved_mask = sarg.vap->va_mask;
4588 
4589 		/*
4590 		 * Check any possible conflict due to NBMAND locks.
4591 		 * Get into critical region before VOP_GETATTR, so the
4592 		 * size attribute is valid when checking conflicts.
4593 		 */
4594 		if (nbl_need_check(vp)) {
4595 			nbl_start_crit(vp, RW_READER);
4596 			in_crit = 1;
4597 		}
4598 
4599 		bva.va_mask = AT_UID|AT_SIZE;
4600 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4601 			status = puterrno4(error);
4602 			goto done;
4603 		}
4604 
4605 		if (in_crit) {
4606 			if (sarg.vap->va_size < bva.va_size) {
4607 				offset = sarg.vap->va_size;
4608 				length = bva.va_size - sarg.vap->va_size;
4609 			} else {
4610 				offset = bva.va_size;
4611 				length = sarg.vap->va_size - bva.va_size;
4612 			}
4613 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4614 				status = NFS4ERR_LOCKED;
4615 				goto done;
4616 			}
4617 		}
4618 
4619 		if (crgetuid(cr) == bva.va_uid) {
4620 			sarg.vap->va_mask &= ~AT_SIZE;
4621 			bf.l_type = F_WRLCK;
4622 			bf.l_whence = 0;
4623 			bf.l_start = (off64_t)sarg.vap->va_size;
4624 			bf.l_len = 0;
4625 			bf.l_sysid = 0;
4626 			bf.l_pid = 0;
4627 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4628 					(offset_t)sarg.vap->va_size, cr, &ct);
4629 		}
4630 	}
4631 
4632 	if (!error && sarg.vap->va_mask != 0)
4633 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4634 
4635 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
4636 	if (saved_mask & AT_SIZE)
4637 		sarg.vap->va_mask |= AT_SIZE;
4638 
4639 	/*
4640 	 * If an ACL was being set, it has been delayed until now,
4641 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4642 	 */
4643 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4644 		int i;
4645 
4646 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4647 			if (ntov.amap[i] == FATTR4_ACL)
4648 				break;
4649 		if (i < NFS4_MAXNUM_ATTRS) {
4650 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4651 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4652 			if (error == 0) {
4653 				*resp |= FATTR4_ACL_MASK;
4654 			} else if (error == ENOTSUP) {
4655 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4656 				status = NFS4ERR_ATTRNOTSUPP;
4657 				goto done;
4658 			}
4659 		} else {
4660 			NFS4_DEBUG(rfs4_debug,
4661 			    (CE_NOTE, "do_rfs4_op_setattr: "
4662 			    "unable to find ACL in fattr4"));
4663 			error = EINVAL;
4664 		}
4665 	}
4666 
4667 	if (error) {
4668 		status = puterrno4(error);
4669 
4670 		/*
4671 		 * Set the response bitmap when setattr failed.
4672 		 * If VOP_SETATTR partially succeeded, test by doing a
4673 		 * VOP_GETATTR on the object and comparing the data
4674 		 * to the setattr arguments.
4675 		 */
4676 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4677 	} else {
4678 		/*
4679 		 * Force modified metadata out to stable storage.
4680 		 */
4681 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4682 		/*
4683 		 * Set response bitmap
4684 		 */
4685 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
4686 	}
4687 
4688 /* Return early and already have a NFSv4 error */
4689 done:
4690 	/*
4691 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
4692 	 * conversion sets both readable and writeable NFS4 attrs
4693 	 * for AT_MTIME and AT_ATIME.  The line below masks out
4694 	 * unrequested attrs from the setattr result bitmap.  This
4695 	 * is placed after the done: label to catch the ATTRNOTSUP
4696 	 * case.
4697 	 */
4698 	*resp &= fattrp->attrmask;
4699 
4700 	if (in_crit)
4701 		nbl_end_crit(vp);
4702 
4703 	nfs4_ntov_table_free(&ntov, &sarg);
4704 
4705 	return (status);
4706 }
4707 
4708 /* ARGSUSED */
4709 static void
4710 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4711 	struct compound_state *cs)
4712 {
4713 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4714 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4715 
4716 	if (cs->vp == NULL) {
4717 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4718 		return;
4719 	}
4720 
4721 	/*
4722 	 * If there is an unshared filesystem mounted on this vnode,
4723 	 * do not allow to setattr on this vnode.
4724 	 */
4725 	if (vn_ismntpt(cs->vp)) {
4726 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4727 		return;
4728 	}
4729 
4730 	resp->attrsset = 0;
4731 
4732 	if (rdonly4(cs->exi, cs->vp, req)) {
4733 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4734 		return;
4735 	}
4736 
4737 	*cs->statusp = resp->status =
4738 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4739 			&args->stateid);
4740 }
4741 
4742 /* ARGSUSED */
4743 static void
4744 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4745 	struct compound_state *cs)
4746 {
4747 	/*
4748 	 * verify and nverify are exactly the same, except that nverify
4749 	 * succeeds when some argument changed, and verify succeeds when
4750 	 * when none changed.
4751 	 */
4752 
4753 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4754 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4755 
4756 	int error;
4757 	struct nfs4_svgetit_arg sarg;
4758 	struct statvfs64 sb;
4759 	struct nfs4_ntov_table ntov;
4760 
4761 	if (cs->vp == NULL) {
4762 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4763 		return;
4764 	}
4765 
4766 	sarg.sbp = &sb;
4767 	nfs4_ntov_table_init(&ntov);
4768 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4769 				&sarg, &ntov, NFS4ATTR_VERIT);
4770 	if (resp->status != NFS4_OK) {
4771 		/*
4772 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4773 		 * so could return -1 for "no match".
4774 		 */
4775 		if (resp->status == -1)
4776 			resp->status = NFS4ERR_NOT_SAME;
4777 		goto done;
4778 	}
4779 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4780 	switch (error) {
4781 	case 0:
4782 		resp->status = NFS4_OK;
4783 		break;
4784 	case -1:
4785 		resp->status = NFS4ERR_NOT_SAME;
4786 		break;
4787 	default:
4788 		resp->status = puterrno4(error);
4789 		break;
4790 	}
4791 done:
4792 	*cs->statusp = resp->status;
4793 	nfs4_ntov_table_free(&ntov, &sarg);
4794 }
4795 
4796 /* ARGSUSED */
4797 static void
4798 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4799 	struct compound_state *cs)
4800 {
4801 	/*
4802 	 * verify and nverify are exactly the same, except that nverify
4803 	 * succeeds when some argument changed, and verify succeeds when
4804 	 * when none changed.
4805 	 */
4806 
4807 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
4808 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
4809 
4810 	int error;
4811 	struct nfs4_svgetit_arg sarg;
4812 	struct statvfs64 sb;
4813 	struct nfs4_ntov_table ntov;
4814 
4815 	if (cs->vp == NULL) {
4816 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4817 		return;
4818 	}
4819 	sarg.sbp = &sb;
4820 	nfs4_ntov_table_init(&ntov);
4821 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4822 				&sarg, &ntov, NFS4ATTR_VERIT);
4823 	if (resp->status != NFS4_OK) {
4824 		/*
4825 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4826 		 * so could return -1 for "no match".
4827 		 */
4828 		if (resp->status == -1)
4829 			resp->status = NFS4_OK;
4830 		goto done;
4831 	}
4832 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4833 	switch (error) {
4834 	case 0:
4835 		resp->status = NFS4ERR_SAME;
4836 		break;
4837 	case -1:
4838 		resp->status = NFS4_OK;
4839 		break;
4840 	default:
4841 		resp->status = puterrno4(error);
4842 		break;
4843 	}
4844 done:
4845 	*cs->statusp = resp->status;
4846 	nfs4_ntov_table_free(&ntov, &sarg);
4847 }
4848 
4849 /*
4850  * XXX - This should live in an NFS header file.
4851  */
4852 #define	MAX_IOVECS	12
4853 
4854 /* ARGSUSED */
4855 static void
4856 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4857 	struct compound_state *cs)
4858 {
4859 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
4860 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
4861 	int error;
4862 	vnode_t *vp;
4863 	struct vattr bva;
4864 	u_offset_t rlimit;
4865 	struct uio uio;
4866 	struct iovec iov[MAX_IOVECS];
4867 	struct iovec *iovp;
4868 	int iovcnt;
4869 	int ioflag;
4870 	cred_t *savecred, *cr;
4871 	bool_t *deleg = &cs->deleg;
4872 	nfsstat4 stat;
4873 	int in_crit = 0;
4874 
4875 	vp = cs->vp;
4876 	if (vp == NULL) {
4877 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4878 		return;
4879 	}
4880 	if (cs->access == CS_ACCESS_DENIED) {
4881 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4882 		return;
4883 	}
4884 
4885 	cr = cs->cr;
4886 
4887 	/*
4888 	 * We have to enter the critical region before calling VOP_RWLOCK
4889 	 * to avoid a deadlock with ufs.
4890 	 */
4891 	if (nbl_need_check(vp)) {
4892 		nbl_start_crit(vp, RW_READER);
4893 		in_crit = 1;
4894 		if (nbl_conflict(vp, NBL_WRITE,
4895 				args->offset, args->data_len, 0)) {
4896 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
4897 			goto out;
4898 		}
4899 	}
4900 
4901 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
4902 					deleg, TRUE)) != NFS4_OK) {
4903 		*cs->statusp = resp->status = stat;
4904 		goto out;
4905 	}
4906 
4907 	bva.va_mask = AT_MODE | AT_UID;
4908 	error = VOP_GETATTR(vp, &bva, 0, cr);
4909 
4910 	/*
4911 	 * If we can't get the attributes, then we can't do the
4912 	 * right access checking.  So, we'll fail the request.
4913 	 */
4914 	if (error) {
4915 		*cs->statusp = resp->status = puterrno4(error);
4916 		goto out;
4917 	}
4918 
4919 	if (rdonly4(cs->exi, cs->vp, req)) {
4920 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4921 		goto out;
4922 	}
4923 
4924 	if (vp->v_type != VREG) {
4925 		*cs->statusp = resp->status =
4926 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
4927 		goto out;
4928 	}
4929 
4930 	if (crgetuid(cr) != bva.va_uid &&
4931 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
4932 		*cs->statusp = resp->status = puterrno4(error);
4933 		goto out;
4934 	}
4935 
4936 	if (MANDLOCK(vp, bva.va_mode)) {
4937 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4938 		goto out;
4939 	}
4940 
4941 	if (args->data_len == 0) {
4942 		*cs->statusp = resp->status = NFS4_OK;
4943 		resp->count = 0;
4944 		resp->committed = args->stable;
4945 		resp->writeverf = Write4verf;
4946 		goto out;
4947 	}
4948 
4949 	if (args->mblk != NULL) {
4950 		mblk_t *m;
4951 		uint_t bytes, round_len;
4952 
4953 		iovcnt = 0;
4954 		bytes = 0;
4955 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
4956 		for (m = args->mblk;
4957 		    m != NULL && bytes < round_len;
4958 		    m = m->b_cont) {
4959 			iovcnt++;
4960 			bytes += MBLKL(m);
4961 		}
4962 #ifdef DEBUG
4963 		/* should have ended on an mblk boundary */
4964 		if (bytes != round_len) {
4965 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
4966 			    bytes, round_len, args->data_len);
4967 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
4968 			    (void *)args->mblk, (void *)m);
4969 			ASSERT(bytes == round_len);
4970 		}
4971 #endif
4972 		if (iovcnt <= MAX_IOVECS) {
4973 			iovp = iov;
4974 		} else {
4975 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
4976 		}
4977 		mblk_to_iov(args->mblk, iovcnt, iovp);
4978 	} else {
4979 		iovcnt = 1;
4980 		iovp = iov;
4981 		iovp->iov_base = args->data_val;
4982 		iovp->iov_len = args->data_len;
4983 	}
4984 
4985 	uio.uio_iov = iovp;
4986 	uio.uio_iovcnt = iovcnt;
4987 
4988 	uio.uio_segflg = UIO_SYSSPACE;
4989 	uio.uio_extflg = UIO_COPY_DEFAULT;
4990 	uio.uio_loffset = args->offset;
4991 	uio.uio_resid = args->data_len;
4992 	uio.uio_llimit = curproc->p_fsz_ctl;
4993 	rlimit = uio.uio_llimit - args->offset;
4994 	if (rlimit < (u_offset_t)uio.uio_resid)
4995 		uio.uio_resid = (int)rlimit;
4996 
4997 	if (args->stable == UNSTABLE4)
4998 		ioflag = 0;
4999 	else if (args->stable == FILE_SYNC4)
5000 		ioflag = FSYNC;
5001 	else if (args->stable == DATA_SYNC4)
5002 		ioflag = FDSYNC;
5003 	else {
5004 		if (iovp != iov)
5005 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5006 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5007 		goto out;
5008 	}
5009 
5010 	/*
5011 	 * We're changing creds because VM may fault and we need
5012 	 * the cred of the current thread to be used if quota
5013 	 * checking is enabled.
5014 	 */
5015 	savecred = curthread->t_cred;
5016 	curthread->t_cred = cr;
5017 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
5018 	curthread->t_cred = savecred;
5019 
5020 	if (iovp != iov)
5021 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5022 
5023 	if (error) {
5024 		*cs->statusp = resp->status = puterrno4(error);
5025 		goto out;
5026 	}
5027 
5028 	*cs->statusp = resp->status = NFS4_OK;
5029 	resp->count = args->data_len - uio.uio_resid;
5030 
5031 	if (ioflag == 0)
5032 		resp->committed = UNSTABLE4;
5033 	else
5034 		resp->committed = FILE_SYNC4;
5035 
5036 	resp->writeverf = Write4verf;
5037 
5038 out:
5039 	if (in_crit)
5040 		nbl_end_crit(vp);
5041 }
5042 
5043 
5044 /* XXX put in a header file */
5045 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5046 
5047 void
5048 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5049 	struct svc_req *req, cred_t *cr)
5050 {
5051 	uint_t i;
5052 	struct compound_state cs;
5053 
5054 	rfs4_init_compound_state(&cs);
5055 	/*
5056 	 * Form a reply tag by copying over the reqeuest tag.
5057 	 */
5058 	resp->tag.utf8string_val =
5059 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5060 	resp->tag.utf8string_len = args->tag.utf8string_len;
5061 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5062 					resp->tag.utf8string_len);
5063 
5064 	cs.statusp = &resp->status;
5065 
5066 	/*
5067 	 * XXX for now, minorversion should be zero
5068 	 */
5069 	if (args->minorversion != NFS4_MINORVERSION) {
5070 		resp->array_len = 0;
5071 		resp->array = NULL;
5072 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5073 		return;
5074 	}
5075 
5076 	resp->array_len = args->array_len;
5077 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5078 		KM_SLEEP);
5079 
5080 	ASSERT(exi == NULL);
5081 	ASSERT(cr == NULL);
5082 
5083 	cr = crget();
5084 	ASSERT(cr != NULL);
5085 
5086 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5087 		crfree(cr);
5088 		return;
5089 	}
5090 
5091 	cs.basecr = cr;
5092 
5093 	cs.req = req;
5094 
5095 	/*
5096 	 * For now, NFS4 compound processing must be protected by
5097 	 * exported_lock because it can access more than one exportinfo
5098 	 * per compound and share/unshare can now change multiple
5099 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5100 	 * per proc (excluding public exinfo), and exi_count design
5101 	 * is sufficient to protect concurrent execution of NFS2/3
5102 	 * ops along with unexport.  This lock will be removed as
5103 	 * part of the NFSv4 phase 2 namespace redesign work.
5104 	 */
5105 	rw_enter(&exported_lock, RW_READER);
5106 
5107 	/*
5108 	 * If this is the first compound we've seen, we need to start all
5109 	 * new instances' grace periods.
5110 	 */
5111 	if (rfs4_seen_first_compound == 0) {
5112 		rfs4_grace_start_new();
5113 		/*
5114 		 * This must be set after rfs4_grace_start_new(), otherwise
5115 		 * another thread could proceed past here before the former
5116 		 * is finished.
5117 		 */
5118 		rfs4_seen_first_compound = 1;
5119 	}
5120 
5121 	for (i = 0; i < args->array_len && cs.cont; i++) {
5122 		nfs_argop4 *argop;
5123 		nfs_resop4 *resop;
5124 		uint_t op;
5125 
5126 		argop = &args->array[i];
5127 		resop = &resp->array[i];
5128 		resop->resop = argop->argop;
5129 		op = (uint_t)resop->resop;
5130 
5131 		if (op < rfsv4disp_cnt) {
5132 			/*
5133 			 * Count the individual ops here; NULL and COMPOUND
5134 			 * are counted in common_dispatch()
5135 			 */
5136 			rfsproccnt_v4_ptr[op].value.ui64++;
5137 
5138 			NFS4_DEBUG(rfs4_debug > 1,
5139 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5140 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5141 			NFS4_DEBUG(rfs4_debug > 1,
5142 				(CE_NOTE, "%s returned %d",
5143 				rfs4_op_string[op], *cs.statusp));
5144 			if (*cs.statusp != NFS4_OK)
5145 				cs.cont = FALSE;
5146 		} else {
5147 			/*
5148 			 * This is effectively dead code since XDR code
5149 			 * will have already returned BADXDR if op doesn't
5150 			 * decode to legal value.  This only done for a
5151 			 * day when XDR code doesn't verify v4 opcodes.
5152 			 */
5153 			op = OP_ILLEGAL;
5154 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5155 
5156 			rfs4_op_illegal(argop, resop, req, &cs);
5157 			cs.cont = FALSE;
5158 		}
5159 
5160 		/*
5161 		 * If not at last op, and if we are to stop, then
5162 		 * compact the results array.
5163 		 */
5164 		if ((i + 1) < args->array_len && !cs.cont) {
5165 			nfs_resop4 *new_res = kmem_alloc(
5166 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5167 			bcopy(resp->array,
5168 				new_res, (i+1) * sizeof (nfs_resop4));
5169 			kmem_free(resp->array,
5170 				args->array_len * sizeof (nfs_resop4));
5171 
5172 			resp->array_len =  i + 1;
5173 			resp->array = new_res;
5174 		}
5175 	}
5176 
5177 	rw_exit(&exported_lock);
5178 
5179 	if (cs.vp)
5180 		VN_RELE(cs.vp);
5181 	if (cs.saved_vp)
5182 		VN_RELE(cs.saved_vp);
5183 	if (cs.saved_fh.nfs_fh4_val)
5184 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5185 
5186 	if (cs.basecr)
5187 		crfree(cs.basecr);
5188 	if (cs.cr)
5189 		crfree(cs.cr);
5190 }
5191 
5192 /*
5193  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5194  * XXX zero out the tag and array values. Need to investigate why the
5195  * XXX calls occur, but at least prevent the panic for now.
5196  */
5197 void
5198 rfs4_compound_free(COMPOUND4res *resp)
5199 {
5200 	uint_t i;
5201 
5202 	if (resp->tag.utf8string_val) {
5203 		UTF8STRING_FREE(resp->tag)
5204 	}
5205 
5206 	for (i = 0; i < resp->array_len; i++) {
5207 		nfs_resop4 *resop;
5208 		uint_t op;
5209 
5210 		resop = &resp->array[i];
5211 		op = (uint_t)resop->resop;
5212 		if (op < rfsv4disp_cnt) {
5213 			(*rfsv4disptab[op].dis_resfree)(resop);
5214 		}
5215 	}
5216 	if (resp->array != NULL) {
5217 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5218 	}
5219 }
5220 
5221 /*
5222  * Process the value of the compound request rpc flags, as a bit-AND
5223  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5224  */
5225 void
5226 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5227 {
5228 	int i;
5229 	int flag = RPC_ALL;
5230 
5231 	for (i = 0; flag && i < args->array_len; i++) {
5232 		uint_t op;
5233 
5234 		op = (uint_t)args->array[i].argop;
5235 
5236 		if (op < rfsv4disp_cnt)
5237 			flag &= rfsv4disptab[op].dis_flags;
5238 		else
5239 			flag = 0;
5240 	}
5241 	*flagp = flag;
5242 }
5243 
5244 nfsstat4
5245 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5246 {
5247 	nfsstat4 e;
5248 
5249 	rfs4_dbe_lock(cp->dbe);
5250 
5251 	if (cp->sysidt != LM_NOSYSID) {
5252 		*sp = cp->sysidt;
5253 		e = NFS4_OK;
5254 
5255 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5256 		*sp = cp->sysidt;
5257 		e = NFS4_OK;
5258 
5259 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5260 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5261 	} else
5262 		e = NFS4ERR_DELAY;
5263 
5264 	rfs4_dbe_unlock(cp->dbe);
5265 	return (e);
5266 }
5267 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a lock request via cmn_err.  str is a caller supplied
 * prefix, operation the fcntl command (F_GETLK/F_SETLK) and flk the lock
 * description.  A zero l_len is printed as ~0LL.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op = "F_UNKNOWN";
	char *type = "F_UNKNOWN";

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type,
	    (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL,
	    flk->l_pid);
}

/* Call lock_print() only when the debug switch d is non-zero. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds: expands to nothing. */
#define	LOCK_PRINT(d, s, t, f)
#endif
5304 
5305 /*ARGSUSED*/
5306 static bool_t
5307 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5308 {
5309 	return (TRUE);
5310 }
5311 
5312 /*
5313  * Look up the pathname using the vp in cs as the directory vnode.
5314  * cs->vp will be the vnode for the file on success
5315  */
5316 
5317 static nfsstat4
5318 rfs4_lookup(component4 *component, struct svc_req *req,
5319 	    struct compound_state *cs)
5320 {
5321 	char *nm;
5322 	uint32_t len;
5323 	nfsstat4 status;
5324 
5325 	if (cs->vp == NULL) {
5326 		return (NFS4ERR_NOFILEHANDLE);
5327 	}
5328 	if (cs->vp->v_type != VDIR) {
5329 		return (NFS4ERR_NOTDIR);
5330 	}
5331 
5332 	if (!utf8_dir_verify(component))
5333 		return (NFS4ERR_INVAL);
5334 
5335 	nm = utf8_to_fn(component, &len, NULL);
5336 	if (nm == NULL) {
5337 		return (NFS4ERR_INVAL);
5338 	}
5339 
5340 	if (len > MAXNAMELEN) {
5341 		kmem_free(nm, len);
5342 		return (NFS4ERR_NAMETOOLONG);
5343 	}
5344 
5345 	status = do_rfs4_op_lookup(nm, len, req, cs);
5346 
5347 	kmem_free(nm, len);
5348 
5349 	return (status);
5350 }
5351 
5352 static nfsstat4
5353 rfs4_lookupfile(component4 *component, struct svc_req *req,
5354 		struct compound_state *cs, uint32_t access,
5355 		change_info4 *cinfo)
5356 {
5357 	nfsstat4 status;
5358 	vnode_t *dvp = cs->vp;
5359 	vattr_t bva, ava, fva;
5360 	int error;
5361 
5362 	/* Get "before" change value */
5363 	bva.va_mask = AT_CTIME|AT_SEQ;
5364 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5365 	if (error)
5366 		return (puterrno4(error));
5367 
5368 	/* rfs4_lookup may VN_RELE directory */
5369 	VN_HOLD(dvp);
5370 
5371 	status = rfs4_lookup(component, req, cs);
5372 	if (status != NFS4_OK) {
5373 		VN_RELE(dvp);
5374 		return (status);
5375 	}
5376 
5377 	/*
5378 	 * Get "after" change value, if it fails, simply return the
5379 	 * before value.
5380 	 */
5381 	ava.va_mask = AT_CTIME|AT_SEQ;
5382 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5383 		ava.va_ctime = bva.va_ctime;
5384 		ava.va_seq = 0;
5385 	}
5386 	VN_RELE(dvp);
5387 
5388 	/*
5389 	 * Validate the file is a file
5390 	 */
5391 	fva.va_mask = AT_TYPE|AT_MODE;
5392 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5393 	if (error)
5394 		return (puterrno4(error));
5395 
5396 	if (fva.va_type != VREG) {
5397 		if (fva.va_type == VDIR)
5398 			return (NFS4ERR_ISDIR);
5399 		if (fva.va_type == VLNK)
5400 			return (NFS4ERR_SYMLINK);
5401 		return (NFS4ERR_INVAL);
5402 	}
5403 
5404 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5405 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5406 
5407 	/*
5408 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5409 	 * cinfo.atomic = TRUE only if we have
5410 	 * non-zero va_seq's, and they have not changed.
5411 	 */
5412 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5413 		cinfo->atomic = TRUE;
5414 	else
5415 		cinfo->atomic = FALSE;
5416 
5417 	/* Check for mandatory locking */
5418 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5419 	return (check_open_access(access, cs, req));
5420 }
5421 
5422 static nfsstat4
5423 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5424 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5425 {
5426 	int error;
5427 	nfsstat4 status = NFS4_OK;
5428 	vattr_t va;
5429 
5430 tryagain:
5431 
5432 	/*
5433 	 * The file open mode used is VWRITE.  If the client needs
5434 	 * some other semantic, then it should do the access checking
5435 	 * itself.  It would have been nice to have the file open mode
5436 	 * passed as part of the arguments.
5437 	 */
5438 
5439 	*created = TRUE;
5440 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5441 
5442 	if (error) {
5443 		*created = FALSE;
5444 
5445 		/*
5446 		 * If we got something other than file already exists
5447 		 * then just return this error.  Otherwise, we got
5448 		 * EEXIST.  If we were doing a GUARDED create, then
5449 		 * just return this error.  Otherwise, we need to
5450 		 * make sure that this wasn't a duplicate of an
5451 		 * exclusive create request.
5452 		 *
5453 		 * The assumption is made that a non-exclusive create
5454 		 * request will never return EEXIST.
5455 		 */
5456 
5457 		if (error != EEXIST || mode == GUARDED4) {
5458 			status = puterrno4(error);
5459 			return (status);
5460 		}
5461 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5462 
5463 		if (error) {
5464 			/*
5465 			 * We couldn't find the file that we thought that
5466 			 * we just created.  So, we'll just try creating
5467 			 * it again.
5468 			 */
5469 			if (error == ENOENT)
5470 				goto tryagain;
5471 
5472 			status = puterrno4(error);
5473 			return (status);
5474 		}
5475 
5476 		if (mode == UNCHECKED4) {
5477 			/* existing object must be regular file */
5478 			if ((*vpp)->v_type != VREG) {
5479 				if ((*vpp)->v_type == VDIR)
5480 					status = NFS4ERR_ISDIR;
5481 				else if ((*vpp)->v_type == VLNK)
5482 					status = NFS4ERR_SYMLINK;
5483 				else
5484 					status = NFS4ERR_INVAL;
5485 				VN_RELE(*vpp);
5486 				return (status);
5487 			}
5488 
5489 			return (NFS4_OK);
5490 		}
5491 
5492 		/* Check for duplicate request */
5493 		ASSERT(mtime != 0);
5494 		va.va_mask = AT_MTIME;
5495 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5496 		if (!error) {
5497 			/* We found the file */
5498 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5499 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5500 				/* but its not our creation */
5501 				VN_RELE(*vpp);
5502 				return (NFS4ERR_EXIST);
5503 			}
5504 			*created = TRUE; /* retrans of create == created */
5505 			return (NFS4_OK);
5506 		}
5507 		VN_RELE(*vpp);
5508 		return (NFS4ERR_EXIST);
5509 	}
5510 
5511 	return (NFS4_OK);
5512 }
5513 
5514 static nfsstat4
5515 check_open_access(uint32_t access,
5516 		struct compound_state *cs, struct svc_req *req)
5517 {
5518 	int error;
5519 	vnode_t *vp;
5520 	bool_t readonly;
5521 	cred_t *cr = cs->cr;
5522 
5523 	/* For now we don't allow mandatory locking as per V2/V3 */
5524 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5525 		return (NFS4ERR_ACCESS);
5526 	}
5527 
5528 	vp = cs->vp;
5529 	ASSERT(cr != NULL && vp->v_type == VREG);
5530 
5531 	/*
5532 	 * If the file system is exported read only and we are trying
5533 	 * to open for write, then return NFS4ERR_ROFS
5534 	 */
5535 
5536 	readonly = rdonly4(cs->exi, cs->vp, req);
5537 
5538 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5539 		return (NFS4ERR_ROFS);
5540 
5541 	if (access & OPEN4_SHARE_ACCESS_READ) {
5542 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5543 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5544 			return (NFS4ERR_ACCESS);
5545 		}
5546 	}
5547 
5548 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5549 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5550 		if (error)
5551 			return (NFS4ERR_ACCESS);
5552 	}
5553 
5554 	return (NFS4_OK);
5555 }
5556 
5557 static nfsstat4
5558 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5559 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5560 {
5561 	struct nfs4_svgetit_arg sarg;
5562 	struct nfs4_ntov_table ntov;
5563 
5564 	bool_t ntov_table_init = FALSE;
5565 	struct statvfs64 sb;
5566 	nfsstat4 status;
5567 	vnode_t *vp;
5568 	vattr_t bva, ava, iva, cva, *vap;
5569 	vnode_t *dvp;
5570 	timespec32_t *mtime;
5571 	char *nm = NULL;
5572 	uint_t buflen;
5573 	bool_t created;
5574 	bool_t setsize = FALSE;
5575 	len_t reqsize;
5576 	int error;
5577 	bool_t trunc;
5578 	caller_context_t ct;
5579 	component4 *component;
5580 
5581 	sarg.sbp = &sb;
5582 
5583 	dvp = cs->vp;
5584 
5585 	/* Check if the file system is read only */
5586 	if (rdonly4(cs->exi, dvp, req))
5587 		return (NFS4ERR_ROFS);
5588 
5589 	/*
5590 	 * Get the last component of path name in nm. cs will reference
5591 	 * the including directory on success.
5592 	 */
5593 	component = &args->open_claim4_u.file;
5594 	if (!utf8_dir_verify(component))
5595 		return (NFS4ERR_INVAL);
5596 
5597 	nm = utf8_to_fn(component, &buflen, NULL);
5598 
5599 	if (nm == NULL)
5600 		return (NFS4ERR_RESOURCE);
5601 
5602 	if (buflen > MAXNAMELEN) {
5603 		kmem_free(nm, buflen);
5604 		return (NFS4ERR_NAMETOOLONG);
5605 	}
5606 
5607 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5608 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5609 	if (error) {
5610 		kmem_free(nm, buflen);
5611 		return (puterrno4(error));
5612 	}
5613 
5614 	if (bva.va_type != VDIR) {
5615 		kmem_free(nm, buflen);
5616 		return (NFS4ERR_NOTDIR);
5617 	}
5618 
5619 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5620 
5621 	switch (args->mode) {
5622 	case GUARDED4:
5623 		/*FALLTHROUGH*/
5624 	case UNCHECKED4:
5625 		nfs4_ntov_table_init(&ntov);
5626 		ntov_table_init = TRUE;
5627 
5628 		*attrset = 0;
5629 		status = do_rfs4_set_attrs(attrset,
5630 					&args->createhow4_u.createattrs,
5631 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5632 
5633 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5634 		    sarg.vap->va_type != VREG) {
5635 			if (sarg.vap->va_type == VDIR)
5636 				status = NFS4ERR_ISDIR;
5637 			else if (sarg.vap->va_type == VLNK)
5638 				status = NFS4ERR_SYMLINK;
5639 			else
5640 				status = NFS4ERR_INVAL;
5641 		}
5642 
5643 		if (status != NFS4_OK) {
5644 			kmem_free(nm, buflen);
5645 			nfs4_ntov_table_free(&ntov, &sarg);
5646 			*attrset = 0;
5647 			return (status);
5648 		}
5649 
5650 		vap = sarg.vap;
5651 		vap->va_type = VREG;
5652 		vap->va_mask |= AT_TYPE;
5653 
5654 		if ((vap->va_mask & AT_MODE) == 0) {
5655 			vap->va_mask |= AT_MODE;
5656 			vap->va_mode = (mode_t)0600;
5657 		}
5658 
5659 		if (vap->va_mask & AT_SIZE) {
5660 
5661 			/* Disallow create with a non-zero size */
5662 
5663 			if ((reqsize = sarg.vap->va_size) != 0) {
5664 				kmem_free(nm, buflen);
5665 				nfs4_ntov_table_free(&ntov, &sarg);
5666 				*attrset = 0;
5667 				return (NFS4ERR_INVAL);
5668 			}
5669 			setsize = TRUE;
5670 		}
5671 		break;
5672 
5673 	case EXCLUSIVE4:
5674 		/* prohibit EXCL create of named attributes */
5675 		if (dvp->v_flag & V_XATTRDIR) {
5676 			kmem_free(nm, buflen);
5677 			*attrset = 0;
5678 			return (NFS4ERR_INVAL);
5679 		}
5680 
5681 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5682 		cva.va_type = VREG;
5683 		/*
5684 		 * Ensure no time overflows. Assumes underlying
5685 		 * filesystem supports at least 32 bits.
5686 		 * Truncate nsec to usec resolution to allow valid
5687 		 * compares even if the underlying filesystem truncates.
5688 		 */
5689 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5690 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5691 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5692 		cva.va_mode = (mode_t)0;
5693 		vap = &cva;
5694 		break;
5695 	}
5696 
5697 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5698 						cs->cr, &vp, &created);
5699 	kmem_free(nm, buflen);
5700 
5701 	if (status != NFS4_OK) {
5702 		if (ntov_table_init)
5703 			nfs4_ntov_table_free(&ntov, &sarg);
5704 		*attrset = 0;
5705 		return (status);
5706 	}
5707 
5708 	trunc = (setsize && !created);
5709 
5710 	if (args->mode != EXCLUSIVE4) {
5711 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5712 
5713 		/*
5714 		 * True verification that object was created with correct
5715 		 * attrs is impossible.  The attrs could have been changed
5716 		 * immediately after object creation.  If attributes did
5717 		 * not verify, the only recourse for the server is to
5718 		 * destroy the object.  Maybe if some attrs (like gid)
5719 		 * are set incorrectly, the object should be destroyed;
5720 		 * however, seems bad as a default policy.  Do we really
5721 		 * want to destroy an object over one of the times not
5722 		 * verifying correctly?  For these reasons, the server
5723 		 * currently sets bits in attrset for createattrs
5724 		 * that were set; however, no verification is done.
5725 		 *
5726 		 * vmask_to_nmask accounts for vattr bits set on create
5727 		 *	[do_rfs4_set_attrs() only sets resp bits for
5728 		 *	 non-vattr/vfs bits.]
5729 		 * Mask off any bits we set by default so as not to return
5730 		 * more attrset bits than were requested in createattrs
5731 		 */
5732 		if (created) {
5733 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5734 			*attrset &= createmask;
5735 		} else {
5736 			/*
5737 			 * We did not create the vnode (we tried but it
5738 			 * already existed).  In this case, the only createattr
5739 			 * that the spec allows the server to set is size,
5740 			 * and even then, it can only be set if it is 0.
5741 			 */
5742 			*attrset = 0;
5743 			if (trunc)
5744 				*attrset = FATTR4_SIZE_MASK;
5745 		}
5746 	}
5747 	if (ntov_table_init)
5748 		nfs4_ntov_table_free(&ntov, &sarg);
5749 
5750 	/*
5751 	 * Get the initial "after" sequence number, if it fails,
5752 	 * set to zero, time to before.
5753 	 */
5754 	iva.va_mask = AT_CTIME|AT_SEQ;
5755 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5756 		iva.va_seq = 0;
5757 		iva.va_ctime = bva.va_ctime;
5758 	}
5759 
5760 	/*
5761 	 * create_vnode attempts to create the file exclusive,
5762 	 * if it already exists the VOP_CREATE will fail and
5763 	 * may not increase va_seq. It is atomic if
5764 	 * we haven't changed the directory, but if it has changed
5765 	 * we don't know what changed it.
5766 	 */
5767 	if (!created) {
5768 		if (bva.va_seq && iva.va_seq &&
5769 			bva.va_seq == iva.va_seq)
5770 			cinfo->atomic = TRUE;
5771 		else
5772 			cinfo->atomic = FALSE;
5773 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
5774 	} else {
5775 		/*
5776 		 * The entry was created, we need to sync the
5777 		 * directory metadata.
5778 		 */
5779 		(void) VOP_FSYNC(dvp, 0, cs->cr);
5780 
5781 		/*
5782 		 * Get "after" change value, if it fails, simply return the
5783 		 * before value.
5784 		 */
5785 		ava.va_mask = AT_CTIME|AT_SEQ;
5786 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5787 			ava.va_ctime = bva.va_ctime;
5788 			ava.va_seq = 0;
5789 		}
5790 
5791 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5792 
5793 		/*
5794 		 * The cinfo->atomic = TRUE only if we have
5795 		 * non-zero va_seq's, and it has incremented by exactly one
5796 		 * during the create_vnode and it didn't
5797 		 * change during the VOP_FSYNC.
5798 		 */
5799 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
5800 				iva.va_seq == (bva.va_seq + 1) &&
5801 				iva.va_seq == ava.va_seq)
5802 			cinfo->atomic = TRUE;
5803 		else
5804 			cinfo->atomic = FALSE;
5805 	}
5806 
5807 	/* Check for mandatory locking and that the size gets set. */
5808 	cva.va_mask = AT_MODE;
5809 	if (setsize)
5810 		cva.va_mask |= AT_SIZE;
5811 
5812 	/* Assume the worst */
5813 	cs->mandlock = TRUE;
5814 
5815 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
5816 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
5817 
5818 		/*
5819 		 * Truncate the file if necessary; this would be
5820 		 * the case for create over an existing file.
5821 		 */
5822 
5823 		if (trunc) {
5824 			int in_crit = 0;
5825 			rfs4_file_t *fp;
5826 			bool_t create = FALSE;
5827 
5828 			/*
5829 			 * We are writing over an existing file.
5830 			 * Check to see if we need to recall a delegation.
5831 			 */
5832 			rfs4_hold_deleg_policy();
5833 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
5834 				if (rfs4_check_delegated_byfp(FWRITE, fp,
5835 					(reqsize == 0), FALSE, FALSE,
5836 							&clientid)) {
5837 
5838 					rfs4_file_rele(fp);
5839 					rfs4_rele_deleg_policy();
5840 					VN_RELE(vp);
5841 					*attrset = 0;
5842 					return (NFS4ERR_DELAY);
5843 				}
5844 				rfs4_file_rele(fp);
5845 			}
5846 			rfs4_rele_deleg_policy();
5847 
5848 			if (nbl_need_check(vp)) {
5849 				in_crit = 1;
5850 
5851 				ASSERT(reqsize == 0);
5852 
5853 				nbl_start_crit(vp, RW_READER);
5854 				if (nbl_conflict(vp, NBL_WRITE, 0,
5855 						cva.va_size, 0)) {
5856 					in_crit = 0;
5857 					nbl_end_crit(vp);
5858 					VN_RELE(vp);
5859 					*attrset = 0;
5860 					return (NFS4ERR_ACCESS);
5861 				}
5862 			}
5863 			ct.cc_sysid = 0;
5864 			ct.cc_pid = 0;
5865 			ct.cc_caller_id = nfs4_srv_caller_id;
5866 
5867 			cva.va_mask = AT_SIZE;
5868 			cva.va_size = reqsize;
5869 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
5870 			if (in_crit)
5871 				nbl_end_crit(vp);
5872 		}
5873 	}
5874 
5875 	error = makefh4(&cs->fh, vp, cs->exi);
5876 
5877 	/*
5878 	 * Force modified data and metadata out to stable storage.
5879 	 */
5880 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
5881 
5882 	if (error) {
5883 		VN_RELE(vp);
5884 		*attrset = 0;
5885 		return (puterrno4(error));
5886 	}
5887 
5888 	/* if parent dir is attrdir, set namedattr fh flag */
5889 	if (dvp->v_flag & V_XATTRDIR)
5890 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
5891 
5892 	if (cs->vp)
5893 		VN_RELE(cs->vp);
5894 
5895 	cs->vp = vp;
5896 
5897 	/*
5898 	 * if we did not create the file, we will need to check
5899 	 * the access bits on the file
5900 	 */
5901 
5902 	if (!created) {
5903 		if (setsize)
5904 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
5905 		status = check_open_access(args->share_access, cs, req);
5906 		if (status != NFS4_OK)
5907 			*attrset = 0;
5908 	}
5909 	return (status);
5910 }
5911 
/*
 * rfs4_do_open: common back end of the OPEN claim handlers.  cs->vp and
 * cs->fh must already reference the file being opened.
 *
 * The steps are, in order:
 *	1. find (or create) the file and open-state structures for the
 *	   open owner "oo";
 *	2. place a share reservation on the vnode with vop_shrlock();
 *	3. recall a conflicting delegation if rfs4_check_recall() asks
 *	   for it, giving up with NFS4ERR_DELAY if the conflict is still
 *	   there after one delay;
 *	4. fold the access/deny modes into the file and state structures;
 *	5. try to grant a delegation ("deleg" is passed straight to
 *	   rfs4_grant_delegation()) and advance the open stateid.
 *
 * The result is reported through resp->status; resp->stateid and, if a
 * delegation state was returned, resp->delegation are filled in on
 * success.  On the failure paths after the state has been found, a state
 * for which "screate" is TRUE is closed again since the open never
 * completed.
 */
/*ARGSUSED*/
static void
rfs4_do_open(struct compound_state *cs, struct svc_req *req,
		rfs4_openowner_t *oo, delegreq_t deleg,
		uint32_t access, uint32_t deny,
		OPEN4res *resp)
{
	/* XXX Currently not using req  */
	rfs4_state_t *state;
	rfs4_file_t *file;
	bool_t screate = TRUE;
	bool_t fcreate = TRUE;
	uint32_t amodes;
	uint32_t dmodes;
	rfs4_deleg_state_t *dsp;
	struct shrlock shr;
	struct shr_locowner shr_loco;
	sysid_t sysid;
	nfsstat4 status;
	int fflags = 0;
	int recall = 0;
	int err;

	/* get the file struct and hold a lock on it during initial open */
	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
	if (file == NULL) {
		NFS4_DEBUG(rfs4_debug,
			(CE_NOTE, "rfs4_do_open: can't find file"));
		resp->status = NFS4ERR_SERVERFAULT;
		return;
	}

	state = rfs4_findstate_by_owner_file(oo, file, &screate);
	if (state == NULL) {
		NFS4_DEBUG(rfs4_debug,
			(CE_NOTE, "rfs4_do_open: can't find state"));
		resp->status = NFS4ERR_RESOURCE;
		/* No need to keep any reference */
		rfs4_file_rele_withunlock(file);
		return;
	}

	/*
	 * Check for conflicts in deny and access before checking for
	 * conflicts in delegation.  We don't want to recall a
	 * delegation based on an open that will eventually fail based
	 * on shares modes.
	 */

	/* The share owner is identified by (open owner id, client sysid) */
	shr.s_access = (short)access;
	shr.s_deny = (short)deny;
	shr.s_pid = rfs4_dbe_getid(oo->dbe);

	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
		resp->status = status;
		rfs4_file_rele(file);
		/* Not a fully formed open; "close" it */
		if (screate == TRUE)
			rfs4_state_close(state, FALSE, FALSE, cs->cr);
		rfs4_state_rele(state);
		return;
	}
	shr.s_sysid = sysid;
	shr_loco.sl_pid = shr.s_pid;
	shr_loco.sl_id = shr.s_sysid;
	shr.s_owner = (caddr_t)&shr_loco;
	shr.s_own_len = sizeof (shr_loco);

	/* Translate the NFSv4 share access bits into file open flags */
	fflags = 0;
	if (access & OPEN4_SHARE_ACCESS_READ)
		fflags |= FREAD;
	if (access & OPEN4_SHARE_ACCESS_WRITE)
		fflags |= FWRITE;

	if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {

		/* EAGAIN is reported to the client as a share conflict */
		resp->status = err == EAGAIN ?
			NFS4ERR_SHARE_DENIED : puterrno4(err);

		rfs4_file_rele(file);
		/* Not a fully formed open; "close" it */
		if (screate == TRUE)
			rfs4_state_close(state, FALSE, FALSE, cs->cr);
		rfs4_state_rele(state);
		return;
	}

	/* Lock order: state first, then file; unlocked in reverse */
	rfs4_dbe_lock(state->dbe);
	rfs4_dbe_lock(file->dbe);

	/*
	 * Calculate the new deny and access mode that this open is adding to
	 * the file for this open owner;
	 */
	dmodes = (deny & ~state->share_deny);
	amodes = (access & ~state->share_access);

	/*
	 * Check to see if this file is delegated and if so, if a
	 * recall needs to be done.
	 */
	if (rfs4_check_recall(state, access)) {
		/* Both locks are dropped around the recall and the delay */
		rfs4_dbe_unlock(file->dbe);
		rfs4_dbe_unlock(state->dbe);
		rfs4_recall_deleg(file, FALSE, state->owner->client);
		delay(NFS4_DELEGATION_CONFLICT_DELAY);
		rfs4_dbe_lock(state->dbe);
		rfs4_dbe_lock(file->dbe);
		/* Let's see if the delegation was returned */
		if (rfs4_check_recall(state, access)) {
			rfs4_dbe_unlock(file->dbe);
			rfs4_dbe_unlock(state->dbe);
			rfs4_file_rele(file);
			rfs4_update_lease(state->owner->client);
			/* recalculate flags to match what was added */
			fflags = 0;
			if (amodes & OPEN4_SHARE_ACCESS_READ)
				fflags |= FREAD;
			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
				fflags |= FWRITE;
			/* Undo the share reservation taken above */
			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
			/* Not a fully formed open; "close" it */
			if (screate == TRUE)
				rfs4_state_close(state, FALSE, FALSE, cs->cr);
			rfs4_state_rele(state);
			resp->status = NFS4ERR_DELAY;
			return;
		}
	}

	/* Count only the modes this open adds for this open owner */
	if (dmodes & OPEN4_SHARE_DENY_READ)
		file->deny_read++;
	if (dmodes & OPEN4_SHARE_DENY_WRITE)
		file->deny_write++;
	file->share_deny |= deny;
	state->share_deny |= deny;

	if (amodes & OPEN4_SHARE_ACCESS_READ)
		file->access_read++;
	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
		file->access_write++;
	file->share_access |= access;
	state->share_access |= access;

	/*
	 * Check for delegation here. if the deleg argument is not
	 * DELEG_ANY, then this is a reclaim from a client and
	 * we must honor the delegation requested. If necessary we can
	 * set the recall flag.
	 */

	dsp = rfs4_grant_delegation(deleg, state, &recall);

	/* Record in the compound state whether a write delegation exists */
	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);

	next_stateid(&state->stateid);

	resp->stateid = state->stateid.stateid;

	rfs4_dbe_unlock(file->dbe);
	rfs4_dbe_unlock(state->dbe);

	if (dsp) {
		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
		rfs4_deleg_state_rele(dsp);
	}

	rfs4_file_rele(file);
	rfs4_state_rele(state);

	resp->status = NFS4_OK;
}
6084 
6085 /*ARGSUSED*/
6086 static void
6087 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6088 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6089 {
6090 	change_info4 *cinfo = &resp->cinfo;
6091 	bitmap4 *attrset = &resp->attrset;
6092 
6093 	if (args->opentype == OPEN4_NOCREATE)
6094 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6095 					req, cs, args->share_access, cinfo);
6096 	else {
6097 		/* inhibit delegation grants during exclusive create */
6098 
6099 		if (args->mode == EXCLUSIVE4)
6100 			rfs4_disable_delegation();
6101 
6102 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6103 					oo->client->clientid);
6104 	}
6105 
6106 	if (resp->status == NFS4_OK) {
6107 
6108 		/* cs->vp cs->fh now reference the desired file */
6109 
6110 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6111 						args->share_deny, resp);
6112 
6113 		/*
6114 		 * If rfs4_createfile set attrset, we must
6115 		 * clear this attrset before the response is copied.
6116 		 */
6117 		if (resp->status != NFS4_OK && resp->attrset) {
6118 			resp->attrset = 0;
6119 		}
6120 	}
6121 	else
6122 		*cs->statusp = resp->status;
6123 
6124 	if (args->mode == EXCLUSIVE4)
6125 		rfs4_enable_delegation();
6126 }
6127 
6128 /*ARGSUSED*/
6129 static void
6130 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6131 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6132 {
6133 	change_info4 *cinfo = &resp->cinfo;
6134 	vattr_t va;
6135 	vtype_t v_type = cs->vp->v_type;
6136 	int error = 0;
6137 
6138 	/* Verify that we have a regular file */
6139 	if (v_type != VREG) {
6140 		if (v_type == VDIR)
6141 			resp->status = NFS4ERR_ISDIR;
6142 		else if (v_type == VLNK)
6143 			resp->status = NFS4ERR_SYMLINK;
6144 		else
6145 			resp->status = NFS4ERR_INVAL;
6146 		return;
6147 	}
6148 
6149 	va.va_mask = AT_MODE|AT_UID;
6150 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6151 	if (error) {
6152 		resp->status = puterrno4(error);
6153 		return;
6154 	}
6155 
6156 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6157 
6158 	/*
6159 	 * Check if we have access to the file, Note the the file
6160 	 * could have originally been open UNCHECKED or GUARDED
6161 	 * with mode bits that will now fail, but there is nothing
6162 	 * we can really do about that except in the case that the
6163 	 * owner of the file is the one requesting the open.
6164 	 */
6165 	if (crgetuid(cs->cr) != va.va_uid) {
6166 		resp->status = check_open_access(args->share_access, cs, req);
6167 		if (resp->status != NFS4_OK) {
6168 			return;
6169 		}
6170 	}
6171 
6172 	/*
6173 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6174 	 */
6175 	cinfo->before = 0;
6176 	cinfo->after = 0;
6177 	cinfo->atomic = FALSE;
6178 
6179 	rfs4_do_open(cs, req, oo,
6180 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6181 		args->share_access, args->share_deny, resp);
6182 }
6183 
6184 static void
6185 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6186 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6187 {
6188 	int error;
6189 	nfsstat4 status;
6190 	stateid4 stateid =
6191 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6192 	rfs4_deleg_state_t *dsp;
6193 
6194 	/*
6195 	 * Find the state info from the stateid and confirm that the
6196 	 * file is delegated.  If the state openowner is the same as
6197 	 * the supplied openowner we're done. If not, get the file
6198 	 * info from the found state info. Use that file info to
6199 	 * create the state for this lock owner. Note solaris doen't
6200 	 * really need the pathname to find the file. We may want to
6201 	 * lookup the pathname and make sure that the vp exist and
6202 	 * matches the vp in the file structure. However it is
6203 	 * possible that the pathname nolonger exists (local process
6204 	 * unlinks the file), so this may not be that useful.
6205 	 */
6206 
6207 	status = rfs4_get_deleg_state(&stateid, &dsp);
6208 	if (status != NFS4_OK) {
6209 		resp->status = status;
6210 		return;
6211 	}
6212 
6213 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6214 
6215 	/*
6216 	 * New lock owner, create state. Since this was probably called
6217 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6218 	 */
6219 
6220 	ASSERT(cs->vp != NULL);
6221 	VN_RELE(cs->vp);
6222 	VN_HOLD(dsp->finfo->vp);
6223 	cs->vp = dsp->finfo->vp;
6224 
6225 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6226 		rfs4_deleg_state_rele(dsp);
6227 		*cs->statusp = resp->status = puterrno4(error);
6228 		return;
6229 	}
6230 
6231 	/* Mark progress for delegation returns */
6232 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6233 	rfs4_deleg_state_rele(dsp);
6234 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6235 				args->share_access, args->share_deny, resp);
6236 }
6237 
6238 /*ARGSUSED*/
6239 static void
6240 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6241 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6242 {
6243 	/*
6244 	 * Lookup the pathname, it must already exist since this file
6245 	 * was delegated.
6246 	 *
6247 	 * Find the file and state info for this vp and open owner pair.
6248 	 *	check that they are in fact delegated.
6249 	 *	check that the state access and deny modes are the same.
6250 	 *
6251 	 * Return the delgation possibly seting the recall flag.
6252 	 */
6253 	rfs4_file_t *file;
6254 	rfs4_state_t *state;
6255 	bool_t create = FALSE;
6256 	bool_t dcreate = FALSE;
6257 	rfs4_deleg_state_t *dsp;
6258 	nfsace4 *ace;
6259 
6260 
6261 	/* Note we ignore oflags */
6262 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6263 				req, cs, args->share_access, &resp->cinfo);
6264 
6265 	if (resp->status != NFS4_OK) {
6266 		return;
6267 	}
6268 
6269 	/* get the file struct and hold a lock on it during initial open */
6270 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6271 	if (file == NULL) {
6272 		NFS4_DEBUG(rfs4_debug,
6273 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6274 		resp->status = NFS4ERR_SERVERFAULT;
6275 		return;
6276 	}
6277 
6278 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6279 	if (state == NULL) {
6280 		NFS4_DEBUG(rfs4_debug,
6281 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6282 		resp->status = NFS4ERR_SERVERFAULT;
6283 		rfs4_file_rele_withunlock(file);
6284 		return;
6285 	}
6286 
6287 	rfs4_dbe_lock(state->dbe);
6288 	rfs4_dbe_lock(file->dbe);
6289 	if (args->share_access != state->share_access ||
6290 			args->share_deny != state->share_deny ||
6291 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6292 		NFS4_DEBUG(rfs4_debug,
6293 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6294 		rfs4_dbe_unlock(file->dbe);
6295 		rfs4_dbe_unlock(state->dbe);
6296 		rfs4_file_rele(file);
6297 		rfs4_state_rele(state);
6298 		resp->status = NFS4ERR_SERVERFAULT;
6299 		return;
6300 	}
6301 	rfs4_dbe_unlock(file->dbe);
6302 	rfs4_dbe_unlock(state->dbe);
6303 
6304 	dsp = rfs4_finddeleg(state, &dcreate);
6305 	if (dsp == NULL) {
6306 		rfs4_state_rele(state);
6307 		rfs4_file_rele(file);
6308 		resp->status = NFS4ERR_SERVERFAULT;
6309 		return;
6310 	}
6311 
6312 	next_stateid(&state->stateid);
6313 
6314 	resp->stateid = state->stateid.stateid;
6315 
6316 	resp->delegation.delegation_type = dsp->dtype;
6317 
6318 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6319 		open_read_delegation4 *rv =
6320 			&resp->delegation.open_delegation4_u.read;
6321 
6322 		rv->stateid = dsp->delegid.stateid;
6323 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6324 		ace = &rv->permissions;
6325 	} else {
6326 		open_write_delegation4 *rv =
6327 			&resp->delegation.open_delegation4_u.write;
6328 
6329 		rv->stateid = dsp->delegid.stateid;
6330 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6331 		ace = &rv->permissions;
6332 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6333 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6334 	}
6335 
6336 	/* XXX For now */
6337 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6338 	ace->flag = 0;
6339 	ace->access_mask = 0;
6340 	ace->who.utf8string_len = 0;
6341 	ace->who.utf8string_val = 0;
6342 
6343 	rfs4_deleg_state_rele(dsp);
6344 	rfs4_state_rele(state);
6345 	rfs4_file_rele(file);
6346 }
6347 
/*
 * Outcome of a sequence id check.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
6353 
6354 /*
6355  * Generic function for sequence number checks.
6356  */
6357 static rfs4_chkseq_t
6358 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6359 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6360 {
6361 	/* Same sequence ids and matching operations? */
6362 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6363 		if (copyres == TRUE) {
6364 			rfs4_free_reply(resop);
6365 			rfs4_copy_reply(resop, lastop);
6366 		}
6367 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6368 			"Replayed SEQID %d\n", seqid));
6369 		return (NFS4_CHKSEQ_REPLAY);
6370 	}
6371 
6372 	/* If the incoming sequence is not the next expected then it is bad */
6373 	if (rqst_seq != seqid + 1) {
6374 		if (rqst_seq == seqid) {
6375 			NFS4_DEBUG(rfs4_debug,
6376 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6377 				"but last op was %d current op is %d\n",
6378 				lastop->resop, resop->resop));
6379 			return (NFS4_CHKSEQ_BAD);
6380 		}
6381 		NFS4_DEBUG(rfs4_debug,
6382 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6383 				rqst_seq, seqid));
6384 		return (NFS4_CHKSEQ_BAD);
6385 	}
6386 
6387 	/* Everything okay -- next expected */
6388 	return (NFS4_CHKSEQ_OKAY);
6389 }
6390 
6391 
6392 static rfs4_chkseq_t
6393 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6394 {
6395 	rfs4_chkseq_t rc;
6396 
6397 	rfs4_dbe_lock(op->dbe);
6398 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6399 	rfs4_dbe_unlock(op->dbe);
6400 
6401 	if (rc == NFS4_CHKSEQ_OKAY)
6402 		rfs4_update_lease(op->client);
6403 
6404 	return (rc);
6405 }
6406 
6407 static rfs4_chkseq_t
6408 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6409 	nfs_resop4 *resop)
6410 {
6411 	rfs4_chkseq_t rc;
6412 
6413 	rfs4_dbe_lock(op->dbe);
6414 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6415 		olo_seqid, resop, FALSE);
6416 	rfs4_dbe_unlock(op->dbe);
6417 
6418 	return (rc);
6419 }
6420 
6421 static rfs4_chkseq_t
6422 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6423 {
6424 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6425 
6426 	rfs4_dbe_lock(lp->dbe);
6427 	if (!lp->skip_seqid_check)
6428 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6429 			seqid, resop, TRUE);
6430 	rfs4_dbe_unlock(lp->dbe);
6431 
6432 	return (rc);
6433 }
6434 
6435 static void
6436 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6437 	    struct svc_req *req, struct compound_state *cs)
6438 {
6439 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6440 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6441 	open_owner4 *owner = &args->owner;
6442 	open_claim_type4 claim = args->claim;
6443 	rfs4_client_t *cp;
6444 	rfs4_openowner_t *oo;
6445 	bool_t create;
6446 	bool_t replay = FALSE;
6447 	int can_reclaim;
6448 
6449 
6450 	if (cs->vp == NULL) {
6451 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6452 		return;
6453 	}
6454 
6455 	/*
6456 	 * Need to check clientid and lease expiration first based on
6457 	 * error ordering and incrementing sequence id.
6458 	 */
6459 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6460 	if (cp == NULL) {
6461 		*cs->statusp = resp->status =
6462 			rfs4_check_clientid(&owner->clientid, 0);
6463 		return;
6464 	}
6465 
6466 	if (rfs4_lease_expired(cp)) {
6467 		rfs4_client_close(cp);
6468 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6469 		return;
6470 	}
6471 	can_reclaim = cp->can_reclaim;
6472 
6473 	/*
6474 	 * Find the open_owner for use from this point forward.  Take
6475 	 * care in updating the sequence id based on the type of error
6476 	 * being returned.
6477 	 */
6478 retry:
6479 	create = TRUE;
6480 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6481 	if (oo == NULL) {
6482 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6483 		rfs4_client_rele(cp);
6484 		return;
6485 	}
6486 
6487 	/* Hold off access to the sequence space while the open is done */
6488 	rfs4_sw_enter(&oo->oo_sw);
6489 
6490 	/*
6491 	 * If the open_owner existed before at the server, then check
6492 	 * the sequence id.
6493 	 */
6494 	if (!create && !oo->postpone_confirm) {
6495 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6496 		case NFS4_CHKSEQ_BAD:
6497 			if ((args->seqid > oo->open_seqid) &&
6498 				oo->need_confirm) {
6499 				rfs4_free_opens(oo, TRUE, FALSE);
6500 				rfs4_sw_exit(&oo->oo_sw);
6501 				rfs4_openowner_rele(oo);
6502 				goto retry;
6503 			}
6504 			resp->status = NFS4ERR_BAD_SEQID;
6505 			goto out;
6506 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6507 			replay = TRUE;
6508 			goto out;
6509 		default:
6510 			break;
6511 		}
6512 
6513 		/*
6514 		 * Sequence was ok and open owner exists
6515 		 * check to see if we have yet to see an
6516 		 * open_confirm.
6517 		 */
6518 		if (oo->need_confirm) {
6519 			rfs4_free_opens(oo, TRUE, FALSE);
6520 			rfs4_sw_exit(&oo->oo_sw);
6521 			rfs4_openowner_rele(oo);
6522 			goto retry;
6523 		}
6524 	}
6525 	/* Grace only applies to regular-type OPENs */
6526 	if (rfs4_clnt_in_grace(cp) &&
6527 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6528 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6529 		goto out;
6530 	}
6531 
6532 	/*
6533 	 * If previous state at the server existed then can_reclaim
6534 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6535 	 * client.
6536 	 */
6537 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6538 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6539 		goto out;
6540 	}
6541 
6542 
6543 	/*
6544 	 * Reject the open if the client has missed the grace period
6545 	 */
6546 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6547 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6548 		goto out;
6549 	}
6550 
6551 	/* Couple of up-front bookkeeping items */
6552 	if (oo->need_confirm) {
6553 		/*
6554 		 * If this is a reclaim OPEN then we should not ask
6555 		 * for a confirmation of the open_owner per the
6556 		 * protocol specification.
6557 		 */
6558 		if (claim == CLAIM_PREVIOUS)
6559 			oo->need_confirm = FALSE;
6560 		else
6561 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6562 	}
6563 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6564 
6565 	/*
6566 	 * If there is an unshared filesystem mounted on this vnode,
6567 	 * do not allow to open/create in this directory.
6568 	 */
6569 	if (vn_ismntpt(cs->vp)) {
6570 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6571 		goto out;
6572 	}
6573 
6574 	/*
6575 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6576 	 * deny can be READ, WRITE, BOTH, or NONE.
6577 	 * bits not defined for access/deny are invalid.
6578 	 */
6579 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6580 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6581 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6582 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6583 		goto out;
6584 	}
6585 
6586 
6587 	/*
6588 	 * make sure attrset is zero before response is built.
6589 	 */
6590 	resp->attrset = 0;
6591 
6592 	switch (claim) {
6593 	case CLAIM_NULL:
6594 		rfs4_do_opennull(cs, req, args, oo, resp);
6595 	    break;
6596 	case CLAIM_PREVIOUS:
6597 		rfs4_do_openprev(cs, req, args, oo, resp);
6598 	    break;
6599 	case CLAIM_DELEGATE_CUR:
6600 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6601 	    break;
6602 	case CLAIM_DELEGATE_PREV:
6603 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6604 	    break;
6605 	default:
6606 		resp->status = NFS4ERR_INVAL;
6607 		break;
6608 	}
6609 
6610 out:
6611 	rfs4_client_rele(cp);
6612 
6613 	/* Catch sequence id handling here to make it a little easier */
6614 	switch (resp->status) {
6615 	case NFS4ERR_BADXDR:
6616 	case NFS4ERR_BAD_SEQID:
6617 	case NFS4ERR_BAD_STATEID:
6618 	case NFS4ERR_NOFILEHANDLE:
6619 	case NFS4ERR_RESOURCE:
6620 	case NFS4ERR_STALE_CLIENTID:
6621 	case NFS4ERR_STALE_STATEID:
6622 		/*
6623 		 * The protocol states that if any of these errors are
6624 		 * being returned, the sequence id should not be
6625 		 * incremented.  Any other return requires an
6626 		 * increment.
6627 		 */
6628 		break;
6629 	default:
6630 		/* Always update the lease in this case */
6631 		rfs4_update_lease(oo->client);
6632 
6633 		/* Regular response - copy the result */
6634 		if (!replay)
6635 			rfs4_update_open_resp(oo, resop, &cs->fh);
6636 
6637 		/*
6638 		 * REPLAY case: Only if the previous response was OK
6639 		 * do we copy the filehandle.  If not OK, no
6640 		 * filehandle to copy.
6641 		 */
6642 		if (replay == TRUE &&
6643 		    resp->status == NFS4_OK &&
6644 		    oo->reply_fh.nfs_fh4_val) {
6645 			/*
6646 			 * If this is a replay, we must restore the
6647 			 * current filehandle/vp to that of what was
6648 			 * returned originally.  Try our best to do
6649 			 * it.
6650 			 */
6651 			nfs_fh4_fmt_t *fh_fmtp =
6652 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6653 
6654 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6655 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6656 
6657 			if (cs->exi == NULL) {
6658 				resp->status = NFS4ERR_STALE;
6659 				goto finish;
6660 			}
6661 
6662 			VN_RELE(cs->vp);
6663 
6664 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6665 				&resp->status);
6666 
6667 			if (cs->vp == NULL)
6668 				goto finish;
6669 
6670 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6671 		}
6672 
6673 		/*
6674 		 * If this was a replay, no need to update the
6675 		 * sequence id. If the open_owner was not created on
6676 		 * this pass, then update.  The first use of an
6677 		 * open_owner will not bump the sequence id.
6678 		 */
6679 		if (replay == FALSE && !create)
6680 			rfs4_update_open_sequence(oo);
6681 		/*
6682 		 * If the client is receiving an error and the
6683 		 * open_owner needs to be confirmed, there is no way
6684 		 * to notify the client of this fact ignoring the fact
6685 		 * that the server has no method of returning a
6686 		 * stateid to confirm.  Therefore, the server needs to
6687 		 * mark this open_owner in a way as to avoid the
6688 		 * sequence id checking the next time the client uses
6689 		 * this open_owner.
6690 		 */
6691 		if (resp->status != NFS4_OK && oo->need_confirm)
6692 			oo->postpone_confirm = TRUE;
6693 		/*
6694 		 * If OK response then clear the postpone flag and
6695 		 * reset the sequence id to keep in sync with the
6696 		 * client.
6697 		 */
6698 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6699 			oo->postpone_confirm = FALSE;
6700 			oo->open_seqid = args->seqid;
6701 		}
6702 		break;
6703 	}
6704 
6705 finish:
6706 	*cs->statusp = resp->status;
6707 
6708 	rfs4_sw_exit(&oo->oo_sw);
6709 	rfs4_openowner_rele(oo);
6710 }
6711 
6712 /*ARGSUSED*/
6713 void
6714 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6715 		    struct svc_req *req, struct compound_state *cs)
6716 {
6717 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6718 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6719 	rfs4_state_t *sp;
6720 	nfsstat4 status;
6721 
6722 	if (cs->vp == NULL) {
6723 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6724 		return;
6725 	}
6726 
6727 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6728 	if (status != NFS4_OK) {
6729 		*cs->statusp = resp->status = status;
6730 		return;
6731 	}
6732 
6733 	/* Ensure specified filehandle matches */
6734 	if (cs->vp != sp->finfo->vp) {
6735 		rfs4_state_rele(sp);
6736 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6737 		return;
6738 	}
6739 
6740 	/* hold off other access to open_owner while we tinker */
6741 	rfs4_sw_enter(&sp->owner->oo_sw);
6742 
6743 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6744 	case NFS4_CHECK_STATEID_OKAY:
6745 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6746 			resop) != 0) {
6747 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6748 			break;
6749 		}
6750 		/*
6751 		 * If it is the appropriate stateid and determined to
6752 		 * be "OKAY" then this means that the stateid does not
6753 		 * need to be confirmed and the client is in error for
6754 		 * sending an OPEN_CONFIRM.
6755 		 */
6756 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6757 		break;
6758 	case NFS4_CHECK_STATEID_OLD:
6759 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6760 		break;
6761 	case NFS4_CHECK_STATEID_BAD:
6762 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6763 		break;
6764 	case NFS4_CHECK_STATEID_EXPIRED:
6765 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6766 		break;
6767 	case NFS4_CHECK_STATEID_CLOSED:
6768 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6769 		break;
6770 	case NFS4_CHECK_STATEID_REPLAY:
6771 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6772 		case NFS4_CHKSEQ_OKAY:
6773 			/*
6774 			 * This is replayed stateid; if seqid matches
6775 			 * next expected, then client is using wrong seqid.
6776 			 */
6777 			/* fall through */
6778 		case NFS4_CHKSEQ_BAD:
6779 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6780 			break;
6781 		case NFS4_CHKSEQ_REPLAY:
6782 			/*
6783 			 * Note this case is the duplicate case so
6784 			 * resp->status is already set.
6785 			 */
6786 			*cs->statusp = resp->status;
6787 			rfs4_update_lease(sp->owner->client);
6788 			break;
6789 		}
6790 		break;
6791 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6792 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6793 			resop) != NFS4_CHKSEQ_OKAY) {
6794 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6795 			break;
6796 		}
6797 		*cs->statusp = resp->status = NFS4_OK;
6798 
6799 		next_stateid(&sp->stateid);
6800 		resp->open_stateid = sp->stateid.stateid;
6801 		sp->owner->need_confirm = FALSE;
6802 		rfs4_update_lease(sp->owner->client);
6803 		rfs4_update_open_sequence(sp->owner);
6804 		rfs4_update_open_resp(sp->owner, resop, NULL);
6805 		break;
6806 	default:
6807 		ASSERT(FALSE);
6808 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6809 		break;
6810 	}
6811 	rfs4_sw_exit(&sp->owner->oo_sw);
6812 	rfs4_state_rele(sp);
6813 }
6814 
6815 /*ARGSUSED*/
6816 void
6817 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
6818 		    struct svc_req *req, struct compound_state *cs)
6819 {
6820 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
6821 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
6822 	uint32_t access = args->share_access;
6823 	uint32_t deny = args->share_deny;
6824 	nfsstat4 status;
6825 	rfs4_state_t *sp;
6826 	rfs4_file_t *fp;
6827 
6828 	if (cs->vp == NULL) {
6829 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6830 		return;
6831 	}
6832 
6833 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6834 	if (status != NFS4_OK) {
6835 		*cs->statusp = resp->status = status;
6836 		return;
6837 	}
6838 
6839 	/* Ensure specified filehandle matches */
6840 	if (cs->vp != sp->finfo->vp) {
6841 		rfs4_state_rele(sp);
6842 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6843 		return;
6844 	}
6845 
6846 	/* hold off other access to open_owner while we tinker */
6847 	rfs4_sw_enter(&sp->owner->oo_sw);
6848 
6849 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6850 	case NFS4_CHECK_STATEID_OKAY:
6851 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6852 			resop) != NFS4_CHKSEQ_OKAY) {
6853 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6854 			goto end;
6855 		}
6856 		break;
6857 	case NFS4_CHECK_STATEID_OLD:
6858 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6859 		goto end;
6860 	case NFS4_CHECK_STATEID_BAD:
6861 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6862 		goto end;
6863 	case NFS4_CHECK_STATEID_EXPIRED:
6864 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6865 		goto end;
6866 	case NFS4_CHECK_STATEID_CLOSED:
6867 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6868 		goto end;
6869 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6870 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6871 		goto end;
6872 	case NFS4_CHECK_STATEID_REPLAY:
6873 		/* Check the sequence id for the open owner */
6874 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6875 		case NFS4_CHKSEQ_OKAY:
6876 			/*
6877 			 * This is replayed stateid; if seqid matches
6878 			 * next expected, then client is using wrong seqid.
6879 			 */
6880 			/* fall through */
6881 		case NFS4_CHKSEQ_BAD:
6882 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6883 			goto end;
6884 		case NFS4_CHKSEQ_REPLAY:
6885 			/*
6886 			 * Note this case is the duplicate case so
6887 			 * resp->status is already set.
6888 			 */
6889 			*cs->statusp = resp->status;
6890 			rfs4_update_lease(sp->owner->client);
6891 			goto end;
6892 		}
6893 		break;
6894 	default:
6895 		ASSERT(FALSE);
6896 		break;
6897 	}
6898 
6899 	rfs4_dbe_lock(sp->dbe);
6900 	/*
6901 	 * Check that the new access modes and deny modes are valid.
6902 	 * Check that no invalid bits are set.
6903 	 */
6904 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
6905 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
6906 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6907 		rfs4_update_open_sequence(sp->owner);
6908 		rfs4_dbe_unlock(sp->dbe);
6909 		goto end;
6910 	}
6911 
6912 	/*
6913 	 * The new modes must be a subset of the current modes and
6914 	 * the access must specify at least one mode. To test that
6915 	 * the new mode is a subset of the current modes we bitwise
6916 	 * AND them together and check that the result equals the new
6917 	 * mode. For example:
6918 	 * New mode, access == R and current mode, sp->share_access  == RW
6919 	 * access & sp->share_access == R == access, so the new access mode
6920 	 * is valid. Consider access == RW, sp->share_access = R
6921 	 * access & sp->share_access == R != access, so the new access mode
6922 	 * is invalid.
6923 	 */
6924 	if ((access & sp->share_access) != access ||
6925 	    (deny & sp->share_deny) != deny ||
6926 	    (access &
6927 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
6928 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6929 		rfs4_update_open_sequence(sp->owner);
6930 		rfs4_dbe_unlock(sp->dbe);
6931 		goto end;
6932 	}
6933 
6934 	/*
6935 	 * Release any share locks associated with this stateID.
6936 	 * Strictly speaking, this violates the spec because the
6937 	 * spec effectively requires that open downgrade be atomic.
6938 	 * At present, fs_shrlock does not have this capability.
6939 	 */
6940 	rfs4_dbe_unlock(sp->dbe);
6941 	rfs4_unshare(sp);
6942 	rfs4_dbe_lock(sp->dbe);
6943 
6944 	fp = sp->finfo;
6945 	rfs4_dbe_lock(fp->dbe);
6946 
6947 	/*
6948 	 * If the current mode has deny read and the new mode
6949 	 * does not, decrement the number of deny read mode bits
6950 	 * and if it goes to zero turn off the deny read bit
6951 	 * on the file.
6952 	 */
6953 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
6954 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
6955 		fp->deny_read--;
6956 		if (fp->deny_read == 0)
6957 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
6958 	}
6959 
6960 	/*
6961 	 * If the current mode has deny write and the new mode
6962 	 * does not, decrement the number of deny write mode bits
6963 	 * and if it goes to zero turn off the deny write bit
6964 	 * on the file.
6965 	 */
6966 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
6967 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
6968 		fp->deny_write--;
6969 		if (fp->deny_write == 0)
6970 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
6971 	}
6972 
6973 	/*
6974 	 * If the current mode has access read and the new mode
6975 	 * does not, decrement the number of access read mode bits
6976 	 * and if it goes to zero turn off the access read bit
6977 	 * on the file.
6978 	 */
6979 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
6980 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
6981 		fp->access_read--;
6982 		if (fp->access_read == 0)
6983 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
6984 	}
6985 
6986 	/*
6987 	 * If the current mode has access write and the new mode
6988 	 * does not, decrement the number of access write mode bits
6989 	 * and if it goes to zero turn off the access write bit
6990 	 * on the file.
6991 	 */
6992 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
6993 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
6994 		fp->access_write--;
6995 		if (fp->access_write == 0)
6996 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
6997 	}
6998 
6999 	/* Set the new access and deny modes */
7000 	sp->share_access = access;
7001 	sp->share_deny = deny;
7002 	/* Check that the file is still accessible */
7003 	ASSERT(fp->share_access);
7004 
7005 	rfs4_dbe_unlock(fp->dbe);
7006 
7007 	rfs4_dbe_unlock(sp->dbe);
7008 	if ((status = rfs4_share(sp)) != NFS4_OK) {
7009 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7010 		rfs4_update_open_sequence(sp->owner);
7011 		goto end;
7012 	}
7013 
7014 	rfs4_dbe_lock(sp->dbe);
7015 
7016 	/* Update the stateid */
7017 	next_stateid(&sp->stateid);
7018 	resp->open_stateid = sp->stateid.stateid;
7019 
7020 	rfs4_dbe_unlock(sp->dbe);
7021 
7022 	*cs->statusp = resp->status = NFS4_OK;
7023 	/* Update the lease */
7024 	rfs4_update_lease(sp->owner->client);
7025 	/* And the sequence */
7026 	rfs4_update_open_sequence(sp->owner);
7027 	rfs4_update_open_resp(sp->owner, resop, NULL);
7028 
7029 end:
7030 	rfs4_sw_exit(&sp->owner->oo_sw);
7031 	rfs4_state_rele(sp);
7032 }
7033 
7034 /*
7035  * The logic behind this function is detailed in the NFSv4 RFC in the
7036  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7037  * that section for explicit guidance to server behavior for
7038  * SETCLIENTID.
7039  */
7040 void
7041 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7042 		    struct svc_req *req, struct compound_state *cs)
7043 {
7044 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7045 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7046 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7047 	bool_t create = TRUE;
7048 	char *addr, *netid;
7049 	int len;
7050 
7051 retry:
7052 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7053 
7054 	/*
7055 	 * In search of an EXISTING client matching the incoming
7056 	 * request to establish a new client identifier at the server
7057 	 */
7058 	create = TRUE;
7059 	cp = rfs4_findclient(&args->client, &create, NULL);
7060 
7061 	/* Should never happen */
7062 	ASSERT(cp != NULL);
7063 
7064 	if (cp == NULL) {
7065 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7066 		return;
7067 	}
7068 
7069 	/*
7070 	 * Easiest case. Client identifier is newly created and is
7071 	 * unconfirmed.  Also note that for this case, no other
7072 	 * entries exist for the client identifier.  Nothing else to
7073 	 * check.  Just setup the response and respond.
7074 	 */
7075 	if (create) {
7076 		*cs->statusp = res->status = NFS4_OK;
7077 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7078 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7079 							cp->confirm_verf;
7080 		/* Setup callback information; CB_NULL confirmation later */
7081 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7082 
7083 		rfs4_client_rele(cp);
7084 		return;
7085 	}
7086 
7087 	/*
7088 	 * An existing, confirmed client may exist but it may not have
7089 	 * been active for at least one lease period.  If so, then
7090 	 * "close" the client and create a new client identifier
7091 	 */
7092 	if (rfs4_lease_expired(cp)) {
7093 		rfs4_client_close(cp);
7094 		goto retry;
7095 	}
7096 
7097 	if (cp->need_confirm == TRUE)
7098 		cp_unconfirmed = cp;
7099 	else
7100 		cp_confirmed = cp;
7101 
7102 	cp = NULL;
7103 
7104 	/*
7105 	 * We have a confirmed client, now check for an
7106 	 * unconfimred entry
7107 	 */
7108 	if (cp_confirmed) {
7109 		/* If creds don't match then client identifier is inuse */
7110 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7111 			rfs4_cbinfo_t *cbp;
7112 			/*
7113 			 * Some one else has established this client
7114 			 * id. Try and say * who they are. We will use
7115 			 * the call back address supplied by * the
7116 			 * first client.
7117 			 */
7118 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7119 
7120 			addr = netid = NULL;
7121 
7122 			cbp = &cp_confirmed->cbinfo;
7123 			if (cbp->cb_callback.cb_location.r_addr &&
7124 			    cbp->cb_callback.cb_location.r_netid) {
7125 				cb_client4 *cbcp = &cbp->cb_callback;
7126 
7127 				len = strlen(cbcp->cb_location.r_addr)+1;
7128 				addr = kmem_alloc(len, KM_SLEEP);
7129 				bcopy(cbcp->cb_location.r_addr, addr, len);
7130 				len = strlen(cbcp->cb_location.r_netid)+1;
7131 				netid = kmem_alloc(len, KM_SLEEP);
7132 				bcopy(cbcp->cb_location.r_netid, netid, len);
7133 			}
7134 
7135 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7136 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7137 
7138 			rfs4_client_rele(cp_confirmed);
7139 		}
7140 
7141 		/*
7142 		 * Confirmed, creds match, and verifier matches; must
7143 		 * be an update of the callback info
7144 		 */
7145 		if (cp_confirmed->nfs_client.verifier ==
7146 						args->client.verifier) {
7147 			/* Setup callback information */
7148 			rfs4_client_setcb(cp_confirmed, &args->callback,
7149 						args->callback_ident);
7150 
7151 			/* everything okay -- move ahead */
7152 			*cs->statusp = res->status = NFS4_OK;
7153 			res->SETCLIENTID4res_u.resok4.clientid =
7154 				cp_confirmed->clientid;
7155 
7156 			/* update the confirm_verifier and return it */
7157 			rfs4_client_scv_next(cp_confirmed);
7158 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7159 						cp_confirmed->confirm_verf;
7160 
7161 			rfs4_client_rele(cp_confirmed);
7162 			return;
7163 		}
7164 
7165 		/*
7166 		 * Creds match but the verifier doesn't.  Must search
7167 		 * for an unconfirmed client that would be replaced by
7168 		 * this request.
7169 		 */
7170 		create = FALSE;
7171 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7172 						cp_confirmed);
7173 	}
7174 
7175 	/*
7176 	 * At this point, we have taken care of the brand new client
7177 	 * struct, INUSE case, update of an existing, and confirmed
7178 	 * client struct.
7179 	 */
7180 
7181 	/*
7182 	 * check to see if things have changed while we originally
7183 	 * picked up the client struct.  If they have, then return and
7184 	 * retry the processing of this SETCLIENTID request.
7185 	 */
7186 	if (cp_unconfirmed) {
7187 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7188 		if (!cp_unconfirmed->need_confirm) {
7189 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7190 			rfs4_client_rele(cp_unconfirmed);
7191 			if (cp_confirmed)
7192 				rfs4_client_rele(cp_confirmed);
7193 			goto retry;
7194 		}
7195 		/* do away with the old unconfirmed one */
7196 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7197 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7198 		rfs4_client_rele(cp_unconfirmed);
7199 		cp_unconfirmed = NULL;
7200 	}
7201 
7202 	/*
7203 	 * This search will temporarily hide the confirmed client
7204 	 * struct while a new client struct is created as the
7205 	 * unconfirmed one.
7206 	 */
7207 	create = TRUE;
7208 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7209 
7210 	ASSERT(newcp != NULL);
7211 
7212 	if (newcp == NULL) {
7213 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7214 		rfs4_client_rele(cp_confirmed);
7215 		return;
7216 	}
7217 
7218 	/*
7219 	 * If one was not created, then a similar request must be in
7220 	 * process so release and start over with this one
7221 	 */
7222 	if (create != TRUE) {
7223 		rfs4_client_rele(newcp);
7224 		if (cp_confirmed)
7225 			rfs4_client_rele(cp_confirmed);
7226 		goto retry;
7227 	}
7228 
7229 	*cs->statusp = res->status = NFS4_OK;
7230 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7231 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7232 							newcp->confirm_verf;
7233 	/* Setup callback information; CB_NULL confirmation later */
7234 	rfs4_client_setcb(newcp, &args->callback,
7235 				args->callback_ident);
7236 
7237 	newcp->cp_confirmed = cp_confirmed;
7238 
7239 	rfs4_client_rele(newcp);
7240 }
7241 
7242 /*ARGSUSED*/
7243 void
7244 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7245 			    struct svc_req *req, struct compound_state *cs)
7246 {
7247 	SETCLIENTID_CONFIRM4args *args =
7248 		&argop->nfs_argop4_u.opsetclientid_confirm;
7249 	SETCLIENTID_CONFIRM4res *res =
7250 		&resop->nfs_resop4_u.opsetclientid_confirm;
7251 	rfs4_client_t *cp, *cptoclose = NULL;
7252 
7253 	*cs->statusp = res->status = NFS4_OK;
7254 
7255 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7256 
7257 	if (cp == NULL) {
7258 		*cs->statusp = res->status =
7259 			rfs4_check_clientid(&args->clientid, 1);
7260 		return;
7261 	}
7262 
7263 	if (!creds_ok(cp, req, cs)) {
7264 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7265 		rfs4_client_rele(cp);
7266 		return;
7267 	}
7268 
7269 	/* If the verifier doesn't match, the record doesn't match */
7270 	if (cp->confirm_verf != args->setclientid_confirm) {
7271 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7272 		rfs4_client_rele(cp);
7273 		return;
7274 	}
7275 
7276 	rfs4_dbe_lock(cp->dbe);
7277 	cp->need_confirm = FALSE;
7278 	if (cp->cp_confirmed) {
7279 		cptoclose = cp->cp_confirmed;
7280 		cptoclose->ss_remove = 1;
7281 		cp->cp_confirmed = NULL;
7282 	}
7283 
7284 	/*
7285 	 * Record clientid in stable storage
7286 	 */
7287 	rfs4_ss_clid(cp, req);
7288 
7289 	rfs4_dbe_unlock(cp->dbe);
7290 
7291 	if (cptoclose)
7292 		/* don't need to rele, client_close does it */
7293 		rfs4_client_close(cptoclose);
7294 
7295 	/* If needed, initiate CB_NULL call for callback path */
7296 	rfs4_deleg_cb_check(cp);
7297 	rfs4_update_lease(cp);
7298 
7299 	/*
7300 	 * Update the client's associated server instance, if it's changed
7301 	 * since the client was created.
7302 	 */
7303 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7304 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7305 
7306 	/*
7307 	 * Check to see if client can perform reclaims
7308 	 */
7309 	rfs4_ss_chkclid(cp);
7310 
7311 	rfs4_client_rele(cp);
7312 }
7313 
7314 
7315 /*ARGSUSED*/
7316 void
7317 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7318 	    struct svc_req *req, struct compound_state *cs)
7319 {
7320 	/* XXX Currently not using req arg */
7321 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7322 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7323 	rfs4_state_t *sp;
7324 	nfsstat4 status;
7325 
7326 	if (cs->vp == NULL) {
7327 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7328 		return;
7329 	}
7330 
7331 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7332 	if (status != NFS4_OK) {
7333 		*cs->statusp = resp->status = status;
7334 		return;
7335 	}
7336 
7337 	/* Ensure specified filehandle matches */
7338 	if (cs->vp != sp->finfo->vp) {
7339 		rfs4_state_rele(sp);
7340 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7341 		return;
7342 	}
7343 
7344 	/* hold off other access to open_owner while we tinker */
7345 	rfs4_sw_enter(&sp->owner->oo_sw);
7346 
7347 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7348 	case NFS4_CHECK_STATEID_OKAY:
7349 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7350 			resop) != NFS4_CHKSEQ_OKAY) {
7351 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7352 			goto end;
7353 		}
7354 		break;
7355 	case NFS4_CHECK_STATEID_OLD:
7356 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7357 		goto end;
7358 	case NFS4_CHECK_STATEID_BAD:
7359 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7360 		goto end;
7361 	case NFS4_CHECK_STATEID_EXPIRED:
7362 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7363 		goto end;
7364 	case NFS4_CHECK_STATEID_CLOSED:
7365 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7366 		goto end;
7367 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7368 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7369 		goto end;
7370 	case NFS4_CHECK_STATEID_REPLAY:
7371 		/* Check the sequence id for the open owner */
7372 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7373 		case NFS4_CHKSEQ_OKAY:
7374 			/*
7375 			 * This is replayed stateid; if seqid matches
7376 			 * next expected, then client is using wrong seqid.
7377 			 */
7378 			/* FALL THROUGH */
7379 		case NFS4_CHKSEQ_BAD:
7380 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7381 			goto end;
7382 		case NFS4_CHKSEQ_REPLAY:
7383 			/*
7384 			 * Note this case is the duplicate case so
7385 			 * resp->status is already set.
7386 			 */
7387 			*cs->statusp = resp->status;
7388 			rfs4_update_lease(sp->owner->client);
7389 			goto end;
7390 		}
7391 		break;
7392 	default:
7393 		ASSERT(FALSE);
7394 		break;
7395 	}
7396 
7397 	rfs4_dbe_lock(sp->dbe);
7398 
7399 	/* Update the stateid. */
7400 	next_stateid(&sp->stateid);
7401 	resp->open_stateid = sp->stateid.stateid;
7402 
7403 	rfs4_dbe_unlock(sp->dbe);
7404 
7405 	rfs4_update_lease(sp->owner->client);
7406 	rfs4_update_open_sequence(sp->owner);
7407 	rfs4_update_open_resp(sp->owner, resop, NULL);
7408 
7409 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7410 
7411 	*cs->statusp = resp->status = status;
7412 
7413 end:
7414 	rfs4_sw_exit(&sp->owner->oo_sw);
7415 	rfs4_state_rele(sp);
7416 }
7417 
7418 /*
7419  * Manage the counts on the file struct and close all file locks
7420  */
7421 /*ARGSUSED*/
7422 void
7423 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7424 	bool_t close_of_client)
7425 {
7426 	rfs4_file_t *fp = sp->finfo;
7427 	rfs4_lo_state_t *lsp;
7428 	struct shrlock shr;
7429 	struct shr_locowner shr_loco;
7430 	int fflags, s_access, s_deny;
7431 
7432 	fflags = s_access = s_deny = 0;
7433 	/*
7434 	 * Decrement the count for each access and deny bit that this
7435 	 * state has contributed to the file. If the file counts go to zero
7436 	 * clear the appropriate bit in the appropriate mask.
7437 	 */
7438 
7439 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7440 		fp->access_read--;
7441 		fflags |= FREAD;
7442 		s_access |= F_RDACC;
7443 		if (fp->access_read == 0)
7444 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7445 	}
7446 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7447 		fp->access_write--;
7448 		fflags |= FWRITE;
7449 		s_access |= F_WRACC;
7450 		if (fp->access_write == 0)
7451 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7452 	}
7453 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7454 		fp->deny_read--;
7455 		s_deny |= F_RDDNY;
7456 		if (fp->deny_read == 0)
7457 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7458 	}
7459 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7460 		fp->deny_write--;
7461 		s_deny |= F_WRDNY;
7462 		if (fp->deny_write == 0)
7463 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7464 	}
7465 
7466 	/*
7467 	 * If this call is part of the larger closing down of client
7468 	 * state then it is just easier to release all locks
7469 	 * associated with this client instead of going through each
7470 	 * individual file and cleaning locks there.
7471 	 */
7472 	if (close_of_client) {
7473 		if (sp->owner->client->unlksys_completed == FALSE &&
7474 		    sp->lockownerlist.next->lsp != NULL &&
7475 			sp->owner->client->sysidt != LM_NOSYSID) {
7476 			/* Is the PxFS kernel module loaded? */
7477 			if (lm_remove_file_locks != NULL) {
7478 				int new_sysid;
7479 
7480 				/* Encode the cluster nodeid in new sysid */
7481 				new_sysid = sp->owner->client->sysidt;
7482 				lm_set_nlmid_flk(&new_sysid);
7483 
7484 				/*
7485 				 * This PxFS routine removes file locks for a
7486 				 * client over all nodes of a cluster.
7487 				 */
7488 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7489 				    "lm_remove_file_locks(sysid=0x%x)\n",
7490 				    new_sysid));
7491 				(*lm_remove_file_locks)(new_sysid);
7492 			} else {
7493 				struct flock64 flk;
7494 
7495 				/* Release all locks for this client */
7496 				flk.l_type = F_UNLKSYS;
7497 				flk.l_whence = 0;
7498 				flk.l_start = 0;
7499 				flk.l_len = 0;
7500 				flk.l_sysid = sp->owner->client->sysidt;
7501 				flk.l_pid = 0;
7502 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7503 				    F_REMOTELOCK | FREAD | FWRITE,
7504 				    (u_offset_t)0, NULL, CRED());
7505 			}
7506 
7507 			sp->owner->client->unlksys_completed = TRUE;
7508 		}
7509 	}
7510 
7511 	/*
7512 	 * Release all locks on this file by this lock owner or at
7513 	 * least mark the locks as having been released
7514 	 */
7515 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7516 		lsp = lsp->lockownerlist.next->lsp) {
7517 
7518 		lsp->locks_cleaned = TRUE;
7519 
7520 		/* Was this already taken care of above? */
7521 		if (!close_of_client &&
7522 		    sp->owner->client->sysidt != LM_NOSYSID)
7523 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7524 				lsp->locker->client->sysidt);
7525 	}
7526 
7527 	/*
7528 	 * Release any shrlocks associated with this open state ID.
7529 	 * This must be done before the rfs4_state gets marked closed.
7530 	 */
7531 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7532 		shr.s_access = s_access;
7533 		shr.s_deny = s_deny;
7534 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7535 		shr.s_sysid = sp->owner->client->sysidt;
7536 		shr_loco.sl_pid = shr.s_pid;
7537 		shr_loco.sl_id = shr.s_sysid;
7538 		shr.s_owner = (caddr_t)&shr_loco;
7539 		shr.s_own_len = sizeof (shr_loco);
7540 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7541 	}
7542 }
7543 
7544 /*
7545  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7546  */
7547 static nfsstat4
7548 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7549 {
7550 	rfs4_lockowner_t *lo;
7551 	rfs4_client_t *cp;
7552 	uint32_t len;
7553 
7554 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7555 	if (lo != NULL) {
7556 		cp = lo->client;
7557 		if (rfs4_lease_expired(cp)) {
7558 			rfs4_lockowner_rele(lo);
7559 			rfs4_dbe_hold(cp->dbe);
7560 			rfs4_client_close(cp);
7561 			return (NFS4ERR_EXPIRED);
7562 		}
7563 		dp->owner.clientid = lo->owner.clientid;
7564 		len = lo->owner.owner_len;
7565 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7566 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7567 		dp->owner.owner_len = len;
7568 		rfs4_lockowner_rele(lo);
7569 		goto finish;
7570 	}
7571 
7572 	/*
7573 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7574 	 * of the client id contain the boot time for a NFS4 lock. So we
7575 	 * fabricate and identity by setting clientid to the sysid, and
7576 	 * the lock owner to the pid.
7577 	 */
7578 	dp->owner.clientid = flk->l_sysid;
7579 	len = sizeof (pid_t);
7580 	dp->owner.owner_len = len;
7581 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7582 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7583 finish:
7584 	dp->offset = flk->l_start;
7585 	dp->length = flk->l_len;
7586 
7587 	if (flk->l_type == F_RDLCK)
7588 		dp->locktype = READ_LT;
7589 	else if (flk->l_type == F_WRLCK)
7590 		dp->locktype = WRITE_LT;
7591 	else
7592 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7593 
7594 	return (NFS4_OK);
7595 }
7596 
7597 static int
7598 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7599 {
7600 	int error;
7601 	struct flock64 flk;
7602 	int i;
7603 	clock_t delaytime;
7604 
7605 retry:
7606 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7607 
7608 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7609 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7610 		error = VOP_FRLOCK(vp, F_SETLK,
7611 				flock, flag, (u_offset_t)0, NULL, cred);
7612 
7613 		if (error != EAGAIN && error != EACCES)
7614 			break;
7615 
7616 		if (i < rfs4_maxlock_tries - 1) {
7617 			delay(delaytime);
7618 			delaytime *= 2;
7619 		}
7620 	}
7621 
7622 	if (error == EAGAIN || error == EACCES) {
7623 		/* Get the owner of the lock */
7624 		flk = *flock;
7625 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7626 		if (VOP_FRLOCK(vp, F_GETLK,
7627 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7628 			if (flk.l_type == F_UNLCK) {
7629 				/* No longer locked, retry */
7630 				goto retry;
7631 			}
7632 			*flock = flk;
7633 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7634 				F_GETLK, &flk);
7635 		}
7636 	}
7637 
7638 	return (error);
7639 }
7640 
7641 /*ARGSUSED*/
7642 static nfsstat4
7643 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7644 	    seqid4 seqid, offset4 offset,
7645 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7646 {
7647 	nfsstat4 status;
7648 	rfs4_lockowner_t *lo = lp->locker;
7649 	rfs4_state_t *sp = lp->state;
7650 	struct flock64 flock;
7651 	int16_t ltype;
7652 	int flag;
7653 	int error;
7654 	sysid_t sysid;
7655 	LOCK4res *lres;
7656 
7657 	if (rfs4_lease_expired(lo->client)) {
7658 		return (NFS4ERR_EXPIRED);
7659 	}
7660 
7661 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7662 		return (status);
7663 
7664 	/* Check for zero length. To lock to end of file use all ones for V4 */
7665 	if (length == 0)
7666 		return (NFS4ERR_INVAL);
7667 	else if (length == (length4)(~0))
7668 		length = 0;		/* Posix to end of file  */
7669 
7670 retry:
7671 	rfs4_dbe_lock(sp->dbe);
7672 
7673 
7674 	if (resop->resop != OP_LOCKU) {
7675 		switch (locktype) {
7676 		case READ_LT:
7677 		case READW_LT:
7678 			if ((sp->share_access
7679 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7680 				rfs4_dbe_unlock(sp->dbe);
7681 
7682 				return (NFS4ERR_OPENMODE);
7683 			}
7684 			ltype = F_RDLCK;
7685 			break;
7686 		case WRITE_LT:
7687 		case WRITEW_LT:
7688 			if ((sp->share_access
7689 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7690 				rfs4_dbe_unlock(sp->dbe);
7691 
7692 				return (NFS4ERR_OPENMODE);
7693 			}
7694 			ltype = F_WRLCK;
7695 			break;
7696 		}
7697 	} else
7698 		ltype = F_UNLCK;
7699 
7700 	flock.l_type = ltype;
7701 	flock.l_whence = 0;		/* SEEK_SET */
7702 	flock.l_start = offset;
7703 	flock.l_len = length;
7704 	flock.l_sysid = sysid;
7705 	flock.l_pid = lp->locker->pid;
7706 
7707 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7708 	if (flock.l_len < 0 || flock.l_start < 0) {
7709 		rfs4_dbe_unlock(sp->dbe);
7710 		return (NFS4ERR_INVAL);
7711 	}
7712 
7713 	/*
7714 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7715 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7716 	 */
7717 	flag = (int)sp->share_access | F_REMOTELOCK;
7718 
7719 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7720 	if (error == 0) {
7721 		rfs4_dbe_lock(lp->dbe);
7722 		next_stateid(&lp->lockid);
7723 		rfs4_dbe_unlock(lp->dbe);
7724 	}
7725 
7726 	rfs4_dbe_unlock(sp->dbe);
7727 
7728 	/*
7729 	 * N.B. We map error values to nfsv4 errors. This is differrent
7730 	 * than puterrno4 routine.
7731 	 */
7732 	switch (error) {
7733 	case 0:
7734 		status = NFS4_OK;
7735 		break;
7736 	case EAGAIN:
7737 	case EACCES:		/* Old value */
7738 		/* Can only get here if op is OP_LOCK */
7739 		ASSERT(resop->resop == OP_LOCK);
7740 		lres = &resop->nfs_resop4_u.oplock;
7741 		status = NFS4ERR_DENIED;
7742 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7743 			== NFS4ERR_EXPIRED)
7744 			goto retry;
7745 		break;
7746 	case ENOLCK:
7747 		status = NFS4ERR_DELAY;
7748 		break;
7749 	case EOVERFLOW:
7750 		status = NFS4ERR_INVAL;
7751 		break;
7752 	case EINVAL:
7753 		status = NFS4ERR_NOTSUPP;
7754 		break;
7755 	default:
7756 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7757 			error);
7758 		status = NFS4ERR_SERVERFAULT;
7759 		break;
7760 	}
7761 
7762 	return (status);
7763 }
7764 
7765 /*ARGSUSED*/
7766 void
7767 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
7768 	    struct svc_req *req, struct compound_state *cs)
7769 {
7770 	/* XXX Currently not using req arg */
7771 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
7772 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
7773 	nfsstat4 status;
7774 	stateid4 *stateid;
7775 	rfs4_lockowner_t *lo;
7776 	rfs4_client_t *cp;
7777 	rfs4_state_t *sp = NULL;
7778 	rfs4_lo_state_t *lsp = NULL;
7779 	bool_t ls_sw_held = FALSE;
7780 	bool_t create = TRUE;
7781 	bool_t lcreate = TRUE;
7782 	bool_t dup_lock = FALSE;
7783 	int rc;
7784 
7785 	if (cs->vp == NULL) {
7786 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7787 		return;
7788 	}
7789 
7790 	if (args->locker.new_lock_owner) {
7791 		/* Create a new lockowner for this instance */
7792 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
7793 
7794 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
7795 
7796 		stateid = &olo->open_stateid;
7797 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
7798 		if (status != NFS4_OK) {
7799 			NFS4_DEBUG(rfs4_debug,
7800 				(CE_NOTE, "Get state failed in lock %d",
7801 				status));
7802 			*cs->statusp = resp->status = status;
7803 			return;
7804 		}
7805 
7806 		/* Ensure specified filehandle matches */
7807 		if (cs->vp != sp->finfo->vp) {
7808 			rfs4_state_rele(sp);
7809 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7810 			return;
7811 		}
7812 
7813 		/* hold off other access to open_owner while we tinker */
7814 		rfs4_sw_enter(&sp->owner->oo_sw);
7815 
7816 		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
7817 		case NFS4_CHECK_STATEID_OLD:
7818 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7819 			goto end;
7820 		case NFS4_CHECK_STATEID_BAD:
7821 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7822 			goto end;
7823 		case NFS4_CHECK_STATEID_EXPIRED:
7824 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7825 			goto end;
7826 		case NFS4_CHECK_STATEID_UNCONFIRMED:
7827 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7828 			goto end;
7829 		case NFS4_CHECK_STATEID_CLOSED:
7830 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7831 			goto end;
7832 		case NFS4_CHECK_STATEID_OKAY:
7833 		case NFS4_CHECK_STATEID_REPLAY:
7834 			switch (rfs4_check_olo_seqid(olo->open_seqid,
7835 				sp->owner, resop)) {
7836 			case NFS4_CHKSEQ_OKAY:
7837 				if (rc == NFS4_CHECK_STATEID_OKAY)
7838 					break;
7839 				/*
7840 				 * This is replayed stateid; if seqid
7841 				 * matches next expected, then client
7842 				 * is using wrong seqid.
7843 				 */
7844 				/* FALLTHROUGH */
7845 			case NFS4_CHKSEQ_BAD:
7846 				*cs->statusp = resp->status =
7847 					NFS4ERR_BAD_SEQID;
7848 				goto end;
7849 			case NFS4_CHKSEQ_REPLAY:
7850 				/* This is a duplicate LOCK request */
7851 				dup_lock = TRUE;
7852 
7853 				/*
7854 				 * For a duplicate we do not want to
7855 				 * create a new lockowner as it should
7856 				 * already exist.
7857 				 * Turn off the lockowner create flag.
7858 				 */
7859 				lcreate = FALSE;
7860 			}
7861 			break;
7862 		}
7863 
7864 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
7865 		if (lo == NULL) {
7866 			NFS4_DEBUG(rfs4_debug,
7867 				(CE_NOTE, "rfs4_op_lock: no lock owner"));
7868 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
7869 			goto end;
7870 		}
7871 
7872 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
7873 		if (lsp == NULL) {
7874 			rfs4_update_lease(sp->owner->client);
7875 			/*
7876 			 * Only update theh open_seqid if this is not
7877 			 * a duplicate request
7878 			 */
7879 			if (dup_lock == FALSE) {
7880 				rfs4_update_open_sequence(sp->owner);
7881 			}
7882 
7883 			NFS4_DEBUG(rfs4_debug,
7884 				(CE_NOTE, "rfs4_op_lock: no state"));
7885 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7886 			rfs4_update_open_resp(sp->owner, resop, NULL);
7887 			rfs4_lockowner_rele(lo);
7888 			goto end;
7889 		}
7890 
7891 		/*
7892 		 * This is the new_lock_owner branch and the client is
7893 		 * supposed to be associating a new lock_owner with
7894 		 * the open file at this point.  If we find that a
7895 		 * lock_owner/state association already exists and a
7896 		 * successful LOCK request was returned to the client,
7897 		 * an error is returned to the client since this is
7898 		 * not appropriate.  The client should be using the
7899 		 * existing lock_owner branch.
7900 		 */
7901 		if (dup_lock == FALSE && create == FALSE) {
7902 			if (lsp->lock_completed == TRUE) {
7903 				*cs->statusp =
7904 					resp->status = NFS4ERR_BAD_SEQID;
7905 				rfs4_lockowner_rele(lo);
7906 				goto end;
7907 			}
7908 		}
7909 
7910 		rfs4_update_lease(sp->owner->client);
7911 
7912 		/*
7913 		 * Only update theh open_seqid if this is not
7914 		 * a duplicate request
7915 		 */
7916 		if (dup_lock == FALSE) {
7917 			rfs4_update_open_sequence(sp->owner);
7918 		}
7919 
7920 		/*
7921 		 * If this is a duplicate lock request, just copy the
7922 		 * previously saved reply and return.
7923 		 */
7924 		if (dup_lock == TRUE) {
7925 			/* verify that lock_seqid's match */
7926 			if (lsp->seqid != olo->lock_seqid) {
7927 				NFS4_DEBUG(rfs4_debug,
7928 				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
7929 				"lsp->seqid=%d old->seqid=%d",
7930 				lsp->seqid, olo->lock_seqid));
7931 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7932 			} else {
7933 				rfs4_copy_reply(resop, lsp->reply);
7934 				/*
7935 				 * Make sure to copy the just
7936 				 * retrieved reply status into the
7937 				 * overall compound status
7938 				 */
7939 				*cs->statusp = resp->status;
7940 			}
7941 			rfs4_lockowner_rele(lo);
7942 			goto end;
7943 		}
7944 
7945 		rfs4_dbe_lock(lsp->dbe);
7946 
7947 		/* Make sure to update the lock sequence id */
7948 		lsp->seqid = olo->lock_seqid;
7949 
7950 		NFS4_DEBUG(rfs4_debug,
7951 			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));
7952 
7953 		/*
7954 		 * This is used to signify the newly created lockowner
7955 		 * stateid and its sequence number.  The checks for
7956 		 * sequence number and increment don't occur on the
7957 		 * very first lock request for a lockowner.
7958 		 */
7959 		lsp->skip_seqid_check = TRUE;
7960 
7961 		/* hold off other access to lsp while we tinker */
7962 		rfs4_sw_enter(&lsp->ls_sw);
7963 		ls_sw_held = TRUE;
7964 
7965 		rfs4_dbe_unlock(lsp->dbe);
7966 
7967 		rfs4_lockowner_rele(lo);
7968 	} else {
7969 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
7970 		/* get lsp and hold the lock on the underlying file struct */
7971 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
7972 		    != NFS4_OK) {
7973 			*cs->statusp = resp->status = status;
7974 			return;
7975 		}
7976 		create = FALSE;	/* We didn't create lsp */
7977 
7978 		/* Ensure specified filehandle matches */
7979 		if (cs->vp != lsp->state->finfo->vp) {
7980 			rfs4_lo_state_rele(lsp, TRUE);
7981 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7982 			return;
7983 		}
7984 
7985 		/* hold off other access to lsp while we tinker */
7986 		rfs4_sw_enter(&lsp->ls_sw);
7987 		ls_sw_held = TRUE;
7988 
7989 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
7990 		/*
7991 		 * The stateid looks like it was okay (expected to be
7992 		 * the next one)
7993 		 */
7994 		case NFS4_CHECK_STATEID_OKAY:
7995 			/*
7996 			 * The sequence id is now checked.  Determine
7997 			 * if this is a replay or if it is in the
7998 			 * expected (next) sequence.  In the case of a
7999 			 * replay, there are two replay conditions
8000 			 * that may occur.  The first is the normal
8001 			 * condition where a LOCK is done with a
8002 			 * NFS4_OK response and the stateid is
8003 			 * updated.  That case is handled below when
8004 			 * the stateid is identified as a REPLAY.  The
8005 			 * second is the case where an error is
8006 			 * returned, like NFS4ERR_DENIED, and the
8007 			 * sequence number is updated but the stateid
8008 			 * is not updated.  This second case is dealt
8009 			 * with here.  So it may seem odd that the
8010 			 * stateid is okay but the sequence id is a
8011 			 * replay but it is okay.
8012 			 */
8013 			switch (rfs4_check_lock_seqid(
8014 				args->locker.locker4_u.lock_owner.lock_seqid,
8015 				lsp, resop)) {
8016 			case NFS4_CHKSEQ_REPLAY:
8017 				if (resp->status != NFS4_OK) {
8018 					/*
8019 					 * Here is our replay and need
8020 					 * to verify that the last
8021 					 * response was an error.
8022 					 */
8023 					*cs->statusp = resp->status;
8024 					goto end;
8025 				}
8026 				/*
8027 				 * This is done since the sequence id
8028 				 * looked like a replay but it didn't
8029 				 * pass our check so a BAD_SEQID is
8030 				 * returned as a result.
8031 				 */
8032 				/*FALLTHROUGH*/
8033 			case NFS4_CHKSEQ_BAD:
8034 				*cs->statusp = resp->status =
8035 					NFS4ERR_BAD_SEQID;
8036 				goto end;
8037 			case NFS4_CHKSEQ_OKAY:
8038 				/* Everything looks okay move ahead */
8039 				break;
8040 			}
8041 			break;
8042 		case NFS4_CHECK_STATEID_OLD:
8043 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8044 			goto end;
8045 		case NFS4_CHECK_STATEID_BAD:
8046 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8047 			goto end;
8048 		case NFS4_CHECK_STATEID_EXPIRED:
8049 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8050 			goto end;
8051 		case NFS4_CHECK_STATEID_CLOSED:
8052 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8053 			goto end;
8054 		case NFS4_CHECK_STATEID_REPLAY:
8055 			switch (rfs4_check_lock_seqid(
8056 				args->locker.locker4_u.lock_owner.lock_seqid,
8057 				lsp, resop)) {
8058 			case NFS4_CHKSEQ_OKAY:
8059 				/*
8060 				 * This is a replayed stateid; if
8061 				 * seqid matches the next expected,
8062 				 * then client is using wrong seqid.
8063 				 */
8064 			case NFS4_CHKSEQ_BAD:
8065 				*cs->statusp = resp->status =
8066 					NFS4ERR_BAD_SEQID;
8067 				goto end;
8068 			case NFS4_CHKSEQ_REPLAY:
8069 				rfs4_update_lease(lsp->locker->client);
8070 				*cs->statusp = status = resp->status;
8071 				goto end;
8072 			}
8073 			break;
8074 		default:
8075 			ASSERT(FALSE);
8076 			break;
8077 		}
8078 
8079 		rfs4_update_lock_sequence(lsp);
8080 		rfs4_update_lease(lsp->locker->client);
8081 	}
8082 
8083 	/*
8084 	 * NFS4 only allows locking on regular files, so
8085 	 * verify type of object.
8086 	 */
8087 	if (cs->vp->v_type != VREG) {
8088 		if (cs->vp->v_type == VDIR)
8089 			status = NFS4ERR_ISDIR;
8090 		else
8091 			status = NFS4ERR_INVAL;
8092 		goto out;
8093 	}
8094 
8095 	cp = lsp->state->owner->client;
8096 
8097 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8098 		status = NFS4ERR_GRACE;
8099 		goto out;
8100 	}
8101 
8102 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
8103 		status = NFS4ERR_NO_GRACE;
8104 		goto out;
8105 	}
8106 
8107 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8108 		status = NFS4ERR_NO_GRACE;
8109 		goto out;
8110 	}
8111 
8112 	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
8113 		cs->deleg = TRUE;
8114 
8115 	status = rfs4_do_lock(lsp, args->locktype,
8116 				args->locker.locker4_u.lock_owner.lock_seqid,
8117 				args->offset,
8118 				args->length, cs->cr, resop);
8119 
8120 out:
8121 	lsp->skip_seqid_check = FALSE;
8122 
8123 	*cs->statusp = resp->status = status;
8124 
8125 	if (status == NFS4_OK) {
8126 		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
8127 		lsp->lock_completed = TRUE;
8128 	}
8129 	/*
8130 	 * Only update the "OPEN" response here if this was a new
8131 	 * lock_owner
8132 	 */
8133 	if (sp)
8134 		rfs4_update_open_resp(sp->owner, resop, NULL);
8135 
8136 	rfs4_update_lock_resp(lsp, resop);
8137 
8138 end:
8139 	if (lsp) {
8140 		if (ls_sw_held)
8141 			rfs4_sw_exit(&lsp->ls_sw);
8142 		/*
8143 		 * If an sp obtained, then the lsp does not represent
8144 		 * a lock on the file struct.
8145 		 */
8146 		if (sp != NULL)
8147 			rfs4_lo_state_rele(lsp, FALSE);
8148 		else
8149 			rfs4_lo_state_rele(lsp, TRUE);
8150 	}
8151 	if (sp) {
8152 		rfs4_sw_exit(&sp->owner->oo_sw);
8153 		rfs4_state_rele(sp);
8154 	}
8155 }
8156 
8157 /* free function for LOCK/LOCKT */
8158 static void
8159 lock_denied_free(nfs_resop4 *resop)
8160 {
8161 	LOCK4denied *dp = NULL;
8162 
8163 	switch (resop->resop) {
8164 	case OP_LOCK:
8165 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8166 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8167 		break;
8168 	case OP_LOCKT:
8169 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8170 			dp = &resop->nfs_resop4_u.oplockt.denied;
8171 		break;
8172 	default:
8173 		break;
8174 	}
8175 
8176 	if (dp)
8177 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8178 }
8179 
8180 /*ARGSUSED*/
8181 void
8182 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8183 	    struct svc_req *req, struct compound_state *cs)
8184 {
8185 	/* XXX Currently not using req arg */
8186 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8187 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8188 	nfsstat4 status;
8189 	stateid4 *stateid = &args->lock_stateid;
8190 	rfs4_lo_state_t *lsp;
8191 
8192 	if (cs->vp == NULL) {
8193 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8194 		return;
8195 	}
8196 
8197 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8198 		*cs->statusp = resp->status = status;
8199 		return;
8200 	}
8201 
8202 	/* Ensure specified filehandle matches */
8203 	if (cs->vp != lsp->state->finfo->vp) {
8204 		rfs4_lo_state_rele(lsp, TRUE);
8205 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8206 		return;
8207 	}
8208 
8209 	/* hold off other access to lsp while we tinker */
8210 	rfs4_sw_enter(&lsp->ls_sw);
8211 
8212 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8213 	case NFS4_CHECK_STATEID_OKAY:
8214 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8215 		    != NFS4_CHKSEQ_OKAY) {
8216 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8217 			goto end;
8218 		}
8219 		break;
8220 	case NFS4_CHECK_STATEID_OLD:
8221 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8222 		goto end;
8223 	case NFS4_CHECK_STATEID_BAD:
8224 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8225 		goto end;
8226 	case NFS4_CHECK_STATEID_EXPIRED:
8227 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8228 		goto end;
8229 	case NFS4_CHECK_STATEID_CLOSED:
8230 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8231 		goto end;
8232 	case NFS4_CHECK_STATEID_REPLAY:
8233 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8234 		case NFS4_CHKSEQ_OKAY:
8235 				/*
8236 				 * This is a replayed stateid; if
8237 				 * seqid matches the next expected,
8238 				 * then client is using wrong seqid.
8239 				 */
8240 		case NFS4_CHKSEQ_BAD:
8241 			*cs->statusp = resp->status =
8242 				NFS4ERR_BAD_SEQID;
8243 			goto end;
8244 		case NFS4_CHKSEQ_REPLAY:
8245 			rfs4_update_lease(lsp->locker->client);
8246 			*cs->statusp = status = resp->status;
8247 			goto end;
8248 		}
8249 		break;
8250 	default:
8251 		ASSERT(FALSE);
8252 		break;
8253 	}
8254 
8255 	rfs4_update_lock_sequence(lsp);
8256 	rfs4_update_lease(lsp->locker->client);
8257 
8258 	/*
8259 	 * NFS4 only allows locking on regular files, so
8260 	 * verify type of object.
8261 	 */
8262 	if (cs->vp->v_type != VREG) {
8263 		if (cs->vp->v_type == VDIR)
8264 			status = NFS4ERR_ISDIR;
8265 		else
8266 			status = NFS4ERR_INVAL;
8267 		goto out;
8268 	}
8269 
8270 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8271 		status = NFS4ERR_GRACE;
8272 		goto out;
8273 	}
8274 
8275 	status = rfs4_do_lock(lsp, args->locktype,
8276 			    args->seqid, args->offset,
8277 			    args->length, cs->cr, resop);
8278 
8279 out:
8280 	*cs->statusp = resp->status = status;
8281 
8282 	if (status == NFS4_OK)
8283 		resp->lock_stateid = lsp->lockid.stateid;
8284 
8285 	rfs4_update_lock_resp(lsp, resop);
8286 
8287 end:
8288 	rfs4_sw_exit(&lsp->ls_sw);
8289 	rfs4_lo_state_rele(lsp, TRUE);
8290 }
8291 
8292 /*
8293  * LOCKT is a best effort routine, the client can not be guaranteed that
8294  * the status return is still in effect by the time the reply is received.
8295  * They are numerous race conditions in this routine, but we are not required
8296  * and can not be accurate.
8297  */
8298 /*ARGSUSED*/
8299 void
8300 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8301 	    struct svc_req *req, struct compound_state *cs)
8302 {
8303 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8304 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8305 	rfs4_lockowner_t *lo;
8306 	rfs4_client_t *cp;
8307 	bool_t create = FALSE;
8308 	struct flock64 flk;
8309 	int error;
8310 	int flag = FREAD | FWRITE;
8311 	int ltype;
8312 	length4 posix_length;
8313 	sysid_t sysid;
8314 	pid_t pid;
8315 
8316 	if (cs->vp == NULL) {
8317 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8318 		return;
8319 	}
8320 
8321 	/*
8322 	 * NFS4 only allows locking on regular files, so
8323 	 * verify type of object.
8324 	 */
8325 	if (cs->vp->v_type != VREG) {
8326 		if (cs->vp->v_type == VDIR)
8327 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8328 		else
8329 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8330 		return;
8331 	}
8332 
8333 	/*
8334 	 * Check out the clientid to ensure the server knows about it
8335 	 * so that we correctly inform the client of a server reboot.
8336 	 */
8337 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8338 	    == NULL) {
8339 		*cs->statusp = resp->status =
8340 			rfs4_check_clientid(&args->owner.clientid, 0);
8341 		return;
8342 	}
8343 	if (rfs4_lease_expired(cp)) {
8344 		rfs4_client_close(cp);
8345 		/*
8346 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8347 		 * other operations do on this check so STALE_CLIENTID
8348 		 * is returned instead
8349 		 */
8350 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8351 		return;
8352 	}
8353 
8354 	if (rfs4_clnt_in_grace(cp)) {
8355 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8356 		return;
8357 	}
8358 	rfs4_client_rele(cp);
8359 
8360 	resp->status = NFS4_OK;
8361 
8362 	switch (args->locktype) {
8363 	case READ_LT:
8364 	case READW_LT:
8365 		ltype = F_RDLCK;
8366 		break;
8367 	case WRITE_LT:
8368 	case WRITEW_LT:
8369 		ltype = F_WRLCK;
8370 		break;
8371 	}
8372 
8373 	posix_length = args->length;
8374 	/* Check for zero length. To lock to end of file use all ones for V4 */
8375 	if (posix_length == 0) {
8376 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8377 		return;
8378 	} else if (posix_length == (length4)(~0)) {
8379 		posix_length = 0;	/* Posix to end of file  */
8380 	}
8381 
8382 	/* Find or create a lockowner */
8383 	lo = rfs4_findlockowner(&args->owner, &create);
8384 
8385 	if (lo) {
8386 		pid = lo->pid;
8387 		if ((resp->status =
8388 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8389 		goto out;
8390 	} else {
8391 		pid = 0;
8392 		sysid = lockt_sysid;
8393 	}
8394 retry:
8395 	flk.l_type = ltype;
8396 	flk.l_whence = 0;		/* SEEK_SET */
8397 	flk.l_start = args->offset;
8398 	flk.l_len = posix_length;
8399 	flk.l_sysid = sysid;
8400 	flk.l_pid = pid;
8401 	flag |= F_REMOTELOCK;
8402 
8403 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8404 
8405 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8406 	if (flk.l_len < 0 || flk.l_start < 0) {
8407 		resp->status = NFS4ERR_INVAL;
8408 		goto out;
8409 	}
8410 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8411 	    NULL, cs->cr);
8412 
8413 	/*
8414 	 * N.B. We map error values to nfsv4 errors. This is differrent
8415 	 * than puterrno4 routine.
8416 	 */
8417 	switch (error) {
8418 	case 0:
8419 		if (flk.l_type == F_UNLCK)
8420 			resp->status = NFS4_OK;
8421 		else {
8422 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8423 				goto retry;
8424 			resp->status = NFS4ERR_DENIED;
8425 		}
8426 		break;
8427 	case EOVERFLOW:
8428 		resp->status = NFS4ERR_INVAL;
8429 		break;
8430 	case EINVAL:
8431 		resp->status = NFS4ERR_NOTSUPP;
8432 		break;
8433 	default:
8434 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8435 			error);
8436 		resp->status = NFS4ERR_SERVERFAULT;
8437 		break;
8438 	}
8439 
8440 out:
8441 	if (lo)
8442 		rfs4_lockowner_rele(lo);
8443 	*cs->statusp = resp->status;
8444 }
8445 
8446 static int
8447 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8448 {
8449 	int err;
8450 
8451 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8452 		return (0);
8453 
8454 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8455 
8456 	NFS4_DEBUG(rfs4_shrlock_debug,
8457 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8458 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8459 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8460 		err));
8461 
8462 	return (err);
8463 }
8464 
8465 static int
8466 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8467 {
8468 	struct shrlock shr;
8469 	struct shr_locowner shr_loco;
8470 	int fflags;
8471 
8472 	fflags = shr.s_access = shr.s_deny = 0;
8473 
8474 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8475 		fflags |= FREAD;
8476 		shr.s_access |= F_RDACC;
8477 	}
8478 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8479 		fflags |= FWRITE;
8480 		shr.s_access |= F_WRACC;
8481 	}
8482 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8483 		shr.s_deny |= F_RDDNY;
8484 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8485 		shr.s_deny |= F_WRDNY;
8486 
8487 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8488 	shr.s_sysid = sp->owner->client->sysidt;
8489 	shr_loco.sl_pid = shr.s_pid;
8490 	shr_loco.sl_id = shr.s_sysid;
8491 	shr.s_owner = (caddr_t)&shr_loco;
8492 	shr.s_own_len = sizeof (shr_loco);
8493 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8494 }
8495 
8496 static int
8497 rfs4_share(rfs4_state_t *sp)
8498 {
8499 	return (rfs4_shrlock(sp, F_SHARE));
8500 }
8501 
8502 void
8503 rfs4_unshare(rfs4_state_t *sp)
8504 {
8505 	(void) rfs4_shrlock(sp, F_UNSHARE);
8506 }
8507