xref: /titanic_41/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 1b25584432a35965d05ae6fd8a16249b9fca76df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
29  *	All Rights Reserved
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/buf.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 
58 #include <rpc/types.h>
59 #include <rpc/auth.h>
60 #include <rpc/rpcsec_gss.h>
61 #include <rpc/svc.h>
62 
63 #include <nfs/nfs.h>
64 #include <nfs/export.h>
65 #include <nfs/lm.h>
66 #include <nfs/nfs4.h>
67 
68 #include <sys/strsubr.h>
69 #include <sys/strsun.h>
70 
71 #include <inet/common.h>
72 #include <inet/ip.h>
73 #include <inet/ip6.h>
74 
/*
 * Tunables: lock retry count and lock delay.  Each static variable
 * starts out at the value of the #define directly above it.
 */
75 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
76 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
77 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
78 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
79 
80 /* End of Tunables */
81 
82 /*
83  * Used to bump the stateid4.seqid value and show changes in the stateid
84  */
/* Pre-increments (sp)->bits.chgseq; the expression yields the new value. */
85 #define	next_stateid(sp) (++(sp)->bits.chgseq)
86 
87 /*
88  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
89  *	This is used to return NFS4ERR_TOOSMALL when clients specify
90  *	maxcount that isn't large enough to hold the smallest possible
91  *	XDR encoded dirent.
92  *
93  *	    sizeof cookie (8 bytes) +
94  *	    sizeof name_len (4 bytes) +
95  *	    sizeof smallest (padded) name (4 bytes) +
96  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
97  *	    sizeof attrlist4_len (4 bytes) +
98  *	    sizeof next boolean (4 bytes)
99  *
100  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
101  * the smallest possible entry4 (assumes no attrs requested).
102  *	sizeof nfsstat4 (4 bytes) +
103  *	sizeof verifier4 (8 bytes) +
104  *	sizeof entry4list bool (4 bytes) +
105  *	sizeof entry4 	(36 bytes) +
106  *	sizeof eof bool  (4 bytes)
107  *
108  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
109  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
110  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
111  *	required for a given name length.  MAXNAMELEN is the maximum
112  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
113  *	macros are to allow for . and .. entries -- just a minor tweak to try
114  *	and guarantee that buffer we give to VOP_READDIR will be large enough
115  *	to hold ., .., and the largest possible solaris dirent64.
116  */
/* Sum of the six entry4 fields itemized above: 8 + 4 + 4 + 12 + 4 + 4. */
117 #define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + one minimal entry4 + eof bool. */
118 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* Room for ".", ".." and one dirent64 carrying a MAXNAMELEN-sized name. */
119 #define	RFS4_MINLEN_RDDIR_BUF \
120 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
121 
122 /*
123  * It would be better to pad to 4 bytes since that's what XDR would do,
124  * but the dirents UFS gives us are already padded to 8, so just take
125  * what we're given.  Dircount is only a hint anyway.  Currently the
126  * solaris kernel is ASCII only, so there's no point in calling the
127  * UTF8 functions.
128  *
129  * dirent64: name padded to provide 8 byte struct alignment
130  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
131  *
132  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
133  *
134  */
/*
 * Dircount contribution of one dirent64: three XDR units (the 8-byte
 * cookie plus the 4-byte name length, assuming BYTES_PER_XDR_UNIT is
 * 4 -- confirm) plus the name length derived from d_reclen.
 */
135 #define	DIRENT64_TO_DIRCOUNT(dp) \
136 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
137 
/*
 * NOTE(review): the rfs4_srvrinit() defined in this file does not
 * assign rfs4_start_time; confirm where it is actually set.
 */
138 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
139 
/* Allocated by lm_alloc_sysidt() in rfs4_srvrinit, freed in rfs4_srvrfini. */
140 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
141 
/* Set once in rfs4_srvrinit from fs_new_caller_id(). */
142 u_longlong_t nfs4_srv_caller_id;
143 
/* Write verifier; built from hostid/time in rfs4_srvrinit. */
144 verifier4	Write4verf;
/* NOTE(review): not assigned in the visible part of this file. */
145 verifier4	Readdir4verf;
146 
/*
 * Forward declarations.  The rfs4_op_* routines are the per-operation
 * handlers installed in rfsv4disptab (dis_proc); the *_free routines
 * and nullfree are result-free routines (dis_resfree).
 */
147 void		rfs4_init_compound_state(struct compound_state *);
148 
149 static void	nullfree(caddr_t);
150 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
151 			struct compound_state *);
152 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
153 			struct compound_state *);
154 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
155 			struct compound_state *);
156 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
157 			struct compound_state *);
158 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
159 			struct compound_state *);
160 static void	rfs4_op_create_free(nfs_resop4 *resop);
161 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
162 				    struct svc_req *, struct compound_state *);
163 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
164 			struct compound_state *);
165 static void	rfs4_op_getattr_free(nfs_resop4 *);
166 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 			struct compound_state *);
168 static void	rfs4_op_getfh_free(nfs_resop4 *);
169 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
170 			struct compound_state *);
171 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
172 			struct compound_state *);
173 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 			struct compound_state *);
175 static void	lock_denied_free(nfs_resop4 *);
176 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 			struct compound_state *);
178 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 			struct compound_state *);
180 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 			struct compound_state *);
182 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 			struct compound_state *);
184 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
185 				struct svc_req *req, struct compound_state *cs);
186 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 			struct compound_state *);
188 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 			struct compound_state *);
190 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
191 			struct svc_req *, struct compound_state *);
192 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
193 			struct svc_req *, struct compound_state *);
194 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 			struct compound_state *);
196 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 			struct compound_state *);
198 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 			struct compound_state *);
200 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 			struct compound_state *);
202 static void	rfs4_op_read_free(nfs_resop4 *);
203 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
204 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 			struct compound_state *);
206 static void	rfs4_op_readlink_free(nfs_resop4 *);
207 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
208 			struct svc_req *, struct compound_state *);
209 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 			struct compound_state *);
211 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
212 			struct compound_state *);
213 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 			struct compound_state *);
215 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 			struct compound_state *);
217 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 			struct compound_state *);
219 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 			struct compound_state *);
221 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 			struct compound_state *);
223 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 			struct compound_state *);
225 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
226 			struct svc_req *, struct compound_state *);
227 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
228 			struct svc_req *req, struct compound_state *);
229 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 			struct compound_state *);
231 static void	rfs4_op_secinfo_free(nfs_resop4 *);
232 
/* Helpers for access checking, client sysids and share reservations. */
233 static nfsstat4 check_open_access(uint32_t,
234 				struct compound_state *, struct svc_req *);
235 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
236 static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
237 static int 	rfs4_shrlock(rfs4_state_t *, int);
238 static int	rfs4_share(rfs4_state_t *);
239 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
240 
241 /*
242  * translation table for attrs
243  */
/*
 * NOTE(review): field roles are not documented in this part of the
 * file.  Presumably na holds per-attribute values, amap maps table
 * slots to attribute indices, attrcnt counts the slots in use and
 * vfsstat records whether filesystem statistics were needed -- confirm
 * against nfs4_ntov_table_init() and do_rfs4_set_attrs().
 */
244 struct nfs4_ntov_table {
245 	union nfs4_attr_u *na;
246 	uint8_t amap[NFS4_MAXNUM_ATTRS];
247 	int attrcnt;
248 	bool_t vfsstat;
249 };
250 
/* Set-up and tear-down routines for struct nfs4_ntov_table. */
251 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
252 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
253 				    struct nfs4_svgetit_arg *sargp);
254 
255 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
256 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
257 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
258 
/*
 * FEM monitors created by fem_create() in rfs4_srvrinit from
 * nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl, released in rfs4_srvrfini.
 */
259 fem_t	*deleg_rdops;
260 fem_t	*deleg_wrops;
261 
/*
 * Server instances form a list linked through ->prev/->next;
 * rfs4_cur_servinst is the most recently created one.
 */
262 rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
263 kmutex_t	rfs4_servinst_lock;		/* protects linked list */
264 int		rfs4_seen_first_compound;	/* set first time we see one */
265 
266 #ifdef DEBUG
/* Passed as the first argument of NFS4_DEBUG() by the server-instance code. */
267 int	rfs4_servinst_debug = 0;
268 #endif
269 
270 /*
271  * NFS4 op dispatch table
272  */
273 
/*
 * One dispatch entry per NFSv4 operation number: the handler, the
 * routine that frees what the handler allocated for its result, and
 * RPC_* flag bits.
 */
274 struct rfsv4disp {
275 	void	(*dis_proc)();		/* proc to call */
276 	void	(*dis_resfree)();	/* frees space allocated by proc */
277 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
278 };
279 
/*
 * Indexed by operation number; the comment above each entry gives the
 * index.  Slots 0-2 are not real operations and map to rfs4_op_illegal.
 */
280 static struct rfsv4disp rfsv4disptab[] = {
281 	/*
282 	 * NFS VERSION 4
283 	 */
284 
285 	/* RFS_NULL = 0 */
286 	{rfs4_op_illegal, nullfree, 0},
287 
288 	/* UNUSED = 1 */
289 	{rfs4_op_illegal, nullfree, 0},
290 
291 	/* UNUSED = 2 */
292 	{rfs4_op_illegal, nullfree, 0},
293 
294 	/* OP_ACCESS = 3 */
295 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
296 
297 	/* OP_CLOSE = 4 */
298 	{rfs4_op_close, nullfree, 0},
299 
300 	/* OP_COMMIT = 5 */
301 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
302 
303 	/* OP_CREATE = 6 */
304 	{rfs4_op_create, nullfree, 0},
305 
306 	/* OP_DELEGPURGE = 7 */
307 	{rfs4_op_inval, nullfree, 0},
308 
309 	/* OP_DELEGRETURN = 8 */
310 	{rfs4_op_delegreturn, nullfree, 0},
311 
312 	/* OP_GETATTR = 9 */
313 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
314 
315 	/* OP_GETFH = 10 */
316 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
317 
318 	/* OP_LINK = 11 */
319 	{rfs4_op_link, nullfree, 0},
320 
321 	/* OP_LOCK = 12 */
322 	{rfs4_op_lock, lock_denied_free, 0},
323 
324 	/* OP_LOCKT = 13 */
325 	{rfs4_op_lockt, lock_denied_free, 0},
326 
327 	/* OP_LOCKU = 14 */
328 	{rfs4_op_locku, nullfree, 0},
329 
330 	/* OP_LOOKUP = 15 */
331 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
332 
333 	/* OP_LOOKUPP = 16 */
334 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
335 
336 	/* OP_NVERIFY = 17 */
337 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
338 
339 	/* OP_OPEN = 18 */
340 	{rfs4_op_open, rfs4_free_reply, 0},
341 
342 	/* OP_OPENATTR = 19 */
343 	{rfs4_op_openattr, nullfree, 0},
344 
345 	/* OP_OPEN_CONFIRM = 20 */
346 	{rfs4_op_open_confirm, nullfree, 0},
347 
348 	/* OP_OPEN_DOWNGRADE = 21 */
349 	{rfs4_op_open_downgrade, nullfree, 0},
350 
351 	/* OP_PUTFH = 22 */
352 	{rfs4_op_putfh, nullfree, RPC_ALL},
353 
354 	/* OP_PUTPUBFH = 23 */
355 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
356 
357 	/* OP_PUTROOTFH = 24 */
358 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
359 
360 	/* OP_READ = 25 */
361 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
362 
363 	/* OP_READDIR = 26 */
364 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
365 
366 	/* OP_READLINK = 27 */
367 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
368 
369 	/* OP_REMOVE = 28 */
370 	{rfs4_op_remove, nullfree, 0},
371 
372 	/* OP_RENAME = 29 */
373 	{rfs4_op_rename, nullfree, 0},
374 
375 	/* OP_RENEW = 30 */
376 	{rfs4_op_renew, nullfree, 0},
377 
378 	/* OP_RESTOREFH = 31 */
379 	{rfs4_op_restorefh, nullfree, RPC_ALL},
380 
381 	/* OP_SAVEFH = 32 */
382 	{rfs4_op_savefh, nullfree, RPC_ALL},
383 
384 	/* OP_SECINFO = 33 */
385 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
386 
387 	/* OP_SETATTR = 34 */
388 	{rfs4_op_setattr, nullfree, 0},
389 
390 	/* OP_SETCLIENTID = 35 */
391 	{rfs4_op_setclientid, nullfree, 0},
392 
393 	/* OP_SETCLIENTID_CONFIRM = 36 */
394 	{rfs4_op_setclientid_confirm, nullfree, 0},
395 
396 	/* OP_VERIFY = 37 */
397 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
398 
399 	/* OP_WRITE = 38 */
400 	{rfs4_op_write, nullfree, 0},
401 
402 	/* OP_RELEASE_LOCKOWNER = 39 */
403 	{rfs4_op_release_lockowner, nullfree, 0},
404 };
405 
/* Number of entries in rfsv4disptab (40: indices 0 through 39). */
406 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
407 
/*
 * One past the last rfsv4disptab slot, so it must not be used to index
 * that table.  It does line up with the final "rfs4_op_illegal" entry
 * of the DEBUG-only rfs4_op_string[] array.
 */
408 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
409 
#ifdef DEBUG

/* Debug knobs; each is a plain int that can be patched at run time. */
int rfs4_fillone_debug = 0;
int rfs4_shrlock_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable handler name for each operation number, in the same order
 * as rfsv4disptab.  The array has one extra trailing entry,
 * "rfs4_op_illegal", at index OP_ILLEGAL_IDX.
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; now matches the handler name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
461 
462 void rfs4_ss_chkclid(rfs4_client_t *);
463 
/*
 * Step to the following dirent64 in a buffer by advancing d_reclen
 * bytes.  Any earlier definition of nextdp is replaced.
 */
464 #ifdef	nextdp
465 #undef nextdp
466 #endif
467 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
468 
/*
 * Vnode-operation templates handed to fem_create() in rfs4_srvrinit.
 * Each is a list of (operation name, monitor routine) pairs closed by
 * a NULL, NULL pair.  This one builds deleg_rdops.
 */
469 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
470 	VOPNAME_OPEN, deleg_rdopen,
471 	VOPNAME_WRITE, deleg_write,
472 	VOPNAME_SETATTR, deleg_setattr,
473 	VOPNAME_RWLOCK, deleg_rd_rwlock,
474 	VOPNAME_SPACE, deleg_space,
475 	VOPNAME_SETSECATTR, deleg_setsecattr,
476 	VOPNAME_VNEVENT, deleg_vnevent,
477 	NULL, NULL
478 };
/* Builds deleg_wrops; unlike the read template it also covers VOPNAME_READ. */
479 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
480 	VOPNAME_OPEN, deleg_wropen,
481 	VOPNAME_READ, deleg_read,
482 	VOPNAME_WRITE, deleg_write,
483 	VOPNAME_SETATTR, deleg_setattr,
484 	VOPNAME_RWLOCK, deleg_wr_rwlock,
485 	VOPNAME_SPACE, deleg_space,
486 	VOPNAME_SETSECATTR, deleg_setsecattr,
487 	VOPNAME_VNEVENT, deleg_vnevent,
488 	NULL, NULL
489 };
490 
491 int
492 rfs4_srvrinit(void)
493 {
494 	timespec32_t verf;
495 	int error;
496 	extern void rfs4_attr_init();
497 	extern krwlock_t rfs4_deleg_policy_lock;
498 
499 	/*
500 	 * The following algorithm attempts to find a unique verifier
501 	 * to be used as the write verifier returned from the server
502 	 * to the client.  It is important that this verifier change
503 	 * whenever the server reboots.  Of secondary importance, it
504 	 * is important for the verifier to be unique between two
505 	 * different servers.
506 	 *
507 	 * Thus, an attempt is made to use the system hostid and the
508 	 * current time in seconds when the nfssrv kernel module is
509 	 * loaded.  It is assumed that an NFS server will not be able
510 	 * to boot and then to reboot in less than a second.  If the
511 	 * hostid has not been set, then the current high resolution
512 	 * time is used.  This will ensure different verifiers each
513 	 * time the server reboots and minimize the chances that two
514 	 * different servers will have the same verifier.
515 	 * XXX - this is broken on LP64 kernels.
516 	 */
517 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
518 	if (verf.tv_sec != 0) {
519 		verf.tv_nsec = gethrestime_sec();
520 	} else {
521 		timespec_t tverf;
522 
523 		gethrestime(&tverf);
524 		verf.tv_sec = (time_t)tverf.tv_sec;
525 		verf.tv_nsec = tverf.tv_nsec;
526 	}
527 
528 	Write4verf = *(uint64_t *)&verf;
529 
530 	rfs4_attr_init();
531 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
532 
533 	/* Used to manage create/destroy of server state */
534 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
535 
536 	/* Used to manage access to server instance linked list */
537 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
538 
539 	/* Used to manage access to rfs4_deleg_policy */
540 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
541 
542 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
543 	if (error != 0) {
544 		rfs4_disable_delegation();
545 	} else {
546 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
547 				&deleg_wrops);
548 		if (error != 0) {
549 			rfs4_disable_delegation();
550 			fem_free(deleg_rdops);
551 		}
552 	}
553 
554 	nfs4_srv_caller_id = fs_new_caller_id();
555 
556 	lockt_sysid = lm_alloc_sysidt();
557 
558 	return (0);
559 }
560 
561 void
562 rfs4_srvrfini(void)
563 {
564 	extern krwlock_t rfs4_deleg_policy_lock;
565 
566 	if (lockt_sysid != LM_NOSYSID) {
567 		lm_free_sysidt(lockt_sysid);
568 		lockt_sysid = LM_NOSYSID;
569 	}
570 
571 	mutex_destroy(&rfs4_deleg_lock);
572 	mutex_destroy(&rfs4_state_lock);
573 	rw_destroy(&rfs4_deleg_policy_lock);
574 
575 	fem_free(deleg_rdops);
576 	fem_free(deleg_wrops);
577 }
578 
579 void
580 rfs4_init_compound_state(struct compound_state *cs)
581 {
582 	bzero(cs, sizeof (*cs));
583 	cs->cont = TRUE;
584 	cs->access = CS_ACCESS_DENIED;
585 	cs->deleg = FALSE;
586 	cs->mandlock = FALSE;
587 	cs->fh.nfs_fh4_val = cs->fhbuf;
588 }
589 
590 void
591 rfs4_grace_start(rfs4_servinst_t *sip)
592 {
593 	time_t now = gethrestime_sec();
594 
595 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
596 	    "rfs4_grace_start: inst %p: 0x%lx", (void *)sip, now));
597 
598 	rw_enter(&sip->rwlock, RW_WRITER);
599 	sip->start_time = now;
600 	sip->grace_period = rfs4_grace_period;
601 	rw_exit(&sip->rwlock);
602 }
603 
604 /*
605  * returns true if the instance's grace period has never been started
606  */
607 int
608 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
609 {
610 	time_t start_time;
611 
612 	rw_enter(&sip->rwlock, RW_READER);
613 	start_time = sip->start_time;
614 	rw_exit(&sip->rwlock);
615 
616 	return (start_time == 0);
617 }
618 
619 /*
620  * Indicates if server instance is within the
621  * grace period.
622  */
623 int
624 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
625 {
626 	time_t grace_expiry;
627 
628 	rw_enter(&sip->rwlock, RW_READER);
629 	grace_expiry = sip->start_time + sip->grace_period;
630 	rw_exit(&sip->rwlock);
631 
632 	return (gethrestime_sec() < grace_expiry);
633 }
634 
635 int
636 rfs4_clnt_in_grace(rfs4_client_t *cp)
637 {
638 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
639 
640 	return (rfs4_servinst_in_grace(cp->server_instance));
641 }
642 
643 /*
644  * reset all currently active grace periods
645  */
646 void
647 rfs4_grace_reset_all(void)
648 {
649 #ifdef DEBUG
650 	int n = 0;
651 #endif
652 	rfs4_servinst_t *sip;
653 
654 	mutex_enter(&rfs4_servinst_lock);
655 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
656 		if (rfs4_servinst_in_grace(sip)) {
657 			rfs4_grace_start(sip);
658 #ifdef DEBUG
659 			n++;
660 #endif
661 		}
662 	}
663 	mutex_exit(&rfs4_servinst_lock);
664 
665 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
666 	    "rfs4_grace_reset_all: reset %d instances", n));
667 }
668 
669 /*
670  * start any new instances' grace periods
671  */
672 void
673 rfs4_grace_start_new(void)
674 {
675 #ifdef DEBUG
676 	int n = 0;
677 #endif
678 	rfs4_servinst_t *sip;
679 
680 	mutex_enter(&rfs4_servinst_lock);
681 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
682 		if (rfs4_servinst_grace_new(sip))
683 			rfs4_grace_start(sip);
684 #ifdef DEBUG
685 		n++;
686 #endif
687 	}
688 	mutex_exit(&rfs4_servinst_lock);
689 
690 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
691 	    "rfs4_grace_start_new: started %d new instances", n));
692 }
693 
694 /*
695  * Create a new server instance, and make it the currently active instance.
696  * Note that starting the grace period too early will reduce the clients'
697  * recovery window.
698  */
699 void
700 rfs4_servinst_create(int start_grace)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
705 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
706 
707 	sip->start_time = (time_t)0;
708 	sip->grace_period = (time_t)0;
709 	sip->next = NULL;
710 	sip->prev = NULL;
711 
712 	mutex_enter(&rfs4_servinst_lock);
713 	if (rfs4_cur_servinst == NULL) {
714 		NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
715 		    "rfs4_servinst_create: creating first instance"));
716 	} else {
717 		/* add to linked list */
718 		sip->prev = rfs4_cur_servinst;
719 		rfs4_cur_servinst->next = sip;
720 	}
721 	if (start_grace)
722 		rfs4_grace_start(sip);
723 	/* make the new instance "current" */
724 	rfs4_cur_servinst = sip;
725 	mutex_exit(&rfs4_servinst_lock);
726 
727 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
728 	    "rfs4_servinst_create: new current instance: %p; start_grace: %d",
729 	    (void *)sip, start_grace));
730 }
731 
732 /*
733  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
734  * all instances directly.
735  */
736 void
737 rfs4_servinst_destroy_all(void)
738 {
739 	rfs4_servinst_t *sip, *prev, *current;
740 #ifdef DEBUG
741 	int n = 0;
742 #endif
743 
744 	mutex_enter(&rfs4_servinst_lock);
745 	ASSERT(rfs4_cur_servinst != NULL);
746 	current = rfs4_cur_servinst;
747 	rfs4_cur_servinst = NULL;
748 	for (sip = current; sip != NULL; sip = prev) {
749 		prev = sip->prev;
750 		rw_destroy(&sip->rwlock);
751 		kmem_free(sip, sizeof (rfs4_servinst_t));
752 #ifdef DEBUG
753 		n++;
754 #endif
755 	}
756 	mutex_exit(&rfs4_servinst_lock);
757 
758 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
759 	    "rfs4_servinst_destroy_all: destroyed %d instances", n));
760 }
761 
762 /*
763  * Assign the current server instance to a client_t.
764  * Should be called with cp->dbe held.
765  */
766 void
767 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
768 {
769 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
770 
771 	NFS4_DEBUG(rfs4_servinst_debug, (CE_NOTE,
772 	    "rfs4_servinst_assign: client: %p, old: %p, new: %p", (void *)cp,
773 	    (void *)cp->server_instance, (void *)sip));
774 
775 	/*
776 	 * The lock ensures that if the current instance is in the process
777 	 * of changing, we will see the new one.
778 	 */
779 	mutex_enter(&rfs4_servinst_lock);
780 	cp->server_instance = sip;
781 	mutex_exit(&rfs4_servinst_lock);
782 }
783 
784 rfs4_servinst_t *
785 rfs4_servinst(rfs4_client_t *cp)
786 {
787 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
788 
789 	return (cp->server_instance);
790 }
791 
792 /* ARGSUSED */
793 static void
794 nullfree(caddr_t resop)
795 {
796 }
797 
798 /*
799  * This is a fall-through for invalid or not implemented (yet) ops
800  */
801 /* ARGSUSED */
802 static void
803 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
804 	struct compound_state *cs)
805 {
806 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
807 }
808 
809 /*
810  * Check if the security flavor, nfsnum, is in the flavor_list.
811  */
812 bool_t
813 in_flavor_list(int nfsnum, int *flavor_list, int count)
814 {
815 	int i;
816 
817 	for (i = 0; i < count; i++) {
818 		if (nfsnum == flavor_list[i])
819 			return (TRUE);
820 	}
821 	return (FALSE);
822 }
823 
824 /*
825  * Used by rfs4_op_secinfo to get the security information from the
826  * export structure associated with the component.
827  */
828 /* ARGSUSED */
829 static nfsstat4
830 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
831 {
832 	int error, different_export = 0;
833 	vnode_t *dvp, *vp, *tvp;
834 	struct exportinfo *exi = NULL;
835 	fid_t fid;
836 	uint_t count, i;
837 	secinfo4 *resok_val;
838 	struct secinfo *secp;
839 	bool_t did_traverse;
840 	int dotdot, walk;
841 
842 	dvp = cs->vp;
843 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
844 
845 	/*
846 	 * If dotdotting, then need to check whether it's above the
847 	 * root of a filesystem, or above an export point.
848 	 */
849 	if (dotdot) {
850 
851 		/*
852 		 * If dotdotting at the root of a filesystem, then
853 		 * need to traverse back to the mounted-on filesystem
854 		 * and do the dotdot lookup there.
855 		 */
856 		if (cs->vp->v_flag & VROOT) {
857 
858 			/*
859 			 * If at the system root, then can
860 			 * go up no further.
861 			 */
862 			if (VN_CMP(dvp, rootdir))
863 				return (puterrno4(ENOENT));
864 
865 			/*
866 			 * Traverse back to the mounted-on filesystem
867 			 */
868 			dvp = untraverse(cs->vp);
869 
870 			/*
871 			 * Set the different_export flag so we remember
872 			 * to pick up a new exportinfo entry for
873 			 * this new filesystem.
874 			 */
875 			different_export = 1;
876 		} else {
877 
878 			/*
879 			 * If dotdotting above an export point then set
880 			 * the different_export to get new export info.
881 			 */
882 			different_export = nfs_exported(cs->exi, cs->vp);
883 		}
884 	}
885 
886 	/*
887 	 * Get the vnode for the component "nm".
888 	 */
889 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
890 	if (error)
891 		return (puterrno4(error));
892 
893 	VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
894 
895 	/*
896 	 * If the vnode is in a pseudo filesystem, or if the security flavor
897 	 * used in the request is valid but not an explicitly shared flavor,
898 	 * or the access bit indicates that this is a limited access,
899 	 * check whether this vnode is visible.
900 	 */
901 	if (!different_export &&
902 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
903 	    cs->access & CS_ACCESS_LIMITED)) {
904 		if (! nfs_visible(cs->exi, vp, &different_export)) {
905 			VN_RELE(vp);
906 			return (puterrno4(ENOENT));
907 		}
908 	}
909 
910 	/*
911 	 * If it's a mountpoint, then traverse it.
912 	 */
913 	if (vn_ismntpt(vp)) {
914 		tvp = vp;
915 		if ((error = traverse(&tvp)) != 0) {
916 			VN_RELE(vp);
917 			return (puterrno4(error));
918 		}
919 		/* remember that we had to traverse mountpoint */
920 		did_traverse = TRUE;
921 		vp = tvp;
922 		different_export = 1;
923 	} else if (vp->v_vfsp != dvp->v_vfsp) {
924 		/*
925 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
926 		 * then vp is probably an LOFS object.  We don't need the
927 		 * realvp, we just need to know that we might have crossed
928 		 * a server fs boundary and need to call checkexport4.
929 		 * (LOFS lookup hides server fs mountpoints, and actually calls
930 		 * traverse)
931 		 */
932 		different_export = 1;
933 		did_traverse = FALSE;
934 	}
935 
936 	/*
937 	 * Get the export information for it.
938 	 */
939 	if (different_export) {
940 
941 		bzero(&fid, sizeof (fid));
942 		fid.fid_len = MAXFIDSZ;
943 		error = vop_fid_pseudo(vp, &fid);
944 		if (error) {
945 			VN_RELE(vp);
946 			return (puterrno4(error));
947 		}
948 
949 		if (dotdot)
950 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
951 		else
952 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
953 
954 		if (exi == NULL) {
955 			if (did_traverse == TRUE) {
956 				/*
957 				 * If this vnode is a mounted-on vnode,
958 				 * but the mounted-on file system is not
959 				 * exported, send back the secinfo for
960 				 * the exported node that the mounted-on
961 				 * vnode lives in.
962 				 */
963 				exi = cs->exi;
964 			} else {
965 				VN_RELE(vp);
966 				return (puterrno4(EACCES));
967 			}
968 		}
969 	} else {
970 		exi = cs->exi;
971 	}
972 	ASSERT(exi != NULL);
973 
974 
975 	/*
976 	 * Create the secinfo result based on the security information
977 	 * from the exportinfo structure (exi).
978 	 *
979 	 * Return all flavors for a pseudo node.
980 	 * For a real export node, return the flavor that the client
981 	 * has access with.
982 	 */
983 	ASSERT(RW_LOCK_HELD(&exported_lock));
984 	if (PSEUDO(exi)) {
985 		count = exi->exi_export.ex_seccnt; /* total sec count */
986 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
987 		secp = exi->exi_export.ex_secinfo;
988 
989 		for (i = 0; i < count; i++) {
990 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
991 		    if (resok_val[i].flavor == RPCSEC_GSS) {
992 			rpcsec_gss_info *info;
993 
994 			info = &resok_val[i].flavor_info;
995 			info->qop = secp[i].s_secinfo.sc_qop;
996 			info->service =
997 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
998 
999 			/* get oid opaque data */
1000 			info->oid.sec_oid4_len =
1001 				secp[i].s_secinfo.sc_gss_mech_type->length;
1002 			info->oid.sec_oid4_val =
1003 				kmem_alloc(
1004 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1005 				    KM_SLEEP);
1006 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1007 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1008 		    }
1009 		}
1010 		resp->SECINFO4resok_len = count;
1011 		resp->SECINFO4resok_val = resok_val;
1012 	} else {
1013 		int ret_cnt = 0, k = 0;
1014 		int *flavor_list;
1015 
1016 		count = exi->exi_export.ex_seccnt; /* total sec count */
1017 		secp = exi->exi_export.ex_secinfo;
1018 
1019 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1020 		/* find out which flavors to return */
1021 		for (i = 0; i < count; i ++) {
1022 			int access, flavor, perm;
1023 
1024 			flavor = secp[i].s_secinfo.sc_nfsnum;
1025 			perm = secp[i].s_flags;
1026 
1027 			access = nfsauth4_secinfo_access(exi, cs->req,
1028 						flavor, perm);
1029 
1030 			if (! (access & NFSAUTH_DENIED) &&
1031 			    ! (access & NFSAUTH_WRONGSEC)) {
1032 				flavor_list[ret_cnt] = flavor;
1033 				ret_cnt++;
1034 			}
1035 		}
1036 
1037 		/* Create the returning SECINFO value */
1038 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1039 
1040 		for (i = 0; i < count; i++) {
1041 		/* If the flavor is in the flavor list, fill in resok_val. */
1042 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1043 						flavor_list, ret_cnt)) {
1044 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1045 			if (resok_val[k].flavor == RPCSEC_GSS) {
1046 			    rpcsec_gss_info *info;
1047 
1048 			    info = &resok_val[k].flavor_info;
1049 			    info->qop = secp[i].s_secinfo.sc_qop;
1050 			    info->service =
1051 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1052 
1053 			    /* get oid opaque data */
1054 			    info->oid.sec_oid4_len =
1055 				secp[i].s_secinfo.sc_gss_mech_type->length;
1056 			    info->oid.sec_oid4_val =
1057 				kmem_alloc(
1058 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1059 				    KM_SLEEP);
1060 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1061 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1062 			}
1063 			k++;
1064 		    }
1065 		    if (k >= ret_cnt)
1066 			break;
1067 		}
1068 		resp->SECINFO4resok_len = ret_cnt;
1069 		resp->SECINFO4resok_val = resok_val;
1070 		kmem_free(flavor_list, count * sizeof (int));
1071 	}
1072 
1073 	VN_RELE(vp);
1074 	return (NFS4_OK);
1075 }
1076 
1077 /*
1078  * SECINFO (Operation 33): Obtain required security information on
1079  * the component name in the format of (security-mechanism-oid, qop, service)
1080  * triplets.
1081  */
1082 /* ARGSUSED */
1083 static void
1084 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1085 	struct compound_state *cs)
1086 {
1087 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1088 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1089 	uint_t len;
1090 	char *nm;
1091 
1092 	/*
1093 	 * Current file handle (cfh) should have been set before getting
1094 	 * into this function. If not, return error.
1095 	 */
1096 	if (cs->vp == NULL) {
1097 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1098 		return;
1099 	}
1100 
1101 	if (cs->vp->v_type != VDIR) {
1102 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1103 		return;
1104 	}
1105 
1106 	/*
1107 	 * Verify the component name. If failed, error out, but
1108 	 * do not error out if the component name is a "..".
1109 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1110 	 */
1111 	if (!utf8_dir_verify(utfnm)) {
1112 		if (utfnm->utf8string_len != 2 ||
1113 				utfnm->utf8string_val[0] != '.' ||
1114 				utfnm->utf8string_val[1] != '.') {
1115 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1116 			return;
1117 		}
1118 	}
1119 
1120 	nm = utf8_to_str(utfnm, &len, NULL);
1121 	if (nm == NULL) {
1122 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1123 		return;
1124 	}
1125 
1126 	if (len > MAXNAMELEN) {
1127 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1128 		kmem_free(nm, len);
1129 		return;
1130 	}
1131 
1132 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1133 
1134 	kmem_free(nm, len);
1135 }
1136 
1137 /*
1138  * Free SECINFO result.
1139  */
1140 /* ARGSUSED */
1141 static void
1142 rfs4_op_secinfo_free(nfs_resop4 *resop)
1143 {
1144 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1145 	int count, i;
1146 	secinfo4 *resok_val;
1147 
1148 	/* If this is not an Ok result, nothing to free. */
1149 	if (resp->status != NFS4_OK) {
1150 		return;
1151 	}
1152 
1153 	count = resp->SECINFO4resok_len;
1154 	resok_val = resp->SECINFO4resok_val;
1155 
1156 	for (i = 0; i < count; i++) {
1157 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1158 		rpcsec_gss_info *info;
1159 
1160 		info = &resok_val[i].flavor_info;
1161 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1162 	    }
1163 	}
1164 	kmem_free(resok_val, count * sizeof (secinfo4));
1165 	resp->SECINFO4resok_len = 0;
1166 	resp->SECINFO4resok_val = NULL;
1167 }
1168 
1169 /* ARGSUSED */
1170 static void
1171 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1172 	struct compound_state *cs)
1173 {
1174 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1175 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1176 	int error;
1177 	vnode_t *vp;
1178 	struct vattr va;
1179 	int checkwriteperm;
1180 	cred_t *cr = cs->cr;
1181 
1182 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1183 	if (cs->access == CS_ACCESS_DENIED) {
1184 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1185 		return;
1186 	}
1187 #endif
1188 	if (cs->vp == NULL) {
1189 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1190 		return;
1191 	}
1192 
1193 	ASSERT(cr != NULL);
1194 
1195 	vp = cs->vp;
1196 
1197 	/*
1198 	 * If the file system is exported read only, it is not appropriate
1199 	 * to check write permissions for regular files and directories.
1200 	 * Special files are interpreted by the client, so the underlying
1201 	 * permissions are sent back to the client for interpretation.
1202 	 */
1203 	if (rdonly4(cs->exi, cs->vp, req) &&
1204 		(vp->v_type == VREG || vp->v_type == VDIR))
1205 		checkwriteperm = 0;
1206 	else
1207 		checkwriteperm = 1;
1208 
1209 	/*
1210 	 * XXX
1211 	 * We need the mode so that we can correctly determine access
1212 	 * permissions relative to a mandatory lock file.  Access to
1213 	 * mandatory lock files is denied on the server, so it might
1214 	 * as well be reflected to the server during the open.
1215 	 */
1216 	va.va_mask = AT_MODE;
1217 	error = VOP_GETATTR(vp, &va, 0, cr);
1218 	if (error) {
1219 		*cs->statusp = resp->status = puterrno4(error);
1220 		return;
1221 	}
1222 
1223 	resp->access = 0;
1224 	resp->supported = 0;
1225 
1226 	if (args->access & ACCESS4_READ) {
1227 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1228 		if (!error && !MANDLOCK(vp, va.va_mode))
1229 			resp->access |= ACCESS4_READ;
1230 		resp->supported |= ACCESS4_READ;
1231 	}
1232 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1233 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1234 		if (!error)
1235 			resp->access |= ACCESS4_LOOKUP;
1236 		resp->supported |= ACCESS4_LOOKUP;
1237 	}
1238 	if (checkwriteperm &&
1239 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1240 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1241 		if (!error && !MANDLOCK(vp, va.va_mode))
1242 			resp->access |=
1243 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1244 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1245 	}
1246 
1247 	if (checkwriteperm &&
1248 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1249 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1250 		if (!error)
1251 			resp->access |= ACCESS4_DELETE;
1252 		resp->supported |= ACCESS4_DELETE;
1253 	}
1254 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1255 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1256 		if (!error && !MANDLOCK(vp, va.va_mode))
1257 			resp->access |= ACCESS4_EXECUTE;
1258 		resp->supported |= ACCESS4_EXECUTE;
1259 	}
1260 
1261 	*cs->statusp = resp->status = NFS4_OK;
1262 }
1263 
1264 /* ARGSUSED */
1265 static void
1266 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1267 	struct compound_state *cs)
1268 {
1269 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1270 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1271 	int error;
1272 	vnode_t *vp = cs->vp;
1273 	cred_t *cr = cs->cr;
1274 	vattr_t va;
1275 
1276 	if (vp == NULL) {
1277 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1278 		return;
1279 	}
1280 	if (cs->access == CS_ACCESS_DENIED) {
1281 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1282 		return;
1283 	}
1284 
1285 	if (args->offset + args->count < args->offset) {
1286 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1287 		return;
1288 	}
1289 
1290 	va.va_mask = AT_UID;
1291 	error = VOP_GETATTR(vp, &va, 0, cr);
1292 
1293 	/*
1294 	 * If we can't get the attributes, then we can't do the
1295 	 * right access checking.  So, we'll fail the request.
1296 	 */
1297 	if (error) {
1298 		*cs->statusp = resp->status = puterrno4(error);
1299 		return;
1300 	}
1301 	if (rdonly4(cs->exi, cs->vp, req)) {
1302 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1303 		return;
1304 	}
1305 
1306 	if (vp->v_type != VREG) {
1307 		if (vp->v_type == VDIR)
1308 			resp->status = NFS4ERR_ISDIR;
1309 		else
1310 			resp->status = NFS4ERR_INVAL;
1311 		*cs->statusp = resp->status;
1312 		return;
1313 	}
1314 
1315 	if (crgetuid(cr) != va.va_uid &&
1316 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1317 		*cs->statusp = resp->status = puterrno4(error);
1318 		return;
1319 	}
1320 
1321 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1322 	if (!error)
1323 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1324 
1325 	if (error) {
1326 		*cs->statusp = resp->status = puterrno4(error);
1327 		return;
1328 	}
1329 
1330 	*cs->statusp = resp->status = NFS4_OK;
1331 	resp->writeverf = Write4verf;
1332 }
1333 
1334 /*
1335  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1336  * was completed. It does the nfsv4 create for special files.
1337  */
1338 /* ARGSUSED */
1339 static vnode_t *
1340 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1341 	struct compound_state *cs, vattr_t *vap, char *nm)
1342 {
1343 	int error;
1344 	cred_t *cr = cs->cr;
1345 	vnode_t *dvp = cs->vp;
1346 	vnode_t *vp = NULL;
1347 	int mode;
1348 	enum vcexcl excl;
1349 
1350 	switch (args->type) {
1351 	case NF4CHR:
1352 	case NF4BLK:
1353 		if (secpolicy_sys_devices(cr) != 0) {
1354 			*cs->statusp = resp->status = NFS4ERR_PERM;
1355 			return (NULL);
1356 		}
1357 		if (args->type == NF4CHR)
1358 			vap->va_type = VCHR;
1359 		else
1360 			vap->va_type = VBLK;
1361 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1362 					args->ftype4_u.devdata.specdata2);
1363 		vap->va_mask |= AT_RDEV;
1364 		break;
1365 	case NF4SOCK:
1366 		vap->va_type = VSOCK;
1367 		break;
1368 	case NF4FIFO:
1369 		vap->va_type = VFIFO;
1370 		break;
1371 	default:
1372 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1373 		return (NULL);
1374 	}
1375 
1376 	/*
1377 	 * Must specify the mode.
1378 	 */
1379 	if (!(vap->va_mask & AT_MODE)) {
1380 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1381 		return (NULL);
1382 	}
1383 
1384 	excl = EXCL;
1385 
1386 	mode = 0;
1387 
1388 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1389 	if (error) {
1390 		*cs->statusp = resp->status = puterrno4(error);
1391 		return (NULL);
1392 	}
1393 	return (vp);
1394 }
1395 
1396 /*
1397  * nfsv4 create is used to create non-regular files. For regular files,
1398  * use nfsv4 open.
1399  */
1400 /* ARGSUSED */
1401 static void
1402 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1403 	struct compound_state *cs)
1404 {
1405 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1406 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1407 	int error;
1408 	struct vattr bva, iva, iva2, ava, *vap;
1409 	cred_t *cr = cs->cr;
1410 	vnode_t *dvp = cs->vp;
1411 	vnode_t *vp = NULL;
1412 	char *nm, *lnm;
1413 	uint_t len, llen;
1414 	int syncval = 0;
1415 	struct nfs4_svgetit_arg sarg;
1416 	struct nfs4_ntov_table ntov;
1417 	struct statvfs64 sb;
1418 	nfsstat4 status;
1419 
1420 	resp->attrset = 0;
1421 
1422 	if (dvp == NULL) {
1423 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1424 		return;
1425 	}
1426 
1427 	/*
1428 	 * If there is an unshared filesystem mounted on this vnode,
1429 	 * do not allow to create an object in this directory.
1430 	 */
1431 	if (vn_ismntpt(dvp)) {
1432 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1433 		return;
1434 	}
1435 
1436 	/* Verify that type is correct */
1437 	switch (args->type) {
1438 	case NF4LNK:
1439 	case NF4BLK:
1440 	case NF4CHR:
1441 	case NF4SOCK:
1442 	case NF4FIFO:
1443 	case NF4DIR:
1444 		break;
1445 	default:
1446 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1447 		return;
1448 	};
1449 
1450 	if (cs->access == CS_ACCESS_DENIED) {
1451 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1452 		return;
1453 	}
1454 	if (dvp->v_type != VDIR) {
1455 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1456 		return;
1457 	}
1458 	if (!utf8_dir_verify(&args->objname)) {
1459 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1460 		return;
1461 	}
1462 
1463 	if (rdonly4(cs->exi, cs->vp, req)) {
1464 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1465 		return;
1466 	}
1467 
1468 	/*
1469 	 * Name of newly created object
1470 	 */
1471 	nm = utf8_to_fn(&args->objname, &len, NULL);
1472 	if (nm == NULL) {
1473 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1474 		return;
1475 	}
1476 
1477 	if (len > MAXNAMELEN) {
1478 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1479 		kmem_free(nm, len);
1480 		return;
1481 	}
1482 
1483 	resp->attrset = 0;
1484 
1485 	sarg.sbp = &sb;
1486 	nfs4_ntov_table_init(&ntov);
1487 
1488 	status = do_rfs4_set_attrs(&resp->attrset,
1489 					&args->createattrs, cs, &sarg,
1490 					&ntov, NFS4ATTR_SETIT);
1491 
1492 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1493 		status = NFS4ERR_INVAL;
1494 
1495 	if (status != NFS4_OK) {
1496 		*cs->statusp = resp->status = status;
1497 		kmem_free(nm, len);
1498 		nfs4_ntov_table_free(&ntov, &sarg);
1499 		resp->attrset = 0;
1500 		return;
1501 	}
1502 
1503 	/* Get "before" change value */
1504 	bva.va_mask = AT_CTIME|AT_SEQ;
1505 	error = VOP_GETATTR(dvp, &bva, 0, cr);
1506 	if (error) {
1507 		*cs->statusp = resp->status = puterrno4(error);
1508 		kmem_free(nm, len);
1509 		nfs4_ntov_table_free(&ntov, &sarg);
1510 		resp->attrset = 0;
1511 		return;
1512 	}
1513 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1514 
1515 	vap = sarg.vap;
1516 
1517 	/*
1518 	 * Set default initial values for attributes when not specified
1519 	 * in createattrs.
1520 	 */
1521 	if ((vap->va_mask & AT_UID) == 0) {
1522 		vap->va_uid = crgetuid(cr);
1523 		vap->va_mask |= AT_UID;
1524 	}
1525 	if ((vap->va_mask & AT_GID) == 0) {
1526 		vap->va_gid = crgetgid(cr);
1527 		vap->va_mask |= AT_GID;
1528 	}
1529 
1530 	vap->va_mask |= AT_TYPE;
1531 	switch (args->type) {
1532 	case NF4DIR:
1533 		vap->va_type = VDIR;
1534 		if ((vap->va_mask & AT_MODE) == 0) {
1535 			vap->va_mode = 0700;	/* default: owner rwx only */
1536 			vap->va_mask |= AT_MODE;
1537 		}
1538 		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
1539 		if (error)
1540 			break;
1541 
1542 		/*
1543 		 * Get the initial "after" sequence number, if it fails,
1544 		 * set to zero
1545 		 */
1546 		iva.va_mask = AT_SEQ;
1547 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1548 			iva.va_seq = 0;
1549 		break;
1550 	case NF4LNK:
1551 		vap->va_type = VLNK;
1552 		if ((vap->va_mask & AT_MODE) == 0) {
1553 			vap->va_mode = 0700;	/* default: owner rwx only */
1554 			vap->va_mask |= AT_MODE;
1555 		}
1556 
1557 		/*
1558 		 * symlink names must be treated as data
1559 		 */
1560 		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1561 
1562 		if (lnm == NULL) {
1563 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1564 			kmem_free(nm, len);
1565 			nfs4_ntov_table_free(&ntov, &sarg);
1566 			resp->attrset = 0;
1567 			return;
1568 		}
1569 
1570 		if (llen > MAXPATHLEN) {
1571 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1572 			kmem_free(nm, len);
1573 			kmem_free(lnm, llen);
1574 			nfs4_ntov_table_free(&ntov, &sarg);
1575 			resp->attrset = 0;
1576 			return;
1577 		}
1578 
1579 		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
1580 		if (lnm != NULL)
1581 			kmem_free(lnm, llen);
1582 		if (error)
1583 			break;
1584 
1585 		/*
1586 		 * Get the initial "after" sequence number, if it fails,
1587 		 * set to zero
1588 		 */
1589 		iva.va_mask = AT_SEQ;
1590 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1591 			iva.va_seq = 0;
1592 
1593 		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
1594 		if (error)
1595 			break;
1596 
1597 		VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
1598 
1599 		/*
1600 		 * va_seq is not safe over VOP calls, check it again
1601 		 * if it has changed zero out iva to force atomic = FALSE.
1602 		 */
1603 		iva2.va_mask = AT_SEQ;
1604 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
1605 						iva2.va_seq != iva.va_seq)
1606 			iva.va_seq = 0;
1607 		break;
1608 	default:
1609 		/*
1610 		 * probably a special file.
1611 		 */
1612 		if ((vap->va_mask & AT_MODE) == 0) {
1613 			vap->va_mode = 0600;	/* default: owner rw only */
1614 			vap->va_mask |= AT_MODE;
1615 		}
1616 		syncval = FNODSYNC;
1617 		/*
1618 		 * We know this will only generate one VOP call
1619 		 */
1620 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);
1621 
1622 		if (vp == NULL) {
1623 			kmem_free(nm, len);
1624 			nfs4_ntov_table_free(&ntov, &sarg);
1625 			resp->attrset = 0;
1626 			return;
1627 		}
1628 
1629 		/*
1630 		 * Get the initial "after" sequence number, if it fails,
1631 		 * set to zero
1632 		 */
1633 		iva.va_mask = AT_SEQ;
1634 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
1635 			iva.va_seq = 0;
1636 
1637 		break;
1638 	}
1639 	kmem_free(nm, len);
1640 
1641 	if (error) {
1642 		*cs->statusp = resp->status = puterrno4(error);
1643 	}
1644 
1645 	/*
1646 	 * Force modified data and metadata out to stable storage.
1647 	 */
1648 	(void) VOP_FSYNC(dvp, 0, cr);
1649 
1650 	if (resp->status != NFS4_OK) {
1651 		if (vp != NULL)
1652 			VN_RELE(vp);
1653 		nfs4_ntov_table_free(&ntov, &sarg);
1654 		resp->attrset = 0;
1655 		return;
1656 	}
1657 
1658 	/*
1659 	 * Finish setup of cinfo response, "before" value already set.
1660 	 * Get "after" change value, if it fails, simply return the
1661 	 * before value.
1662 	 */
1663 	ava.va_mask = AT_CTIME|AT_SEQ;
1664 	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
1665 		ava.va_ctime = bva.va_ctime;
1666 		ava.va_seq = 0;
1667 	}
1668 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1669 
1670 	/*
1671 	 * True verification that object was created with correct
1672 	 * attrs is impossible.  The attrs could have been changed
1673 	 * immediately after object creation.  If attributes did
1674 	 * not verify, the only recourse for the server is to
1675 	 * destroy the object.  Maybe if some attrs (like gid)
1676 	 * are set incorrectly, the object should be destroyed;
1677 	 * however, seems bad as a default policy.  Do we really
1678 	 * want to destroy an object over one of the times not
1679 	 * verifying correctly?  For these reasons, the server
1680 	 * currently sets bits in attrset for createattrs
1681 	 * that were set; however, no verification is done.
1682 	 *
1683 	 * vmask_to_nmask accounts for vattr bits set on create
1684 	 *	[do_rfs4_set_attrs() only sets resp bits for
1685 	 *	 non-vattr/vfs bits.]
1686 	 * Mask off any bits set by default so as not to return
1687 	 * more attrset bits than were requested in createattrs
1688 	 */
1689 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1690 	resp->attrset &= args->createattrs.attrmask;
1691 	nfs4_ntov_table_free(&ntov, &sarg);
1692 
1693 	error = makefh4(&cs->fh, vp, cs->exi);
1694 	if (error) {
1695 		*cs->statusp = resp->status = puterrno4(error);
1696 	}
1697 
1698 	/*
1699 	 * The cinfo.atomic = TRUE only if we got no errors, we have
1700 	 * non-zero va_seq's, and it has incremented by exactly one
1701 	 * during the creation and it didn't change during the VOP_LOOKUP
1702 	 * or VOP_FSYNC.
1703 	 */
1704 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1705 			iva.va_seq == (bva.va_seq + 1) &&
1706 			iva.va_seq == ava.va_seq)
1707 		resp->cinfo.atomic = TRUE;
1708 	else
1709 		resp->cinfo.atomic = FALSE;
1710 
1711 	(void) VOP_FSYNC(vp, syncval, cr);
1712 
1713 	if (resp->status != NFS4_OK) {
1714 		VN_RELE(vp);
1715 		return;
1716 	}
1717 	if (cs->vp)
1718 		VN_RELE(cs->vp);
1719 
1720 	cs->vp = vp;
1721 	*cs->statusp = resp->status = NFS4_OK;
1722 }
1723 
1724 
1725 /*ARGSUSED*/
1726 static void
1727 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1728 	struct compound_state *cs)
1729 {
1730 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1731 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1732 	rfs4_deleg_state_t *dsp;
1733 	nfsstat4 status;
1734 
1735 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1736 	resp->status = *cs->statusp = status;
1737 	if (status != NFS4_OK)
1738 		return;
1739 
1740 	/* Ensure specified filehandle matches */
1741 	if (cs->vp != dsp->finfo->vp) {
1742 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1743 	} else
1744 		rfs4_return_deleg(dsp, FALSE);
1745 
1746 	rfs4_update_lease(dsp->client);
1747 
1748 	rfs4_deleg_state_rele(dsp);
1749 }
1750 
1751 /*
1752  * Check to see if a given "flavor" is an explicitly shared flavor.
1753  * The assumption of this routine is the "flavor" is already a valid
1754  * flavor in the secinfo list of "exi".
1755  *
1756  *	e.g.
1757  *		# share -o sec=flavor1 /export
1758  *		# share -o sec=flavor2 /export/home
1759  *
1760  *		flavor2 is not an explicitly shared flavor for /export,
1761  *		however it is in the secinfo list for /export thru the
1762  *		server namespace setup.
1763  */
1764 int
1765 is_exported_sec(int flavor, struct exportinfo *exi)
1766 {
1767 	int	i;
1768 	struct secinfo *sp;
1769 
1770 	sp = exi->exi_export.ex_secinfo;
1771 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1772 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1773 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1774 			return (SEC_REF_EXPORTED(&sp[i]));
1775 		}
1776 	}
1777 
1778 	/* Should not reach this point based on the assumption */
1779 	return (0);
1780 }
1781 
1782 /*
1783  * Check if the security flavor used in the request matches what is
1784  * required at the export point or at the root pseudo node (exi_root).
1785  *
1786  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1787  *
1788  */
1789 static int
1790 secinfo_match_or_authnone(struct compound_state *cs)
1791 {
1792 	int	i;
1793 	struct secinfo *sp;
1794 
1795 	/*
1796 	 * Check cs->nfsflavor (from the request) against
1797 	 * the current export data in cs->exi.
1798 	 */
1799 	sp = cs->exi->exi_export.ex_secinfo;
1800 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1801 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1802 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1803 			return (1);
1804 	}
1805 
1806 	return (0);
1807 }
1808 
1809 /*
1810  * Check the access authority for the client and return the correct error.
1811  */
1812 nfsstat4
1813 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1814 {
1815 	int	authres;
1816 
1817 	/*
1818 	 * First, check if the security flavor used in the request
1819 	 * are among the flavors set in the server namespace.
1820 	 */
1821 	if (!secinfo_match_or_authnone(cs)) {
1822 		*cs->statusp = NFS4ERR_WRONGSEC;
1823 		return (*cs->statusp);
1824 	}
1825 
1826 	authres = checkauth4(cs, req);
1827 
1828 	if (authres > 0) {
1829 		*cs->statusp = NFS4_OK;
1830 		if (! (cs->access & CS_ACCESS_LIMITED))
1831 			cs->access = CS_ACCESS_OK;
1832 	} else if (authres == 0) {
1833 		*cs->statusp = NFS4ERR_ACCESS;
1834 	} else if (authres == -2) {
1835 		*cs->statusp = NFS4ERR_WRONGSEC;
1836 	} else {
1837 		*cs->statusp = NFS4ERR_DELAY;
1838 	}
1839 	return (*cs->statusp);
1840 }
1841 
1842 /*
1843  * bitmap4_to_attrmask is called by getattr and readdir.
1844  * It sets up the vattr mask and determines whether vfsstat call is needed
1845  * based on the input bitmap.
1846  * Returns nfsv4 status.
1847  */
1848 static nfsstat4
1849 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1850 {
1851 	int i;
1852 	uint_t	va_mask;
1853 	struct statvfs64 *sbp = sargp->sbp;
1854 
1855 	sargp->sbp = NULL;
1856 	sargp->flag = 0;
1857 	sargp->rdattr_error = NFS4_OK;
1858 	sargp->mntdfid_set = FALSE;
1859 	if (sargp->cs->vp)
1860 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1861 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1862 	else
1863 		sargp->xattr = 0;
1864 
1865 	/*
1866 	 * Set rdattr_error_req to true if return error per
1867 	 * failed entry rather than fail the readdir.
1868 	 */
1869 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1870 		sargp->rdattr_error_req = 1;
1871 	else
1872 		sargp->rdattr_error_req = 0;
1873 
1874 	/*
1875 	 * generate the va_mask
1876 	 * Handle the easy cases first
1877 	 */
1878 	switch (breq) {
1879 	case NFS4_NTOV_ATTR_MASK:
1880 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1881 		return (NFS4_OK);
1882 
1883 	case NFS4_FS_ATTR_MASK:
1884 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1885 		sargp->sbp = sbp;
1886 		return (NFS4_OK);
1887 
1888 	case NFS4_NTOV_ATTR_CACHE_MASK:
1889 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1890 		return (NFS4_OK);
1891 
1892 	case FATTR4_LEASE_TIME_MASK:
1893 		sargp->vap->va_mask = 0;
1894 		return (NFS4_OK);
1895 
1896 	default:
1897 		va_mask = 0;
1898 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1899 			if ((breq & nfs4_ntov_map[i].fbit) &&
1900 							nfs4_ntov_map[i].vbit)
1901 				va_mask |= nfs4_ntov_map[i].vbit;
1902 		}
1903 
1904 		/*
1905 		 * Check is vfsstat is needed
1906 		 */
1907 		if (breq & NFS4_FS_ATTR_MASK)
1908 			sargp->sbp = sbp;
1909 
1910 		sargp->vap->va_mask = va_mask;
1911 		return (NFS4_OK);
1912 	}
1913 	/* NOTREACHED */
1914 }
1915 
1916 /*
1917  * bitmap4_get_sysattrs is called by getattr and readdir.
1918  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
1919  * Returns nfsv4 status.
1920  */
1921 static nfsstat4
1922 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
1923 {
1924 	int error;
1925 	struct compound_state *cs = sargp->cs;
1926 	vnode_t *vp = cs->vp;
1927 
1928 	if (sargp->sbp != NULL) {
1929 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
1930 			sargp->sbp = NULL;	/* to identify error */
1931 			return (puterrno4(error));
1932 		}
1933 	}
1934 
1935 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
1936 }
1937 
1938 static void
1939 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
1940 {
1941 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
1942 			KM_SLEEP);
1943 	ntovp->attrcnt = 0;
1944 	ntovp->vfsstat = FALSE;
1945 }
1946 
1947 static void
1948 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
1949 	struct nfs4_svgetit_arg *sargp)
1950 {
1951 	int i;
1952 	union nfs4_attr_u *na;
1953 	uint8_t *amap;
1954 
1955 	/*
1956 	 * XXX Should do the same checks for whether the bit is set
1957 	 */
1958 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
1959 		i < ntovp->attrcnt; i++, na++, amap++) {
1960 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
1961 			NFS4ATTR_FREEIT, sargp, na);
1962 	}
1963 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
1964 		/*
1965 		 * xdr_free for getattr will be done later
1966 		 */
1967 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
1968 			i < ntovp->attrcnt; i++, na++, amap++) {
1969 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
1970 		}
1971 	}
1972 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
1973 }
1974 
1975 /*
1976  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
1977  */
1978 static nfsstat4
1979 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
1980 	struct nfs4_svgetit_arg *sargp)
1981 {
1982 	int error = 0;
1983 	int i, k;
1984 	struct nfs4_ntov_table ntov;
1985 	XDR xdr;
1986 	ulong_t xdr_size;
1987 	char *xdr_attrs;
1988 	nfsstat4 status = NFS4_OK;
1989 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
1990 	union nfs4_attr_u *na;
1991 	uint8_t *amap;
1992 
1993 	sargp->op = NFS4ATTR_GETIT;
1994 	sargp->flag = 0;
1995 
1996 	fattrp->attrmask = 0;
1997 	/* if no bits requested, then return empty fattr4 */
1998 	if (breq == 0) {
1999 		fattrp->attrlist4_len = 0;
2000 		fattrp->attrlist4 = NULL;
2001 		return (NFS4_OK);
2002 	}
2003 
2004 	/*
2005 	 * return NFS4ERR_INVAL when client requests write-only attrs
2006 	 */
2007 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2008 		return (NFS4ERR_INVAL);
2009 
2010 	nfs4_ntov_table_init(&ntov);
2011 	na = ntov.na;
2012 	amap = ntov.amap;
2013 
2014 	/*
2015 	 * Now loop to get or verify the attrs
2016 	 */
2017 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2018 		if (breq & nfs4_ntov_map[i].fbit) {
2019 			if ((*nfs4_ntov_map[i].sv_getit)(
2020 				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2021 
2022 				error = (*nfs4_ntov_map[i].sv_getit)(
2023 						NFS4ATTR_GETIT, sargp, na);
2024 
2025 				/*
2026 				 * Possible error values:
2027 				 * >0 if sv_getit failed to
2028 				 * get the attr; 0 if succeeded;
2029 				 * <0 if rdattr_error and the
2030 				 * attribute cannot be returned.
2031 				 */
2032 				if (error && !(sargp->rdattr_error_req))
2033 					goto done;
2034 				/*
2035 				 * If error then just for entry
2036 				 */
2037 				if (error == 0) {
2038 					fattrp->attrmask |=
2039 						nfs4_ntov_map[i].fbit;
2040 					*amap++ =
2041 						(uint8_t)nfs4_ntov_map[i].nval;
2042 					na++;
2043 					(ntov.attrcnt)++;
2044 				} else if ((error > 0) &&
2045 					(sargp->rdattr_error == NFS4_OK)) {
2046 					sargp->rdattr_error = puterrno4(error);
2047 				}
2048 				error = 0;
2049 			}
2050 		}
2051 	}
2052 
2053 	/*
2054 	 * If rdattr_error was set after the return value for it was assigned,
2055 	 * update it.
2056 	 */
2057 	if (prev_rdattr_error != sargp->rdattr_error) {
2058 		na = ntov.na;
2059 		amap = ntov.amap;
2060 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2061 			k = *amap;
2062 			if (k < FATTR4_RDATTR_ERROR) {
2063 				continue;
2064 			}
2065 			if ((k == FATTR4_RDATTR_ERROR) &&
2066 			    ((*nfs4_ntov_map[k].sv_getit)(
2067 				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2068 
2069 				(void) (*nfs4_ntov_map[k].sv_getit)(
2070 						NFS4ATTR_GETIT, sargp, na);
2071 			}
2072 			break;
2073 		}
2074 	}
2075 
2076 	xdr_size = 0;
2077 	na = ntov.na;
2078 	amap = ntov.amap;
2079 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2080 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2081 	}
2082 
2083 	fattrp->attrlist4_len = xdr_size;
2084 	if (xdr_size) {
2085 		/* freed by rfs4_op_getattr_free() */
2086 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2087 
2088 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2089 
2090 		na = ntov.na;
2091 		amap = ntov.amap;
2092 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2093 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2094 				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
2095 					"encode of attribute %d failed\n",
2096 					*amap);
2097 				status = NFS4ERR_SERVERFAULT;
2098 				break;
2099 			}
2100 		}
2101 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2102 	} else {
2103 		fattrp->attrlist4 = NULL;
2104 	}
2105 done:
2106 
2107 	nfs4_ntov_table_free(&ntov, sargp);
2108 
2109 	if (error != 0)
2110 		status = puterrno4(error);
2111 
2112 	return (status);
2113 }
2114 
2115 /* ARGSUSED */
2116 static void
2117 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2118 	struct compound_state *cs)
2119 {
2120 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2121 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2122 	struct nfs4_svgetit_arg sarg;
2123 	struct statvfs64 sb;
2124 	nfsstat4 status;
2125 
2126 	if (cs->vp == NULL) {
2127 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2128 		return;
2129 	}
2130 
2131 	if (cs->access == CS_ACCESS_DENIED) {
2132 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2133 		return;
2134 	}
2135 
2136 	sarg.sbp = &sb;
2137 	sarg.cs = cs;
2138 
2139 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2140 	if (status == NFS4_OK) {
2141 		status = bitmap4_get_sysattrs(&sarg);
2142 		if (status == NFS4_OK)
2143 			status = do_rfs4_op_getattr(args->attr_request,
2144 				&resp->obj_attributes, &sarg);
2145 	}
2146 	*cs->statusp = resp->status = status;
2147 }
2148 
2149 static void
2150 rfs4_op_getattr_free(nfs_resop4 *resop)
2151 {
2152 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2153 
2154 	nfs4_fattr4_free(&resp->obj_attributes);
2155 }
2156 
2157 /* ARGSUSED */
2158 static void
2159 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2160 	struct compound_state *cs)
2161 {
2162 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2163 
2164 	if (cs->vp == NULL) {
2165 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2166 		return;
2167 	}
2168 	if (cs->access == CS_ACCESS_DENIED) {
2169 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2170 		return;
2171 	}
2172 
2173 	resp->object.nfs_fh4_val =
2174 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2175 	nfs_fh4_copy(&cs->fh, &resp->object);
2176 	*cs->statusp = resp->status = NFS4_OK;
2177 }
2178 
2179 static void
2180 rfs4_op_getfh_free(nfs_resop4 *resop)
2181 {
2182 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2183 
2184 	if (resp->status == NFS4_OK &&
2185 	    resp->object.nfs_fh4_val != NULL) {
2186 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2187 		resp->object.nfs_fh4_val = NULL;
2188 		resp->object.nfs_fh4_len = 0;
2189 	}
2190 }
2191 
2192 /*
2193  * illegal: args: void
2194  *	    res : status (NFS4ERR_OP_ILLEGAL)
2195  */
2196 /* ARGSUSED */
2197 static void
2198 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2199 	struct svc_req *req, struct compound_state *cs)
2200 {
2201 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2202 
2203 	resop->resop = OP_ILLEGAL;
2204 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2205 }
2206 
2207 /*
2208  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2209  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2210  */
2211 /* ARGSUSED */
2212 static void
2213 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2214 	struct compound_state *cs)
2215 {
2216 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2217 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2218 	int error;
2219 	vnode_t *vp;
2220 	vnode_t *dvp;
2221 	struct vattr bdva, idva, adva;
2222 	char *nm;
2223 	uint_t  len;
2224 
2225 	/* SAVED_FH: source object */
2226 	vp = cs->saved_vp;
2227 	if (vp == NULL) {
2228 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2229 		return;
2230 	}
2231 
2232 	/* CURRENT_FH: target directory */
2233 	dvp = cs->vp;
2234 	if (dvp == NULL) {
2235 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2236 		return;
2237 	}
2238 
2239 	/*
2240 	 * If there is a non-shared filesystem mounted on this vnode,
2241 	 * do not allow to link any file in this directory.
2242 	 */
2243 	if (vn_ismntpt(dvp)) {
2244 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2245 		return;
2246 	}
2247 
2248 	if (cs->access == CS_ACCESS_DENIED) {
2249 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2250 		return;
2251 	}
2252 
2253 	/* Check source object's type validity */
2254 	if (vp->v_type == VDIR) {
2255 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2256 		return;
2257 	}
2258 
2259 	/* Check target directory's type */
2260 	if (dvp->v_type != VDIR) {
2261 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2262 		return;
2263 	}
2264 
2265 	if (cs->saved_exi != cs->exi) {
2266 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2267 		return;
2268 	}
2269 
2270 	if (!utf8_dir_verify(&args->newname)) {
2271 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2272 		return;
2273 	}
2274 
2275 	nm = utf8_to_fn(&args->newname, &len, NULL);
2276 	if (nm == NULL) {
2277 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2278 		return;
2279 	}
2280 
2281 	if (len > MAXNAMELEN) {
2282 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2283 		kmem_free(nm, len);
2284 		return;
2285 	}
2286 
2287 	if (rdonly4(cs->exi, cs->vp, req)) {
2288 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2289 		kmem_free(nm, len);
2290 		return;
2291 	}
2292 
2293 	/* Get "before" change value */
2294 	bdva.va_mask = AT_CTIME|AT_SEQ;
2295 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2296 	if (error) {
2297 		*cs->statusp = resp->status = puterrno4(error);
2298 		kmem_free(nm, len);
2299 		return;
2300 	}
2301 
2302 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2303 
2304 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2305 
2306 	kmem_free(nm, len);
2307 
2308 	/*
2309 	 * Get the initial "after" sequence number, if it fails, set to zero
2310 	 */
2311 	idva.va_mask = AT_SEQ;
2312 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2313 		idva.va_seq = 0;
2314 
2315 	/*
2316 	 * Force modified data and metadata out to stable storage.
2317 	 */
2318 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2319 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2320 
2321 	if (error) {
2322 		*cs->statusp = resp->status = puterrno4(error);
2323 		return;
2324 	}
2325 
2326 	/*
2327 	 * Get "after" change value, if it fails, simply return the
2328 	 * before value.
2329 	 */
2330 	adva.va_mask = AT_CTIME|AT_SEQ;
2331 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2332 		adva.va_ctime = bdva.va_ctime;
2333 		adva.va_seq = 0;
2334 	}
2335 
2336 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2337 
2338 	/*
2339 	 * The cinfo.atomic = TRUE only if we have
2340 	 * non-zero va_seq's, and it has incremented by exactly one
2341 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2342 	 */
2343 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2344 			idva.va_seq == (bdva.va_seq + 1) &&
2345 			idva.va_seq == adva.va_seq)
2346 		resp->cinfo.atomic = TRUE;
2347 	else
2348 		resp->cinfo.atomic = FALSE;
2349 
2350 	*cs->statusp = resp->status = NFS4_OK;
2351 }
2352 
2353 /*
2354  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2355  */
2356 
2357 /* ARGSUSED */
2358 static nfsstat4
2359 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2360 	struct compound_state *cs)
2361 {
2362 	int error;
2363 	int different_export = 0;
2364 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2365 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2366 	nfsstat4 stat;
2367 	fid_t fid;
2368 	int attrdir, dotdot, walk;
2369 	bool_t is_newvp = FALSE;
2370 
2371 	if (cs->vp->v_flag & V_XATTRDIR) {
2372 		attrdir = 1;
2373 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2374 	} else {
2375 		attrdir = 0;
2376 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2377 	}
2378 
2379 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2380 
2381 	/*
2382 	 * If dotdotting, then need to check whether it's
2383 	 * above the root of a filesystem, or above an
2384 	 * export point.
2385 	 */
2386 	if (dotdot) {
2387 
2388 		/*
2389 		 * If dotdotting at the root of a filesystem, then
2390 		 * need to traverse back to the mounted-on filesystem
2391 		 * and do the dotdot lookup there.
2392 		 */
2393 		if (cs->vp->v_flag & VROOT) {
2394 
2395 			/*
2396 			 * If at the system root, then can
2397 			 * go up no further.
2398 			 */
2399 			if (VN_CMP(cs->vp, rootdir))
2400 				return (puterrno4(ENOENT));
2401 
2402 			/*
2403 			 * Traverse back to the mounted-on filesystem
2404 			 */
2405 			cs->vp = untraverse(cs->vp);
2406 
2407 			/*
2408 			 * Set the different_export flag so we remember
2409 			 * to pick up a new exportinfo entry for
2410 			 * this new filesystem.
2411 			 */
2412 			different_export = 1;
2413 		} else {
2414 
2415 			/*
2416 			 * If dotdotting above an export point then set
2417 			 * the different_export to get new export info.
2418 			 */
2419 			different_export = nfs_exported(cs->exi, cs->vp);
2420 		}
2421 	}
2422 
2423 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2424 	if (error)
2425 		return (puterrno4(error));
2426 
2427 	VN_SETPATH(rootdir, cs->vp, vp, nm, strlen(nm));
2428 
2429 	/*
2430 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2431 	 *
2432 	 * XXX if the vnode is a symlink and it is not visible in
2433 	 * a pseudo filesystem, return ENOENT (not following symlink).
2434 	 * V4 client can not mount such symlink. This is a regression
2435 	 * from V2/V3.
2436 	 *
2437 	 * In the same exported filesystem, if the security flavor used
2438 	 * is not an explicitly shared flavor, limit the view to the visible
2439 	 * list entries only. This is not a WRONGSEC case because it's already
2440 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2441 	 */
2442 	if (!different_export &&
2443 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2444 	    cs->access & CS_ACCESS_LIMITED)) {
2445 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2446 			VN_RELE(vp);
2447 			return (puterrno4(ENOENT));
2448 		}
2449 	}
2450 
2451 	/*
2452 	 * If it's a mountpoint, then traverse it.
2453 	 */
2454 	if (vn_ismntpt(vp)) {
2455 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2456 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2457 
2458 		/*
2459 		 * hold pre_tvp to counteract rele by traverse.  We will
2460 		 * need pre_tvp below if checkexport4 fails
2461 		 */
2462 		VN_HOLD(pre_tvp);
2463 		tvp = vp;
2464 		if ((error = traverse(&tvp)) != 0) {
2465 			VN_RELE(vp);
2466 			VN_RELE(pre_tvp);
2467 			return (puterrno4(error));
2468 		}
2469 		vp = tvp;
2470 		different_export = 1;
2471 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2472 		/*
2473 		 * The vfsp comparison is to handle the case where
2474 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2475 		 * and NFS is unaware of local fs transistions because
2476 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2477 		 * the dir and the obj returned by lookup will have different
2478 		 * vfs ptrs.
2479 		 */
2480 		different_export = 1;
2481 	}
2482 
2483 	if (different_export) {
2484 
2485 		bzero(&fid, sizeof (fid));
2486 		fid.fid_len = MAXFIDSZ;
2487 		error = vop_fid_pseudo(vp, &fid);
2488 		if (error) {
2489 			VN_RELE(vp);
2490 			if (pre_tvp)
2491 				VN_RELE(pre_tvp);
2492 			return (puterrno4(error));
2493 		}
2494 
2495 		if (dotdot)
2496 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2497 		else
2498 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2499 
2500 		if (exi == NULL) {
2501 			if (pre_tvp) {
2502 				/*
2503 				 * If this vnode is a mounted-on vnode,
2504 				 * but the mounted-on file system is not
2505 				 * exported, send back the filehandle for
2506 				 * the mounted-on vnode, not the root of
2507 				 * the mounted-on file system.
2508 				 */
2509 				VN_RELE(vp);
2510 				vp = pre_tvp;
2511 				exi = pre_exi;
2512 			} else {
2513 				VN_RELE(vp);
2514 				return (puterrno4(EACCES));
2515 			}
2516 		} else if (pre_tvp) {
2517 			/* we're done with pre_tvp now. release extra hold */
2518 			VN_RELE(pre_tvp);
2519 		}
2520 
2521 		cs->exi = exi;
2522 
2523 		/*
2524 		 * Now we do a checkauth4. The reason is that
2525 		 * this client/user may not have access to the new
2526 		 * exported file system, and if he does,
2527 		 * the client/user may be mapped to a different uid.
2528 		 *
2529 		 * We start with a new cr, because the checkauth4 done
2530 		 * in the PUT*FH operation over wrote the cred's uid,
2531 		 * gid, etc, and we want the real thing before calling
2532 		 * checkauth4()
2533 		 */
2534 		crfree(cs->cr);
2535 		cs->cr = crdup(cs->basecr);
2536 
2537 		if (cs->vp)
2538 			oldvp = cs->vp;
2539 		cs->vp = vp;
2540 		is_newvp = TRUE;
2541 
2542 		stat = call_checkauth4(cs, req);
2543 		if (stat != NFS4_OK) {
2544 			VN_RELE(cs->vp);
2545 			cs->vp = oldvp;
2546 			return (stat);
2547 		}
2548 	}
2549 
2550 	error = makefh4(&cs->fh, vp, cs->exi);
2551 
2552 	if (error) {
2553 		if (is_newvp) {
2554 			VN_RELE(cs->vp);
2555 			cs->vp = oldvp;
2556 		} else
2557 			VN_RELE(vp);
2558 		return (puterrno4(error));
2559 	}
2560 
2561 	if (!is_newvp) {
2562 		if (cs->vp)
2563 			VN_RELE(cs->vp);
2564 		cs->vp = vp;
2565 	} else if (oldvp)
2566 		VN_RELE(oldvp);
2567 
2568 	/*
2569 	 * if did lookup on attrdir and didn't lookup .., set named
2570 	 * attr fh flag
2571 	 */
2572 	if (attrdir && ! dotdot)
2573 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2574 
2575 	/* Assume false for now, open proc will set this */
2576 	cs->mandlock = FALSE;
2577 
2578 	return (NFS4_OK);
2579 }
2580 
2581 /* ARGSUSED */
2582 static void
2583 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2584 	struct compound_state *cs)
2585 {
2586 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2587 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2588 	char *nm;
2589 	uint_t len;
2590 
2591 	if (cs->vp == NULL) {
2592 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2593 		return;
2594 	}
2595 
2596 	if (cs->vp->v_type == VLNK) {
2597 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2598 		return;
2599 	}
2600 
2601 	if (cs->vp->v_type != VDIR) {
2602 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2603 		return;
2604 	}
2605 
2606 	if (!utf8_dir_verify(&args->objname)) {
2607 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2608 		return;
2609 	}
2610 
2611 	nm = utf8_to_str(&args->objname, &len, NULL);
2612 	if (nm == NULL) {
2613 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2614 		return;
2615 	}
2616 
2617 	if (len > MAXNAMELEN) {
2618 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2619 		kmem_free(nm, len);
2620 		return;
2621 	}
2622 
2623 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2624 
2625 	kmem_free(nm, len);
2626 }
2627 
2628 /* ARGSUSED */
2629 static void
2630 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2631 	struct compound_state *cs)
2632 {
2633 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2634 
2635 	if (cs->vp == NULL) {
2636 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2637 		return;
2638 	}
2639 
2640 	if (cs->vp->v_type != VDIR) {
2641 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2642 		return;
2643 	}
2644 
2645 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2646 
2647 	/*
2648 	 * From NFSV4 Specification, LOOKUPP should not check for
2649 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2650 	 */
2651 	if (resp->status == NFS4ERR_WRONGSEC) {
2652 		*cs->statusp = resp->status = NFS4_OK;
2653 	}
2654 }
2655 
2656 
2657 /*ARGSUSED2*/
2658 static void
2659 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2660 	struct compound_state *cs)
2661 {
2662 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2663 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2664 	vnode_t		*avp = NULL;
2665 	int		lookup_flags = LOOKUP_XATTR, error;
2666 	int		exp_ro = 0;
2667 
2668 	if (cs->vp == NULL) {
2669 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2670 		return;
2671 	}
2672 
2673 	/*
2674 	 * Make a couple of checks made by copen()
2675 	 *
2676 	 * Check to make sure underlying fs supports xattrs.  This
2677 	 * is required because solaris filesystem implementations
2678 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2679 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2680 	 * pathconf cmd or if fs supports cmd but doesn't claim
2681 	 * support for xattr, return NOTSUPP.  It would be better
2682 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2683 	 * that cmd is not available to VOP_PATHCONF interface
2684 	 * (it's only implemented inside pathconf syscall)...
2685 	 *
2686 	 * Verify permission to put attributes on files (access
2687 	 * checks from copen).
2688 	 */
2689 
2690 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2691 		error = ENOTSUP;
2692 		goto error_out;
2693 	}
2694 
2695 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2696 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2697 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2698 		error = EACCES;
2699 		goto error_out;
2700 	}
2701 
2702 	/*
2703 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2704 	 * the file system is exported read-only -- regardless of
2705 	 * createdir flag.  Otherwise the attrdir would be created
2706 	 * (assuming server fs isn't mounted readonly locally).  If
2707 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2708 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2709 	 * because specfs has no VOP_LOOKUP op, so the macro would
2710 	 * return ENOSYS.  EINVAL is returned by all (current)
2711 	 * Solaris file system implementations when any of their
2712 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2713 	 * Returning NOTSUPP is more appropriate in this case
2714 	 * because the object will never be able to have an attrdir.
2715 	 */
2716 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2717 		lookup_flags |= CREATE_XATTR_DIR;
2718 
2719 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2720 
2721 	if (error) {
2722 		if (error == ENOENT && args->createdir && exp_ro)
2723 			error = EROFS;
2724 		else if (error == EINVAL || error == ENOSYS)
2725 			error = ENOTSUP;
2726 		goto error_out;
2727 	}
2728 
2729 	ASSERT(avp->v_flag & V_XATTRDIR);
2730 
2731 	error = makefh4(&cs->fh, avp, cs->exi);
2732 
2733 	if (error) {
2734 		VN_RELE(avp);
2735 		goto error_out;
2736 	}
2737 
2738 	VN_RELE(cs->vp);
2739 	cs->vp = avp;
2740 
2741 	/*
2742 	 * There is no requirement for an attrdir fh flag
2743 	 * because the attrdir has a vnode flag to distinguish
2744 	 * it from regular (non-xattr) directories.  The
2745 	 * FH4_ATTRDIR flag is set for future sanity checks.
2746 	 */
2747 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2748 	*cs->statusp = resp->status = NFS4_OK;
2749 	return;
2750 
2751 error_out:
2752 
2753 	*cs->statusp = resp->status = puterrno4(error);
2754 }
2755 
2756 static int
2757 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2758 {
2759 	int error;
2760 	int i;
2761 	clock_t delaytime;
2762 	caller_context_t ct;
2763 
2764 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2765 
2766 	/*
2767 	 * Don't block on mandatory locks. If this routine returns
2768 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2769 	 */
2770 	uio->uio_fmode = FNONBLOCK;
2771 
2772 	ct.cc_sysid = 0;
2773 	ct.cc_pid = 0;
2774 	ct.cc_caller_id = nfs4_srv_caller_id;
2775 
2776 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2777 
2778 
2779 		if (direction == FREAD) {
2780 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2781 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2782 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2783 		} else {
2784 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2785 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2786 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2787 		}
2788 
2789 		if (error != EAGAIN)
2790 			break;
2791 
2792 		if (i < rfs4_maxlock_tries - 1) {
2793 			delay(delaytime);
2794 			delaytime *= 2;
2795 		}
2796 	}
2797 
2798 	return (error);
2799 }
2800 
2801 /* ARGSUSED */
2802 static void
2803 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2804 	struct compound_state *cs)
2805 {
2806 	READ4args *args = &argop->nfs_argop4_u.opread;
2807 	READ4res *resp = &resop->nfs_resop4_u.opread;
2808 	int error;
2809 	int verror;
2810 	vnode_t *vp;
2811 	struct vattr va;
2812 	struct iovec iov;
2813 	struct uio uio;
2814 	u_offset_t offset;
2815 	bool_t *deleg = &cs->deleg;
2816 	nfsstat4 stat;
2817 	int in_crit = 0;
2818 	mblk_t *mp;
2819 	int alloc_err = 0;
2820 
2821 	vp = cs->vp;
2822 	if (vp == NULL) {
2823 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2824 		return;
2825 	}
2826 	if (cs->access == CS_ACCESS_DENIED) {
2827 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2828 		return;
2829 	}
2830 
2831 	/*
2832 	 * Enter the critical region before calling VOP_RWLOCK
2833 	 * to avoid a deadlock with write requests.
2834 	 */
2835 	if (nbl_need_check(vp)) {
2836 		nbl_start_crit(vp, RW_READER);
2837 		in_crit = 1;
2838 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2839 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2840 			goto out;
2841 		}
2842 	}
2843 
2844 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2845 					deleg, TRUE)) != NFS4_OK) {
2846 		*cs->statusp = resp->status = stat;
2847 		goto out;
2848 	}
2849 
2850 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2851 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2852 
2853 	/*
2854 	 * If we can't get the attributes, then we can't do the
2855 	 * right access checking.  So, we'll fail the request.
2856 	 */
2857 	if (verror) {
2858 		*cs->statusp = resp->status = puterrno4(verror);
2859 		goto out;
2860 	}
2861 
2862 	if (vp->v_type != VREG) {
2863 		*cs->statusp = resp->status =
2864 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
2865 		goto out;
2866 	}
2867 
2868 	if (crgetuid(cs->cr) != va.va_uid &&
2869 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
2870 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
2871 		*cs->statusp = resp->status = puterrno4(error);
2872 		goto out;
2873 	}
2874 
2875 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
2876 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2877 		goto out;
2878 	}
2879 
2880 	offset = args->offset;
2881 	if (offset >= va.va_size) {
2882 		*cs->statusp = resp->status = NFS4_OK;
2883 		resp->eof = TRUE;
2884 		resp->data_len = 0;
2885 		resp->data_val = NULL;
2886 		resp->mblk = NULL;
2887 		*cs->statusp = resp->status = NFS4_OK;
2888 		goto out;
2889 	}
2890 
2891 	if (args->count == 0) {
2892 		*cs->statusp = resp->status = NFS4_OK;
2893 		resp->eof = FALSE;
2894 		resp->data_len = 0;
2895 		resp->data_val = NULL;
2896 		resp->mblk = NULL;
2897 		goto out;
2898 	}
2899 
2900 	/*
2901 	 * Do not allocate memory more than maximum allowed
2902 	 * transfer size
2903 	 */
2904 	if (args->count > rfs4_tsize(req))
2905 		args->count = rfs4_tsize(req);
2906 
2907 	/*
2908 	 * mp will contain the data to be sent out in the read reply.
2909 	 * It will be freed after the reply has been sent.
2910 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
2911 	 * so that the call to xdrmblk_putmblk() never fails.
2912 	 * If the first alloc of the requested size fails, then
2913 	 * decrease the size to something more reasonable and wait
2914 	 * for the allocation to occur.
2915 	 */
2916 	mp = allocb(RNDUP(args->count), BPRI_MED);
2917 	if (mp == NULL) {
2918 		if (args->count > MAXBSIZE)
2919 			args->count = MAXBSIZE;
2920 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
2921 				STR_NOSIG, &alloc_err);
2922 	}
2923 	ASSERT(mp != NULL);
2924 	ASSERT(alloc_err == 0);
2925 
2926 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
2927 	iov.iov_len = args->count;
2928 	uio.uio_iov = &iov;
2929 	uio.uio_iovcnt = 1;
2930 	uio.uio_segflg = UIO_SYSSPACE;
2931 	uio.uio_extflg = UIO_COPY_CACHED;
2932 	uio.uio_loffset = args->offset;
2933 	uio.uio_resid = args->count;
2934 
2935 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
2936 
2937 	va.va_mask = AT_SIZE;
2938 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2939 
2940 	if (error) {
2941 		freeb(mp);
2942 		*cs->statusp = resp->status = puterrno4(error);
2943 		goto out;
2944 	}
2945 
2946 	*cs->statusp = resp->status = NFS4_OK;
2947 
2948 	ASSERT(uio.uio_resid >= 0);
2949 	resp->data_len = args->count - uio.uio_resid;
2950 	resp->data_val = (char *)mp->b_datap->db_base;
2951 	resp->mblk = mp;
2952 
2953 	if (!verror && offset + resp->data_len == va.va_size)
2954 		resp->eof = TRUE;
2955 	else
2956 		resp->eof = FALSE;
2957 
2958 out:
2959 	if (in_crit)
2960 		nbl_end_crit(vp);
2961 }
2962 
2963 static void
2964 rfs4_op_read_free(nfs_resop4 *resop)
2965 {
2966 	READ4res *resp = &resop->nfs_resop4_u.opread;
2967 
2968 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2969 		freeb(resp->mblk);
2970 		resp->mblk = NULL;
2971 		resp->data_val = NULL;
2972 		resp->data_len = 0;
2973 	}
2974 }
2975 
2976 static void
2977 rfs4_op_readdir_free(nfs_resop4 *resop)
2978 {
2979 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
2980 
2981 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
2982 		freeb(resp->mblk);
2983 		resp->mblk = NULL;
2984 		resp->data_len = 0;
2985 	}
2986 }
2987 
2988 
2989 /* ARGSUSED */
2990 static void
2991 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2992 	struct compound_state *cs)
2993 {
2994 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
2995 	int error;
2996 	vnode_t *vp;
2997 	struct exportinfo *exi, *sav_exi;
2998 	nfs_fh4_fmt_t *fh_fmtp;
2999 
3000 	if (cs->vp) {
3001 		VN_RELE(cs->vp);
3002 		cs->vp = NULL;
3003 	}
3004 
3005 	if (cs->cr)
3006 		crfree(cs->cr);
3007 
3008 	cs->cr = crdup(cs->basecr);
3009 
3010 	vp = exi_public->exi_vp;
3011 	if (vp == NULL) {
3012 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3013 		return;
3014 	}
3015 
3016 	error = makefh4(&cs->fh, vp, exi_public);
3017 	if (error != 0) {
3018 		*cs->statusp = resp->status = puterrno4(error);
3019 		return;
3020 	}
3021 	sav_exi = cs->exi;
3022 	if (exi_public == exi_root) {
3023 		/*
3024 		 * No filesystem is actually shared public, so we default
3025 		 * to exi_root. In this case, we must check whether root
3026 		 * is exported.
3027 		 */
3028 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3029 
3030 		/*
3031 		 * if root filesystem is exported, the exportinfo struct that we
3032 		 * should use is what checkexport4 returns, because root_exi is
3033 		 * actually a mostly empty struct.
3034 		 */
3035 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3036 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3037 		cs->exi = ((exi != NULL) ? exi : exi_public);
3038 	} else {
3039 		/*
3040 		 * it's a properly shared filesystem
3041 		 */
3042 		cs->exi = exi_public;
3043 	}
3044 
3045 	VN_HOLD(vp);
3046 	cs->vp = vp;
3047 
3048 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3049 		VN_RELE(cs->vp);
3050 		cs->vp = NULL;
3051 		cs->exi = sav_exi;
3052 		return;
3053 	}
3054 
3055 	*cs->statusp = resp->status = NFS4_OK;
3056 }
3057 
/*
 * XXX - issue with put*fh operations. Suppose /export/home is exported.
 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
 * or joe have restrictive search permissions, then we shouldn't let
 * the client get a file handle. This is easy to enforce. However, we
 * don't know what security flavor should be used until we resolve the
 * path name. Another complication is uid mapping. If root is
 * the user, then it will be mapped to the anonymous user by default,
 * but we won't know that till we've resolved the path name. And we won't
 * know what the anonymous user is.
 * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor
 * of the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
 */
3074 /* ARGSUSED */
3075 static void
3076 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3077 	struct compound_state *cs)
3078 {
3079 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3080 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3081 	nfs_fh4_fmt_t *fh_fmtp;
3082 
3083 	if (cs->vp) {
3084 		VN_RELE(cs->vp);
3085 		cs->vp = NULL;
3086 	}
3087 
3088 	if (cs->cr) {
3089 		crfree(cs->cr);
3090 		cs->cr = NULL;
3091 	}
3092 
3093 
3094 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3095 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3096 		return;
3097 	}
3098 
3099 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3100 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3101 				NULL);
3102 
3103 	if (cs->exi == NULL) {
3104 		*cs->statusp = resp->status = NFS4ERR_STALE;
3105 		return;
3106 	}
3107 
3108 	cs->cr = crdup(cs->basecr);
3109 
3110 	ASSERT(cs->cr != NULL);
3111 
3112 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3113 		*cs->statusp = resp->status;
3114 		return;
3115 	}
3116 
3117 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3118 		VN_RELE(cs->vp);
3119 		cs->vp = NULL;
3120 		return;
3121 	}
3122 
3123 	nfs_fh4_copy(&args->object, &cs->fh);
3124 	*cs->statusp = resp->status = NFS4_OK;
3125 	cs->deleg = FALSE;
3126 }
3127 
3128 /* ARGSUSED */
3129 static void
3130 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3131 	struct compound_state *cs)
3132 
3133 {
3134 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3135 	int error;
3136 	fid_t fid;
3137 	struct exportinfo *exi, *sav_exi;
3138 
3139 	if (cs->vp) {
3140 		VN_RELE(cs->vp);
3141 		cs->vp = NULL;
3142 	}
3143 
3144 	if (cs->cr)
3145 		crfree(cs->cr);
3146 
3147 	cs->cr = crdup(cs->basecr);
3148 
3149 	/*
3150 	 * Using rootdir, the system root vnode,
3151 	 * get its fid.
3152 	 */
3153 	bzero(&fid, sizeof (fid));
3154 	fid.fid_len = MAXFIDSZ;
3155 	error = vop_fid_pseudo(rootdir, &fid);
3156 	if (error != 0) {
3157 		*cs->statusp = resp->status = puterrno4(error);
3158 		return;
3159 	}
3160 
3161 	/*
3162 	 * Then use the root fsid & fid it to find out if it's exported
3163 	 *
3164 	 * If the server root isn't exported directly, then
3165 	 * it should at least be a pseudo export based on
3166 	 * one or more exports further down in the server's
3167 	 * file tree.
3168 	 */
3169 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3170 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3171 		NFS4_DEBUG(rfs4_debug,
3172 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3173 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3174 		return;
3175 	}
3176 
3177 	/*
3178 	 * Now make a filehandle based on the root
3179 	 * export and root vnode.
3180 	 */
3181 	error = makefh4(&cs->fh, rootdir, exi);
3182 	if (error != 0) {
3183 		*cs->statusp = resp->status = puterrno4(error);
3184 		return;
3185 	}
3186 
3187 	sav_exi = cs->exi;
3188 	cs->exi = exi;
3189 
3190 	VN_HOLD(rootdir);
3191 	cs->vp = rootdir;
3192 
3193 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3194 		VN_RELE(rootdir);
3195 		cs->vp = NULL;
3196 		cs->exi = sav_exi;
3197 		return;
3198 	}
3199 
3200 	*cs->statusp = resp->status = NFS4_OK;
3201 	cs->deleg = FALSE;
3202 }
3203 
3204 /*
3205  * A directory entry is a valid nfsv4 entry if
3206  * - it has a non-zero ino
3207  * - it is not a dot or dotdot name
3208  * - it is visible in a pseudo export or in a real export that can
3209  *   only have a limited view.
3210  */
3211 static bool_t
3212 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3213 		int *expseudo, int check_visible)
3214 {
3215 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3216 		*expseudo = 0;
3217 		return (FALSE);
3218 	}
3219 
3220 	if (! check_visible) {
3221 		*expseudo = 0;
3222 		return (TRUE);
3223 	}
3224 
3225 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3226 }
3227 
3228 /*
3229  * set_rdattr_params sets up the variables used to manage what information
3230  * to get for each directory entry.
3231  */
3232 static nfsstat4
3233 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3234 		bitmap4 attrs, bool_t *need_to_lookup)
3235 {
3236 	uint_t	va_mask;
3237 	nfsstat4 status;
3238 	bitmap4 objbits;
3239 
3240 	status = bitmap4_to_attrmask(attrs, sargp);
3241 	if (status != NFS4_OK) {
3242 		/*
3243 		 * could not even figure attr mask
3244 		 */
3245 		return (status);
3246 	}
3247 	va_mask = sargp->vap->va_mask;
3248 
3249 	/*
3250 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3251 	 * mntdfid_set is set once here, but mounted_on_fileid is
3252 	 * set in main dirent processing loop for each dirent.
3253 	 * The mntdfid_set is a simple optimization that lets the
3254 	 * server attr code avoid work when caller is readdir.
3255 	 */
3256 	sargp->mntdfid_set = TRUE;
3257 
3258 	/*
3259 	 * Lookup entry only if client asked for any of the following:
3260 	 * a) vattr attrs
3261 	 * b) vfs attrs
3262 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3263 	 *    other than mounted_on_fileid (which we can take from dirent)
3264 	 */
3265 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3266 
3267 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3268 		*need_to_lookup = TRUE;
3269 	else
3270 		*need_to_lookup = FALSE;
3271 
3272 	if (sargp->sbp == NULL)
3273 		return (NFS4_OK);
3274 
3275 	/*
3276 	 * If filesystem attrs are requested, get them now from the
3277 	 * directory vp, as most entries will have same filesystem. The only
3278 	 * exception are mounted over entries but we handle
3279 	 * those as we go (XXX mounted over detection not yet implemented).
3280 	 */
3281 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3282 	status = bitmap4_get_sysattrs(sargp);
3283 	sargp->vap->va_mask = va_mask;
3284 
3285 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3286 		/*
3287 		 * Failed to get filesystem attributes.
3288 		 * Return a rdattr_error for each entry, but don't fail.
3289 		 * However, don't get any obj-dependent attrs.
3290 		 */
3291 		sargp->rdattr_error = status;	/* for rdattr_error */
3292 		*need_to_lookup = FALSE;
3293 		/*
3294 		 * At least get fileid for regular readdir output
3295 		 */
3296 		sargp->vap->va_mask &= AT_NODEID;
3297 		status = NFS4_OK;
3298 	}
3299 
3300 	return (status);
3301 }
3302 
3303 /*
3304  * readlink: args: CURRENT_FH.
3305  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3306  */
3307 
3308 /* ARGSUSED */
3309 static void
3310 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3311 	struct compound_state *cs)
3312 {
3313 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3314 	int error;
3315 	vnode_t *vp;
3316 	struct iovec iov;
3317 	struct vattr va;
3318 	struct uio uio;
3319 	char *data;
3320 
3321 	/* CURRENT_FH: directory */
3322 	vp = cs->vp;
3323 	if (vp == NULL) {
3324 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3325 		return;
3326 	}
3327 
3328 	if (cs->access == CS_ACCESS_DENIED) {
3329 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3330 		return;
3331 	}
3332 
3333 	if (vp->v_type == VDIR) {
3334 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3335 		return;
3336 	}
3337 
3338 	if (vp->v_type != VLNK) {
3339 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3340 		return;
3341 	}
3342 
3343 	va.va_mask = AT_MODE;
3344 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3345 	if (error) {
3346 		*cs->statusp = resp->status = puterrno4(error);
3347 		return;
3348 	}
3349 
3350 	if (MANDLOCK(vp, va.va_mode)) {
3351 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3352 		return;
3353 	}
3354 
3355 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3356 
3357 	iov.iov_base = data;
3358 	iov.iov_len = MAXPATHLEN;
3359 	uio.uio_iov = &iov;
3360 	uio.uio_iovcnt = 1;
3361 	uio.uio_segflg = UIO_SYSSPACE;
3362 	uio.uio_extflg = UIO_COPY_CACHED;
3363 	uio.uio_loffset = 0;
3364 	uio.uio_resid = MAXPATHLEN;
3365 
3366 	error = VOP_READLINK(vp, &uio, cs->cr);
3367 
3368 	if (error) {
3369 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3370 		*cs->statusp = resp->status = puterrno4(error);
3371 		return;
3372 	}
3373 
3374 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3375 
3376 	/*
3377 	 * treat link name as data
3378 	 */
3379 	(void) str_to_utf8(data, &resp->link);
3380 
3381 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3382 	*cs->statusp = resp->status = NFS4_OK;
3383 }
3384 
3385 static void
3386 rfs4_op_readlink_free(nfs_resop4 *resop)
3387 {
3388 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3389 	utf8string *symlink = &resp->link;
3390 
3391 	if (symlink->utf8string_val) {
3392 		UTF8STRING_FREE(*symlink)
3393 	}
3394 }
3395 
3396 /*
3397  * release_lockowner:
3398  *	Release any state associated with the supplied
3399  *	lockowner. Note if any lo_state is holding locks we will not
3400  *	rele that lo_state and thus the lockowner will not be destroyed.
3401  *	A client using lock after the lock owner stateid has been released
3402  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3403  *	to reissue the lock with new_lock_owner set to TRUE.
3404  *	args: lock_owner
3405  *	res:  status
3406  */
3407 /* ARGSUSED */
3408 static void
3409 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3410 	struct svc_req *req, struct compound_state *cs)
3411 {
3412 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3413 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3414 	rfs4_lockowner_t *lo;
3415 	rfs4_openowner_t *oop;
3416 	rfs4_state_t *sp;
3417 	rfs4_lo_state_t *lsp;
3418 	rfs4_client_t *cp;
3419 	bool_t create = FALSE;
3420 	locklist_t *llist;
3421 	sysid_t sysid;
3422 
3423 	/* Make sure there is a clientid around for this request */
3424 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3425 
3426 	if (cp == NULL) {
3427 		*cs->statusp = resp->status =
3428 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3429 		return;
3430 	}
3431 	rfs4_client_rele(cp);
3432 
3433 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3434 	if (lo == NULL) {
3435 		*cs->statusp = resp->status = NFS4_OK;
3436 		return;
3437 	}
3438 	ASSERT(lo->client != NULL);
3439 
3440 	/*
3441 	 * Check for EXPIRED client. If so will reap state with in a lease
3442 	 * period or on next set_clientid_confirm step
3443 	 */
3444 	if (rfs4_lease_expired(lo->client)) {
3445 		rfs4_lockowner_rele(lo);
3446 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3447 		return;
3448 	}
3449 
3450 	/*
3451 	 * If no sysid has been assigned, then no locks exist; just return.
3452 	 */
3453 	rfs4_dbe_lock(lo->client->dbe);
3454 	if (lo->client->sysidt == LM_NOSYSID) {
3455 		rfs4_lockowner_rele(lo);
3456 		rfs4_dbe_unlock(lo->client->dbe);
3457 		return;
3458 	}
3459 
3460 	sysid = lo->client->sysidt;
3461 	rfs4_dbe_unlock(lo->client->dbe);
3462 
3463 	/*
3464 	 * Mark the lockowner invalid.
3465 	 */
3466 	rfs4_dbe_hide(lo->dbe);
3467 
3468 	/*
3469 	 * sysid-pid pair should now not be used since the lockowner is
3470 	 * invalid. If the client were to instantiate the lockowner again
3471 	 * it would be assigned a new pid. Thus we can get the list of
3472 	 * current locks.
3473 	 */
3474 
3475 	llist = flk_get_active_locks(sysid, lo->pid);
3476 	/* If we are still holding locks fail */
3477 	if (llist != NULL) {
3478 
3479 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3480 
3481 		flk_free_locklist(llist);
3482 		/*
3483 		 * We need to unhide the lockowner so the client can
3484 		 * try it again. The bad thing here is if the client
3485 		 * has a logic error that took it here in the first place
3486 		 * he probably has lost accounting of the locks that it
3487 		 * is holding. So we may have dangling state until the
3488 		 * open owner state is reaped via close. One scenario
3489 		 * that could possibly occur is that the client has
3490 		 * sent the unlock request(s) in separate threads
3491 		 * and has not waited for the replies before sending the
3492 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3493 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3494 		 * reissuing the request.
3495 		 */
3496 		rfs4_dbe_unhide(lo->dbe);
3497 		rfs4_lockowner_rele(lo);
3498 		return;
3499 	}
3500 
3501 	/*
3502 	 * For the corresponding client we need to check each open
3503 	 * owner for any opens that have lockowner state associated
3504 	 * with this lockowner.
3505 	 */
3506 
3507 	rfs4_dbe_lock(lo->client->dbe);
3508 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3509 	    oop = oop->openownerlist.next->oop) {
3510 
3511 		rfs4_dbe_lock(oop->dbe);
3512 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3513 		    sp = sp->ownerstateids.next->sp) {
3514 
3515 			rfs4_dbe_lock(sp->dbe);
3516 			for (lsp = sp->lockownerlist.next->lsp;
3517 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3518 				if (lsp->locker == lo) {
3519 					rfs4_dbe_lock(lsp->dbe);
3520 					rfs4_dbe_invalidate(lsp->dbe);
3521 					rfs4_dbe_unlock(lsp->dbe);
3522 				}
3523 			}
3524 			rfs4_dbe_unlock(sp->dbe);
3525 		}
3526 		rfs4_dbe_unlock(oop->dbe);
3527 	}
3528 	rfs4_dbe_unlock(lo->client->dbe);
3529 
3530 	rfs4_lockowner_rele(lo);
3531 
3532 	*cs->statusp = resp->status = NFS4_OK;
3533 }
3534 
3535 /*
3536  * short utility function to lookup a file and recall the delegation
3537  */
3538 static rfs4_file_t *
3539 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3540 	int *lkup_error, cred_t *cr)
3541 {
3542 	vnode_t *vp;
3543 	rfs4_file_t *fp = NULL;
3544 	bool_t fcreate = FALSE;
3545 	int error;
3546 
3547 	if (vpp)
3548 		*vpp = NULL;
3549 
3550 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3551 		VN_SETPATH(rootdir, dvp, vp, nm, strlen(nm));
3552 		if (vp->v_type == VREG)
3553 			fp = rfs4_findfile(vp, NULL, &fcreate);
3554 		if (vpp)
3555 			*vpp = vp;
3556 		else
3557 			VN_RELE(vp);
3558 	}
3559 
3560 	if (lkup_error)
3561 		*lkup_error = error;
3562 
3563 	return (fp);
3564 }
3565 
3566 /*
3567  * remove: args: CURRENT_FH: directory; name.
3568  *	res: status. If success - CURRENT_FH unchanged, return change_info
3569  *		for directory.
3570  */
3571 /* ARGSUSED */
3572 static void
3573 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3574 	struct compound_state *cs)
3575 {
3576 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3577 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3578 	int error;
3579 	vnode_t *dvp, *vp;
3580 	struct vattr bdva, idva, adva;
3581 	char *nm;
3582 	uint_t len;
3583 	rfs4_file_t *fp;
3584 	int in_crit = 0;
3585 
3586 	/* CURRENT_FH: directory */
3587 	dvp = cs->vp;
3588 	if (dvp == NULL) {
3589 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3590 		return;
3591 	}
3592 
3593 	if (cs->access == CS_ACCESS_DENIED) {
3594 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3595 		return;
3596 	}
3597 
3598 	/*
3599 	 * If there is an unshared filesystem mounted on this vnode,
3600 	 * Do not allow to remove anything in this directory.
3601 	 */
3602 	if (vn_ismntpt(dvp)) {
3603 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3604 		return;
3605 	}
3606 
3607 	if (dvp->v_type != VDIR) {
3608 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3609 		return;
3610 	}
3611 
3612 	if (!utf8_dir_verify(&args->target)) {
3613 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3614 		return;
3615 	}
3616 
3617 	/*
3618 	 * Lookup the file so that we can check if it's a directory
3619 	 */
3620 	nm = utf8_to_fn(&args->target, &len, NULL);
3621 	if (nm == NULL) {
3622 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3623 		return;
3624 	}
3625 
3626 	if (len > MAXNAMELEN) {
3627 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3628 		kmem_free(nm, len);
3629 		return;
3630 	}
3631 
3632 	if (rdonly4(cs->exi, cs->vp, req)) {
3633 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3634 		kmem_free(nm, len);
3635 		return;
3636 	}
3637 
3638 	/*
3639 	 * Lookup the file to determine type and while we are see if
3640 	 * there is a file struct around and check for delegation.
3641 	 * We don't need to acquire va_seq before this lookup, if
3642 	 * it causes an update, cinfo.before will not match, which will
3643 	 * trigger a cache flush even if atomic is TRUE.
3644 	 */
3645 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3646 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3647 						NULL)) {
3648 			VN_RELE(vp);
3649 			rfs4_file_rele(fp);
3650 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3651 			kmem_free(nm, len);
3652 			return;
3653 		}
3654 	}
3655 
3656 	/* Didn't find anything to remove */
3657 	if (vp == NULL) {
3658 		*cs->statusp = resp->status = error;
3659 		kmem_free(nm, len);
3660 		return;
3661 	}
3662 
3663 	if (nbl_need_check(vp)) {
3664 		nbl_start_crit(vp, RW_READER);
3665 		in_crit = 1;
3666 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3667 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3668 			kmem_free(nm, len);
3669 			nbl_end_crit(vp);
3670 			VN_RELE(vp);
3671 			if (fp) {
3672 				rfs4_clear_dont_grant(fp);
3673 				rfs4_file_rele(fp);
3674 			}
3675 			return;
3676 		}
3677 	}
3678 
3679 	/* Get dir "before" change value */
3680 	bdva.va_mask = AT_CTIME|AT_SEQ;
3681 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3682 	if (error) {
3683 		*cs->statusp = resp->status = puterrno4(error);
3684 		kmem_free(nm, len);
3685 		return;
3686 	}
3687 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3688 
3689 	/* Actually do the REMOVE operation */
3690 	if (vp->v_type == VDIR) {
3691 		/*
3692 		 * Can't remove a directory that has a mounted-on filesystem.
3693 		 */
3694 		if (vn_ismntpt(vp)) {
3695 			error = EACCES;
3696 		} else {
3697 			/*
3698 			 * System V defines rmdir to return EEXIST,
3699 			 * not * ENOTEMPTY, if the directory is not
3700 			 * empty.  A System V NFS server needs to map
3701 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3702 			 * transmit over the wire.
3703 			 */
3704 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3705 				== EEXIST)
3706 				error = ENOTEMPTY;
3707 		}
3708 	} else {
3709 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3710 			fp != NULL) {
3711 			struct vattr va;
3712 			vnode_t *tvp;
3713 
3714 			rfs4_dbe_lock(fp->dbe);
3715 			tvp = fp->vp;
3716 			if (tvp)
3717 				VN_HOLD(tvp);
3718 			rfs4_dbe_unlock(fp->dbe);
3719 
3720 			if (tvp) {
3721 				/*
3722 				 * This is va_seq safe because we are not
3723 				 * manipulating dvp.
3724 				 */
3725 				va.va_mask = AT_NLINK;
3726 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3727 					va.va_nlink == 0) {
3728 					/* Remove state on file remove */
3729 					if (in_crit) {
3730 						nbl_end_crit(vp);
3731 						in_crit = 0;
3732 					}
3733 					rfs4_close_all_state(fp);
3734 				}
3735 				VN_RELE(tvp);
3736 			}
3737 		}
3738 	}
3739 
3740 	if (in_crit)
3741 		nbl_end_crit(vp);
3742 	VN_RELE(vp);
3743 
3744 	if (fp) {
3745 		rfs4_clear_dont_grant(fp);
3746 		rfs4_file_rele(fp);
3747 	}
3748 	kmem_free(nm, len);
3749 
3750 	if (error) {
3751 		*cs->statusp = resp->status = puterrno4(error);
3752 		return;
3753 	}
3754 
3755 	/*
3756 	 * Get the initial "after" sequence number, if it fails, set to zero
3757 	 */
3758 	idva.va_mask = AT_SEQ;
3759 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3760 		idva.va_seq = 0;
3761 
3762 	/*
3763 	 * Force modified data and metadata out to stable storage.
3764 	 */
3765 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3766 
3767 	/*
3768 	 * Get "after" change value, if it fails, simply return the
3769 	 * before value.
3770 	 */
3771 	adva.va_mask = AT_CTIME|AT_SEQ;
3772 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3773 		adva.va_ctime = bdva.va_ctime;
3774 		adva.va_seq = 0;
3775 	}
3776 
3777 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3778 
3779 	/*
3780 	 * The cinfo.atomic = TRUE only if we have
3781 	 * non-zero va_seq's, and it has incremented by exactly one
3782 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3783 	 * the VOP_FSYNC.
3784 	 */
3785 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3786 			idva.va_seq == (bdva.va_seq + 1) &&
3787 			idva.va_seq == adva.va_seq)
3788 		resp->cinfo.atomic = TRUE;
3789 	else
3790 		resp->cinfo.atomic = FALSE;
3791 
3792 	*cs->statusp = resp->status = NFS4_OK;
3793 }
3794 
3795 /*
3796  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3797  *		oldname and newname.
3798  *	res: status. If success - CURRENT_FH unchanged, return change_info
3799  *		for both from and target directories.
3800  */
3801 /* ARGSUSED */
3802 static void
3803 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3804 	struct compound_state *cs)
3805 {
3806 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3807 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3808 	int error;
3809 	vnode_t *odvp;
3810 	vnode_t *ndvp;
3811 	vnode_t *srcvp, *targvp;
3812 	struct vattr obdva, oidva, oadva;
3813 	struct vattr nbdva, nidva, nadva;
3814 	char *onm, *nnm;
3815 	uint_t olen, nlen;
3816 	rfs4_file_t *fp, *sfp;
3817 	int in_crit_src, in_crit_targ;
3818 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3819 
3820 	fp = sfp = NULL;
3821 	srcvp = targvp = NULL;
3822 	in_crit_src = in_crit_targ = 0;
3823 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3824 
3825 	/* CURRENT_FH: target directory */
3826 	ndvp = cs->vp;
3827 	if (ndvp == NULL) {
3828 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3829 		return;
3830 	}
3831 
3832 	/* SAVED_FH: from directory */
3833 	odvp = cs->saved_vp;
3834 	if (odvp == NULL) {
3835 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3836 		return;
3837 	}
3838 
3839 	if (cs->access == CS_ACCESS_DENIED) {
3840 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3841 		return;
3842 	}
3843 
3844 	/*
3845 	 * If there is an unshared filesystem mounted on this vnode,
3846 	 * do not allow to rename objects in this directory.
3847 	 */
3848 	if (vn_ismntpt(odvp)) {
3849 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3850 		return;
3851 	}
3852 
3853 	/*
3854 	 * If there is an unshared filesystem mounted on this vnode,
3855 	 * do not allow to rename to this directory.
3856 	 */
3857 	if (vn_ismntpt(ndvp)) {
3858 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3859 		return;
3860 	}
3861 
3862 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
3863 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3864 		return;
3865 	}
3866 
3867 	if (cs->saved_exi != cs->exi) {
3868 		*cs->statusp = resp->status = NFS4ERR_XDEV;
3869 		return;
3870 	}
3871 
3872 	if (!utf8_dir_verify(&args->oldname)) {
3873 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3874 		return;
3875 	}
3876 
3877 	if (!utf8_dir_verify(&args->newname)) {
3878 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3879 		return;
3880 	}
3881 
3882 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
3883 	if (onm == NULL) {
3884 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3885 		return;
3886 	}
3887 
3888 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
3889 	if (nnm == NULL) {
3890 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3891 		kmem_free(onm, olen);
3892 		return;
3893 	}
3894 
3895 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
3896 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3897 		kmem_free(onm, olen);
3898 		kmem_free(nnm, nlen);
3899 		return;
3900 	}
3901 
3902 
3903 	if (rdonly4(cs->exi, cs->vp, req)) {
3904 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3905 		kmem_free(onm, olen);
3906 		kmem_free(nnm, nlen);
3907 		return;
3908 	}
3909 
3910 	/*
3911 	 * Is the source a file and have a delegation?
3912 	 * We don't need to acquire va_seq before these lookups, if
3913 	 * it causes an update, cinfo.before will not match, which will
3914 	 * trigger a cache flush even if atomic is TRUE.
3915 	 */
3916 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
3917 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
3918 						NULL)) {
3919 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3920 			goto err_out;
3921 		}
3922 	}
3923 
3924 	if (srcvp == NULL) {
3925 		*cs->statusp = resp->status = puterrno4(error);
3926 		kmem_free(onm, olen);
3927 		kmem_free(nnm, nlen);
3928 		return;
3929 	}
3930 
3931 	sfp_rele_grant_hold = 1;
3932 
3933 	/* Does the destination exist and a file and have a delegation? */
3934 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
3935 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3936 						NULL)) {
3937 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3938 			goto err_out;
3939 		}
3940 	}
3941 	fp_rele_grant_hold = 1;
3942 
3943 
3944 	/* Check for NBMAND lock on both source and target */
3945 	if (nbl_need_check(srcvp)) {
3946 		nbl_start_crit(srcvp, RW_READER);
3947 		in_crit_src = 1;
3948 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
3949 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3950 			goto err_out;
3951 		}
3952 	}
3953 
3954 	if (targvp && nbl_need_check(targvp)) {
3955 		nbl_start_crit(targvp, RW_READER);
3956 		in_crit_targ = 1;
3957 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
3958 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3959 			goto err_out;
3960 		}
3961 	}
3962 
3963 	/* Get source "before" change value */
3964 	obdva.va_mask = AT_CTIME|AT_SEQ;
3965 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
3966 	if (!error) {
3967 		nbdva.va_mask = AT_CTIME|AT_SEQ;
3968 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
3969 	}
3970 	if (error) {
3971 		*cs->statusp = resp->status = puterrno4(error);
3972 		goto err_out;
3973 	}
3974 
3975 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
3976 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
3977 
3978 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
3979 		fp != NULL) {
3980 		struct vattr va;
3981 		vnode_t *tvp;
3982 
3983 		rfs4_dbe_lock(fp->dbe);
3984 		tvp = fp->vp;
3985 		if (tvp)
3986 			VN_HOLD(tvp);
3987 		rfs4_dbe_unlock(fp->dbe);
3988 
3989 		if (tvp) {
3990 			va.va_mask = AT_NLINK;
3991 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3992 				va.va_nlink == 0) {
3993 				/* The file is gone and so should the state */
3994 				if (in_crit_targ) {
3995 					nbl_end_crit(targvp);
3996 					in_crit_targ = 0;
3997 				}
3998 				rfs4_close_all_state(fp);
3999 			}
4000 			VN_RELE(tvp);
4001 		}
4002 	}
4003 
4004 	if (in_crit_src)
4005 		nbl_end_crit(srcvp);
4006 	if (srcvp)
4007 		VN_RELE(srcvp);
4008 	if (in_crit_targ)
4009 		nbl_end_crit(targvp);
4010 	if (targvp)
4011 		VN_RELE(targvp);
4012 
4013 	if (sfp) {
4014 		rfs4_clear_dont_grant(sfp);
4015 		rfs4_file_rele(sfp);
4016 	}
4017 	if (fp) {
4018 		rfs4_clear_dont_grant(fp);
4019 		rfs4_file_rele(fp);
4020 	}
4021 
4022 	kmem_free(onm, olen);
4023 	kmem_free(nnm, nlen);
4024 
4025 	/*
4026 	 * Get the initial "after" sequence number, if it fails, set to zero
4027 	 */
4028 	oidva.va_mask = AT_SEQ;
4029 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4030 		oidva.va_seq = 0;
4031 
4032 	nidva.va_mask = AT_SEQ;
4033 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4034 		nidva.va_seq = 0;
4035 
4036 	/*
4037 	 * Force modified data and metadata out to stable storage.
4038 	 */
4039 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4040 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4041 
4042 	if (error) {
4043 		*cs->statusp = resp->status = puterrno4(error);
4044 		return;
4045 	}
4046 
4047 	/*
4048 	 * Get "after" change values, if it fails, simply return the
4049 	 * before value.
4050 	 */
4051 	oadva.va_mask = AT_CTIME|AT_SEQ;
4052 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4053 		oadva.va_ctime = obdva.va_ctime;
4054 		oadva.va_seq = 0;
4055 	}
4056 
4057 	nadva.va_mask = AT_CTIME|AT_SEQ;
4058 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4059 		nadva.va_ctime = nbdva.va_ctime;
4060 		nadva.va_seq = 0;
4061 	}
4062 
4063 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4064 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4065 
4066 	/*
4067 	 * The cinfo.atomic = TRUE only if we have
4068 	 * non-zero va_seq's, and it has incremented by exactly one
4069 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4070 	 */
4071 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4072 			oidva.va_seq == (obdva.va_seq + 1) &&
4073 			oidva.va_seq == oadva.va_seq)
4074 		resp->source_cinfo.atomic = TRUE;
4075 	else
4076 		resp->source_cinfo.atomic = FALSE;
4077 
4078 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4079 			nidva.va_seq == (nbdva.va_seq + 1) &&
4080 			nidva.va_seq == nadva.va_seq)
4081 		resp->target_cinfo.atomic = TRUE;
4082 	else
4083 		resp->target_cinfo.atomic = FALSE;
4084 
4085 #ifdef	VOLATILE_FH_TEST
4086 	{
4087 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4088 
4089 	/*
4090 	 * Add the renamed file handle to the volatile rename list
4091 	 */
4092 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4093 		/* file handles may expire on rename */
4094 		vnode_t *vp;
4095 
4096 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4097 		/*
4098 		 * Already know that nnm will be a valid string
4099 		 */
4100 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4101 		kmem_free(nnm, nlen);
4102 		if (!error) {
4103 			add_volrnm_fh(cs->exi, vp);
4104 			VN_RELE(vp);
4105 		}
4106 	}
4107 	}
4108 #endif	/* VOLATILE_FH_TEST */
4109 
4110 	*cs->statusp = resp->status = NFS4_OK;
4111 	return;
4112 
4113 err_out:
4114 	kmem_free(onm, olen);
4115 	kmem_free(nnm, nlen);
4116 
4117 	if (in_crit_src) nbl_end_crit(srcvp);
4118 	if (in_crit_targ) nbl_end_crit(targvp);
4119 	if (targvp) VN_RELE(targvp);
4120 	if (srcvp) VN_RELE(srcvp);
4121 	if (sfp) {
4122 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4123 		rfs4_file_rele(sfp);
4124 	}
4125 	if (fp) {
4126 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4127 		rfs4_file_rele(fp);
4128 	}
4129 }
4130 
4131 /* ARGSUSED */
4132 static void
4133 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4134 	struct compound_state *cs)
4135 {
4136 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4137 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4138 	rfs4_client_t *cp;
4139 
4140 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4141 		*cs->statusp = resp->status =
4142 			rfs4_check_clientid(&args->clientid, 0);
4143 		return;
4144 	}
4145 
4146 	if (rfs4_lease_expired(cp)) {
4147 		rfs4_client_rele(cp);
4148 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4149 		return;
4150 	}
4151 
4152 	rfs4_update_lease(cp);
4153 
4154 	mutex_enter(cp->cbinfo.cb_lock);
4155 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4156 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4157 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4158 	} else {
4159 		*cs->statusp = resp->status = NFS4_OK;
4160 	}
4161 	mutex_exit(cp->cbinfo.cb_lock);
4162 
4163 	rfs4_client_rele(cp);
4164 
4165 }
4166 
4167 /* ARGSUSED */
4168 static void
4169 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4170 	struct compound_state *cs)
4171 {
4172 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4173 
4174 	/* No need to check cs->access - we are not accessing any object */
4175 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4176 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4177 		return;
4178 	}
4179 	if (cs->vp != NULL) {
4180 		VN_RELE(cs->vp);
4181 	}
4182 	cs->vp = cs->saved_vp;
4183 	cs->saved_vp = NULL;
4184 	cs->exi = cs->saved_exi;
4185 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4186 	*cs->statusp = resp->status = NFS4_OK;
4187 	cs->deleg = FALSE;
4188 }
4189 
4190 /* ARGSUSED */
4191 static void
4192 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4193 	struct compound_state *cs)
4194 {
4195 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4196 
4197 	/* No need to check cs->access - we are not accessing any object */
4198 	if (cs->vp == NULL) {
4199 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4200 		return;
4201 	}
4202 	if (cs->saved_vp != NULL) {
4203 		VN_RELE(cs->saved_vp);
4204 	}
4205 	cs->saved_vp = cs->vp;
4206 	VN_HOLD(cs->saved_vp);
4207 	cs->saved_exi = cs->exi;
4208 	/*
4209 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4210 	 * unless necessary.
4211 	 */
4212 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4213 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4214 	}
4215 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4216 	*cs->statusp = resp->status = NFS4_OK;
4217 }
4218 
4219 /*
4220  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4221  * return the bitmap of attrs that were set successfully. It is also
4222  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4223  * always be called only after rfs4_do_set_attrs().
4224  *
4225  * Verify that the attributes are same as the expected ones. sargp->vap
4226  * and sargp->sbp contain the input attributes as translated from fattr4.
4227  *
4228  * This function verifies only the attrs that correspond to a vattr or
4229  * vfsstat struct. That is because of the extra step needed to get the
4230  * corresponding system structs. Other attributes have already been set or
4231  * verified by do_rfs4_set_attrs.
4232  *
4233  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4234  */
4235 static int
4236 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4237 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4238 {
4239 	int error, ret_error = 0;
4240 	int i, k;
4241 	uint_t sva_mask = sargp->vap->va_mask;
4242 	uint_t vbit;
4243 	union nfs4_attr_u *na;
4244 	uint8_t *amap;
4245 	bool_t getsb = ntovp->vfsstat;
4246 
4247 	if (sva_mask != 0) {
4248 		/*
4249 		 * Okay to overwrite sargp->vap because we verify based
4250 		 * on the incoming values.
4251 		 */
4252 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4253 				sargp->cs->cr);
4254 		if (ret_error) {
4255 			if (resp == NULL)
4256 				return (ret_error);
4257 			/*
4258 			 * Must return bitmap of successful attrs
4259 			 */
4260 			sva_mask = 0;	/* to prevent checking vap later */
4261 		} else {
4262 			/*
4263 			 * Some file systems clobber va_mask. it is probably
4264 			 * wrong of them to do so, nonethless we practice
4265 			 * defensive coding.
4266 			 * See bug id 4276830.
4267 			 */
4268 			sargp->vap->va_mask = sva_mask;
4269 		}
4270 	}
4271 
4272 	if (getsb) {
4273 		/*
4274 		 * Now get the superblock and loop on the bitmap, as there is
4275 		 * no simple way of translating from superblock to bitmap4.
4276 		 */
4277 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4278 		if (ret_error) {
4279 			if (resp == NULL)
4280 				goto errout;
4281 			getsb = FALSE;
4282 		}
4283 	}
4284 
4285 	/*
4286 	 * Now loop and verify each attribute which getattr returned
4287 	 * whether it's the same as the input.
4288 	 */
4289 	if (resp == NULL && !getsb && (sva_mask == 0))
4290 		goto errout;
4291 
4292 	na = ntovp->na;
4293 	amap = ntovp->amap;
4294 	k = 0;
4295 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4296 		k = *amap;
4297 		ASSERT(nfs4_ntov_map[k].nval == k);
4298 		vbit = nfs4_ntov_map[k].vbit;
4299 
4300 		/*
4301 		 * If vattr attribute but VOP_GETATTR failed, or it's
4302 		 * superblock attribute but VFS_STATVFS failed, skip
4303 		 */
4304 		if (vbit) {
4305 			if ((vbit & sva_mask) == 0)
4306 				continue;
4307 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4308 			continue;
4309 		}
4310 		error = (*nfs4_ntov_map[k].sv_getit)(
4311 				NFS4ATTR_VERIT, sargp, na);
4312 		if (resp != NULL) {
4313 			if (error)
4314 				ret_error = -1;	/* not all match */
4315 			else	/* update response bitmap */
4316 				*resp |= nfs4_ntov_map[k].fbit;
4317 			continue;
4318 		}
4319 		if (error) {
4320 			ret_error = -1;	/* not all match */
4321 			break;
4322 		}
4323 	}
4324 errout:
4325 	return (ret_error);
4326 }
4327 
4328 /*
4329  * Decode the attribute to be set/verified. If the attr requires a sys op
4330  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4331  * call the sv_getit function for it, because the sys op hasn't yet been done.
4332  * Return 0 for success, error code if failed.
4333  *
4334  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4335  */
4336 static int
4337 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4338 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4339 {
4340 	int error = 0;
4341 	bool_t set_later;
4342 
4343 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4344 
4345 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4346 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4347 		/*
4348 		 * don't verify yet if a vattr or sb dependent attr,
4349 		 * because we don't have their sys values yet.
4350 		 * Will be done later.
4351 		 */
4352 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4353 			/*
4354 			 * ACLs are a special case, since setting the MODE
4355 			 * conflicts with setting the ACL.  We delay setting
4356 			 * the ACL until all other attributes have been set.
4357 			 * The ACL gets set in do_rfs4_op_setattr().
4358 			 */
4359 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4360 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4361 				    sargp, nap);
4362 				if (error) {
4363 					xdr_free(nfs4_ntov_map[k].xfunc,
4364 					    (caddr_t)nap);
4365 				}
4366 			}
4367 		}
4368 	} else {
4369 #ifdef  DEBUG
4370 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4371 			"decoding attribute %d\n", k);
4372 #endif
4373 		error = EINVAL;
4374 	}
4375 	if (!error && resp_bval && !set_later) {
4376 		*resp_bval |= nfs4_ntov_map[k].fbit;
4377 	}
4378 
4379 	return (error);
4380 }
4381 
4382 /*
4383  * Set vattr based on incoming fattr4 attrs - used by setattr.
4384  * Set response mask. Ignore any values that are not writable vattr attrs.
4385  */
4386 static nfsstat4
4387 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4388 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4389 		nfs4_attr_cmd_t cmd)
4390 {
4391 	int error = 0;
4392 	int i;
4393 	char *attrs = fattrp->attrlist4;
4394 	uint32_t attrslen = fattrp->attrlist4_len;
4395 	XDR xdr;
4396 	nfsstat4 status = NFS4_OK;
4397 	vnode_t *vp = cs->vp;
4398 	union nfs4_attr_u *na;
4399 	uint8_t *amap;
4400 
4401 #ifndef lint
4402 	/*
4403 	 * Make sure that maximum attribute number can be expressed as an
4404 	 * 8 bit quantity.
4405 	 */
4406 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4407 #endif
4408 
4409 	if (vp == NULL) {
4410 		if (resp)
4411 			*resp = 0;
4412 		return (NFS4ERR_NOFILEHANDLE);
4413 	}
4414 	if (cs->access == CS_ACCESS_DENIED) {
4415 		if (resp)
4416 			*resp = 0;
4417 		return (NFS4ERR_ACCESS);
4418 	}
4419 
4420 	sargp->op = cmd;
4421 	sargp->cs = cs;
4422 	sargp->flag = 0;	/* may be set later */
4423 	sargp->vap->va_mask = 0;
4424 	sargp->rdattr_error = NFS4_OK;
4425 	sargp->rdattr_error_req = FALSE;
4426 	/* sargp->sbp is set by the caller */
4427 
4428 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4429 
4430 	na = ntovp->na;
4431 	amap = ntovp->amap;
4432 
4433 	/*
4434 	 * The following loop iterates on the nfs4_ntov_map checking
4435 	 * if the fbit is set in the requested bitmap.
4436 	 * If set then we process the arguments using the
4437 	 * rfs4_fattr4 conversion functions to populate the setattr
4438 	 * vattr and va_mask. Any settable attrs that are not using vattr
4439 	 * will be set in this loop.
4440 	 */
4441 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4442 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4443 			continue;
4444 		}
4445 		/*
4446 		 * If setattr, must be a writable attr.
4447 		 * If verify/nverify, must be a readable attr.
4448 		 */
4449 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4450 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4451 			/*
4452 			 * Client tries to set/verify an
4453 			 * unsupported attribute, tries to set
4454 			 * a read only attr or verify a write
4455 			 * only one - error!
4456 			 */
4457 			break;
4458 		}
4459 		/*
4460 		 * Decode the attribute to set/verify
4461 		 */
4462 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4463 					&xdr, resp ? resp : NULL, na);
4464 		if (error)
4465 			break;
4466 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4467 		na++;
4468 		(ntovp->attrcnt)++;
4469 		if (nfs4_ntov_map[i].vfsstat)
4470 			ntovp->vfsstat = TRUE;
4471 	}
4472 
4473 	if (error != 0)
4474 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4475 						puterrno4(error));
4476 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4477 	return (status);
4478 }
4479 
4480 static nfsstat4
4481 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4482 		stateid4 *stateid)
4483 {
4484 	int error = 0;
4485 	struct nfs4_svgetit_arg sarg;
4486 	bool_t trunc;
4487 
4488 	nfsstat4 status = NFS4_OK;
4489 	cred_t *cr = cs->cr;
4490 	vnode_t *vp = cs->vp;
4491 	struct nfs4_ntov_table ntov;
4492 	struct statvfs64 sb;
4493 	struct vattr bva;
4494 	struct flock64 bf;
4495 	int in_crit = 0;
4496 	uint_t saved_mask = 0;
4497 	caller_context_t ct;
4498 
4499 	*resp = 0;
4500 	sarg.sbp = &sb;
4501 	nfs4_ntov_table_init(&ntov);
4502 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4503 			NFS4ATTR_SETIT);
4504 	if (status != NFS4_OK) {
4505 		/*
4506 		 * failed set attrs
4507 		 */
4508 		goto done;
4509 	}
4510 	if ((sarg.vap->va_mask == 0) &&
4511 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4512 		/*
4513 		 * no further work to be done
4514 		 */
4515 		goto done;
4516 	}
4517 
4518 	/*
4519 	 * If we got a request to set the ACL and the MODE, only
4520 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4521 	 * to change any other bits, along with setting an ACL,
4522 	 * gives NFS4ERR_INVAL.
4523 	 */
4524 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4525 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4526 		vattr_t va;
4527 
4528 		va.va_mask = AT_MODE;
4529 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4530 		if (error) {
4531 			status = puterrno4(error);
4532 			goto done;
4533 		}
4534 		if ((sarg.vap->va_mode ^ va.va_mode) &
4535 		    ~(VSUID | VSGID | VSVTX)) {
4536 			status = NFS4ERR_INVAL;
4537 			goto done;
4538 		}
4539 	}
4540 
4541 
4542 	/* Check stateid only if size has been set */
4543 	if (sarg.vap->va_mask & AT_SIZE) {
4544 		trunc = (sarg.vap->va_size == 0);
4545 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4546 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4547 		if (status != NFS4_OK)
4548 			goto done;
4549 	}
4550 
4551 	ct.cc_sysid = 0;
4552 	ct.cc_pid = 0;
4553 	ct.cc_caller_id = nfs4_srv_caller_id;
4554 
4555 	/* XXX start of possible race with delegations */
4556 
4557 	/*
4558 	 * We need to specially handle size changes because it is
4559 	 * possible for the client to create a file with read-only
4560 	 * modes, but with the file opened for writing. If the client
4561 	 * then tries to set the file size, e.g. ftruncate(3C),
4562 	 * fcntl(F_FREESP), the normal access checking done in
4563 	 * VOP_SETATTR would prevent the client from doing it even though
4564 	 * it should be allowed to do so.  To get around this, we do the
4565 	 * access checking for ourselves and use VOP_SPACE which doesn't
4566 	 * do the access checking.
4567 	 * Also the client should not be allowed to change the file
4568 	 * size if there is a conflicting non-blocking mandatory lock in
4569 	 * the region of the change.
4570 	 */
4571 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4572 		u_offset_t offset;
4573 		ssize_t length;
4574 
4575 		/*
4576 		 * Check any possible conflict due to NBMAND locks.
4577 		 * Get into critical region before VOP_GETATTR, so the
4578 		 * size attribute is valid when checking conflicts.
4579 		 */
4580 		if (nbl_need_check(vp)) {
4581 			nbl_start_crit(vp, RW_READER);
4582 			in_crit = 1;
4583 		}
4584 
4585 		bva.va_mask = AT_UID|AT_SIZE;
4586 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4587 			status = puterrno4(error);
4588 			goto done;
4589 		}
4590 
4591 		if (in_crit) {
4592 			if (sarg.vap->va_size < bva.va_size) {
4593 				offset = sarg.vap->va_size;
4594 				length = bva.va_size - sarg.vap->va_size;
4595 			} else {
4596 				offset = bva.va_size;
4597 				length = sarg.vap->va_size - bva.va_size;
4598 			}
4599 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4600 				status = NFS4ERR_LOCKED;
4601 				goto done;
4602 			}
4603 		}
4604 
4605 		if (crgetuid(cr) == bva.va_uid) {
4606 			saved_mask = sarg.vap->va_mask;
4607 			sarg.vap->va_mask &= ~AT_SIZE;
4608 			bf.l_type = F_WRLCK;
4609 			bf.l_whence = 0;
4610 			bf.l_start = (off64_t)sarg.vap->va_size;
4611 			bf.l_len = 0;
4612 			bf.l_sysid = 0;
4613 			bf.l_pid = 0;
4614 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4615 					(offset_t)sarg.vap->va_size, cr, &ct);
4616 		}
4617 	}
4618 
4619 	if (!error && sarg.vap->va_mask != 0)
4620 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4621 
4622 	/* restore AT_SIZE */
4623 	if (saved_mask & AT_SIZE)
4624 		sarg.vap->va_mask |= AT_SIZE;
4625 
4626 	/*
4627 	 * If an ACL was being set, it has been delayed until now,
4628 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4629 	 */
4630 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4631 		int i;
4632 
4633 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4634 			if (ntov.amap[i] == FATTR4_ACL)
4635 				break;
4636 		if (i < NFS4_MAXNUM_ATTRS) {
4637 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4638 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4639 			if (error == 0) {
4640 				*resp |= FATTR4_ACL_MASK;
4641 			} else if (error == ENOTSUP) {
4642 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4643 				status = NFS4ERR_ATTRNOTSUPP;
4644 				goto done;
4645 			}
4646 		} else {
4647 			NFS4_DEBUG(rfs4_debug,
4648 			    (CE_NOTE, "do_rfs4_op_setattr: "
4649 			    "unable to find ACL in fattr4"));
4650 			error = EINVAL;
4651 		}
4652 	}
4653 
4654 	if (error) {
4655 		status = puterrno4(error);
4656 
4657 		/*
4658 		 * Set the response bitmap when setattr failed.
4659 		 * If VOP_SETATTR partially succeeded, test by doing a
4660 		 * VOP_GETATTR on the object and comparing the data
4661 		 * to the setattr arguments.
4662 		 */
4663 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4664 	} else {
4665 		/*
4666 		 * Force modified metadata out to stable storage.
4667 		 */
4668 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4669 		/*
4670 		 * Set response bitmap
4671 		 */
4672 		nfs4_vmask_to_nmask(sarg.vap->va_mask, resp);
4673 	}
4674 
4675 /* Return early and already have a NFSv4 error */
4676 done:
4677 	if (in_crit)
4678 		nbl_end_crit(vp);
4679 
4680 	nfs4_ntov_table_free(&ntov, &sarg);
4681 
4682 	return (status);
4683 }
4684 
4685 /* ARGSUSED */
4686 static void
4687 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4688 	struct compound_state *cs)
4689 {
4690 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4691 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4692 
4693 	if (cs->vp == NULL) {
4694 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4695 		return;
4696 	}
4697 
4698 	/*
4699 	 * If there is an unshared filesystem mounted on this vnode,
4700 	 * do not allow to setattr on this vnode.
4701 	 */
4702 	if (vn_ismntpt(cs->vp)) {
4703 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4704 		return;
4705 	}
4706 
4707 	resp->attrsset = 0;
4708 
4709 	if (rdonly4(cs->exi, cs->vp, req)) {
4710 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4711 		return;
4712 	}
4713 
4714 	*cs->statusp = resp->status =
4715 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4716 			&args->stateid);
4717 }
4718 
4719 /* ARGSUSED */
4720 static void
4721 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4722 	struct compound_state *cs)
4723 {
4724 	/*
4725 	 * verify and nverify are exactly the same, except that nverify
4726 	 * succeeds when some argument changed, and verify succeeds when
4727 	 * when none changed.
4728 	 */
4729 
4730 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4731 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4732 
4733 	int error;
4734 	struct nfs4_svgetit_arg sarg;
4735 	struct statvfs64 sb;
4736 	struct nfs4_ntov_table ntov;
4737 
4738 	if (cs->vp == NULL) {
4739 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4740 		return;
4741 	}
4742 
4743 	sarg.sbp = &sb;
4744 	nfs4_ntov_table_init(&ntov);
4745 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4746 				&sarg, &ntov, NFS4ATTR_VERIT);
4747 	if (resp->status != NFS4_OK) {
4748 		/*
4749 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4750 		 * so could return -1 for "no match".
4751 		 */
4752 		if (resp->status == -1)
4753 			resp->status = NFS4ERR_NOT_SAME;
4754 		goto done;
4755 	}
4756 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4757 	switch (error) {
4758 	case 0:
4759 		resp->status = NFS4_OK;
4760 		break;
4761 	case -1:
4762 		resp->status = NFS4ERR_NOT_SAME;
4763 		break;
4764 	default:
4765 		resp->status = puterrno4(error);
4766 		break;
4767 	}
4768 done:
4769 	*cs->statusp = resp->status;
4770 	nfs4_ntov_table_free(&ntov, &sarg);
4771 }
4772 
4773 /* ARGSUSED */
4774 static void
4775 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4776 	struct compound_state *cs)
4777 {
4778 	/*
4779 	 * verify and nverify are exactly the same, except that nverify
4780 	 * succeeds when some argument changed, and verify succeeds when
4781 	 * when none changed.
4782 	 */
4783 
4784 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
4785 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
4786 
4787 	int error;
4788 	struct nfs4_svgetit_arg sarg;
4789 	struct statvfs64 sb;
4790 	struct nfs4_ntov_table ntov;
4791 
4792 	if (cs->vp == NULL) {
4793 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4794 		return;
4795 	}
4796 	sarg.sbp = &sb;
4797 	nfs4_ntov_table_init(&ntov);
4798 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4799 				&sarg, &ntov, NFS4ATTR_VERIT);
4800 	if (resp->status != NFS4_OK) {
4801 		/*
4802 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4803 		 * so could return -1 for "no match".
4804 		 */
4805 		if (resp->status == -1)
4806 			resp->status = NFS4_OK;
4807 		goto done;
4808 	}
4809 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4810 	switch (error) {
4811 	case 0:
4812 		resp->status = NFS4ERR_SAME;
4813 		break;
4814 	case -1:
4815 		resp->status = NFS4_OK;
4816 		break;
4817 	default:
4818 		resp->status = puterrno4(error);
4819 		break;
4820 	}
4821 done:
4822 	*cs->statusp = resp->status;
4823 	nfs4_ntov_table_free(&ntov, &sarg);
4824 }
4825 
4826 /*
4827  * XXX - This should live in an NFS header file.
4828  */
4829 #define	MAX_IOVECS	12
4830 
4831 /* ARGSUSED */
4832 static void
4833 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4834 	struct compound_state *cs)
4835 {
4836 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
4837 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
4838 	int error;
4839 	vnode_t *vp;
4840 	struct vattr bva;
4841 	u_offset_t rlimit;
4842 	struct uio uio;
4843 	struct iovec iov[MAX_IOVECS];
4844 	struct iovec *iovp;
4845 	int iovcnt;
4846 	int ioflag;
4847 	cred_t *savecred, *cr;
4848 	bool_t *deleg = &cs->deleg;
4849 	nfsstat4 stat;
4850 	int in_crit = 0;
4851 
4852 	vp = cs->vp;
4853 	if (vp == NULL) {
4854 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4855 		return;
4856 	}
4857 	if (cs->access == CS_ACCESS_DENIED) {
4858 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4859 		return;
4860 	}
4861 
4862 	cr = cs->cr;
4863 
4864 	/*
4865 	 * We have to enter the critical region before calling VOP_RWLOCK
4866 	 * to avoid a deadlock with ufs.
4867 	 */
4868 	if (nbl_need_check(vp)) {
4869 		nbl_start_crit(vp, RW_READER);
4870 		in_crit = 1;
4871 		if (nbl_conflict(vp, NBL_WRITE,
4872 				args->offset, args->data_len, 0)) {
4873 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
4874 			goto out;
4875 		}
4876 	}
4877 
4878 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
4879 					deleg, TRUE)) != NFS4_OK) {
4880 		*cs->statusp = resp->status = stat;
4881 		goto out;
4882 	}
4883 
4884 	bva.va_mask = AT_MODE | AT_UID;
4885 	error = VOP_GETATTR(vp, &bva, 0, cr);
4886 
4887 	/*
4888 	 * If we can't get the attributes, then we can't do the
4889 	 * right access checking.  So, we'll fail the request.
4890 	 */
4891 	if (error) {
4892 		*cs->statusp = resp->status = puterrno4(error);
4893 		goto out;
4894 	}
4895 
4896 	if (rdonly4(cs->exi, cs->vp, req)) {
4897 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4898 		goto out;
4899 	}
4900 
4901 	if (vp->v_type != VREG) {
4902 		*cs->statusp = resp->status =
4903 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
4904 		goto out;
4905 	}
4906 
4907 	if (crgetuid(cr) != bva.va_uid &&
4908 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
4909 		*cs->statusp = resp->status = puterrno4(error);
4910 		goto out;
4911 	}
4912 
4913 	if (MANDLOCK(vp, bva.va_mode)) {
4914 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4915 		goto out;
4916 	}
4917 
4918 	if (args->data_len == 0) {
4919 		*cs->statusp = resp->status = NFS4_OK;
4920 		resp->count = 0;
4921 		resp->committed = args->stable;
4922 		resp->writeverf = Write4verf;
4923 		goto out;
4924 	}
4925 
4926 	if (args->mblk != NULL) {
4927 		mblk_t *m;
4928 		uint_t bytes, round_len;
4929 
4930 		iovcnt = 0;
4931 		bytes = 0;
4932 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
4933 		for (m = args->mblk;
4934 		    m != NULL && bytes < round_len;
4935 		    m = m->b_cont) {
4936 			iovcnt++;
4937 			bytes += MBLKL(m);
4938 		}
4939 #ifdef DEBUG
4940 		/* should have ended on an mblk boundary */
4941 		if (bytes != round_len) {
4942 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
4943 			    bytes, round_len, args->data_len);
4944 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
4945 			    (void *)args->mblk, (void *)m);
4946 			ASSERT(bytes == round_len);
4947 		}
4948 #endif
4949 		if (iovcnt <= MAX_IOVECS) {
4950 			iovp = iov;
4951 		} else {
4952 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
4953 		}
4954 		mblk_to_iov(args->mblk, iovcnt, iovp);
4955 	} else {
4956 		iovcnt = 1;
4957 		iovp = iov;
4958 		iovp->iov_base = args->data_val;
4959 		iovp->iov_len = args->data_len;
4960 	}
4961 
4962 	uio.uio_iov = iovp;
4963 	uio.uio_iovcnt = iovcnt;
4964 
4965 	uio.uio_segflg = UIO_SYSSPACE;
4966 	uio.uio_extflg = UIO_COPY_DEFAULT;
4967 	uio.uio_loffset = args->offset;
4968 	uio.uio_resid = args->data_len;
4969 	uio.uio_llimit = curproc->p_fsz_ctl;
4970 	rlimit = uio.uio_llimit - args->offset;
4971 	if (rlimit < (u_offset_t)uio.uio_resid)
4972 		uio.uio_resid = (int)rlimit;
4973 
4974 	if (args->stable == UNSTABLE4)
4975 		ioflag = 0;
4976 	else if (args->stable == FILE_SYNC4)
4977 		ioflag = FSYNC;
4978 	else if (args->stable == DATA_SYNC4)
4979 		ioflag = FDSYNC;
4980 	else {
4981 		if (iovp != iov)
4982 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
4983 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4984 		goto out;
4985 	}
4986 
4987 	/*
4988 	 * We're changing creds because VM may fault and we need
4989 	 * the cred of the current thread to be used if quota
4990 	 * checking is enabled.
4991 	 */
4992 	savecred = curthread->t_cred;
4993 	curthread->t_cred = cr;
4994 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
4995 	curthread->t_cred = savecred;
4996 
4997 	if (iovp != iov)
4998 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
4999 
5000 	if (error) {
5001 		*cs->statusp = resp->status = puterrno4(error);
5002 		goto out;
5003 	}
5004 
5005 	*cs->statusp = resp->status = NFS4_OK;
5006 	resp->count = args->data_len - uio.uio_resid;
5007 
5008 	if (ioflag == 0)
5009 		resp->committed = UNSTABLE4;
5010 	else
5011 		resp->committed = FILE_SYNC4;
5012 
5013 	resp->writeverf = Write4verf;
5014 
5015 out:
5016 	if (in_crit)
5017 		nbl_end_crit(vp);
5018 }
5019 
5020 
5021 /* XXX put in a header file */
5022 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5023 
5024 void
5025 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5026 	struct svc_req *req, cred_t *cr)
5027 {
5028 	uint_t i;
5029 	struct compound_state cs;
5030 
5031 	rfs4_init_compound_state(&cs);
5032 	/*
5033 	 * Form a reply tag by copying over the reqeuest tag.
5034 	 */
5035 	resp->tag.utf8string_val =
5036 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5037 	resp->tag.utf8string_len = args->tag.utf8string_len;
5038 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5039 					resp->tag.utf8string_len);
5040 
5041 	cs.statusp = &resp->status;
5042 
5043 	/*
5044 	 * XXX for now, minorversion should be zero
5045 	 */
5046 	if (args->minorversion != NFS4_MINORVERSION) {
5047 		resp->array_len = 0;
5048 		resp->array = NULL;
5049 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5050 		return;
5051 	}
5052 
5053 	resp->array_len = args->array_len;
5054 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5055 		KM_SLEEP);
5056 
5057 	ASSERT(exi == NULL);
5058 	ASSERT(cr == NULL);
5059 
5060 	cr = crget();
5061 	ASSERT(cr != NULL);
5062 
5063 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5064 		crfree(cr);
5065 		return;
5066 	}
5067 
5068 	cs.basecr = cr;
5069 
5070 	cs.req = req;
5071 
5072 	/*
5073 	 * For now, NFS4 compound processing must be protected by
5074 	 * exported_lock because it can access more than one exportinfo
5075 	 * per compound and share/unshare can now change multiple
5076 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5077 	 * per proc (excluding public exinfo), and exi_count design
5078 	 * is sufficient to protect concurrent execution of NFS2/3
5079 	 * ops along with unexport.  This lock will be removed as
5080 	 * part of the NFSv4 phase 2 namespace redesign work.
5081 	 */
5082 	rw_enter(&exported_lock, RW_READER);
5083 
5084 	/*
5085 	 * If this is the first compound we've seen, we need to start all
5086 	 * new instances' grace periods.
5087 	 */
5088 	if (rfs4_seen_first_compound == 0) {
5089 		rfs4_grace_start_new();
5090 		/*
5091 		 * This must be set after rfs4_grace_start_new(), otherwise
5092 		 * another thread could proceed past here before the former
5093 		 * is finished.
5094 		 */
5095 		rfs4_seen_first_compound = 1;
5096 	}
5097 
5098 	for (i = 0; i < args->array_len && cs.cont; i++) {
5099 		nfs_argop4 *argop;
5100 		nfs_resop4 *resop;
5101 		uint_t op;
5102 
5103 		argop = &args->array[i];
5104 		resop = &resp->array[i];
5105 		resop->resop = argop->argop;
5106 		op = (uint_t)resop->resop;
5107 
5108 		if (op < rfsv4disp_cnt) {
5109 			/*
5110 			 * Count the individual ops here; NULL and COMPOUND
5111 			 * are counted in common_dispatch()
5112 			 */
5113 			rfsproccnt_v4_ptr[op].value.ui64++;
5114 
5115 			NFS4_DEBUG(rfs4_debug > 1,
5116 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5117 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5118 			NFS4_DEBUG(rfs4_debug > 1,
5119 				(CE_NOTE, "%s returned %d",
5120 				rfs4_op_string[op], *cs.statusp));
5121 			if (*cs.statusp != NFS4_OK)
5122 				cs.cont = FALSE;
5123 		} else {
5124 			/*
5125 			 * This is effectively dead code since XDR code
5126 			 * will have already returned BADXDR if op doesn't
5127 			 * decode to legal value.  This only done for a
5128 			 * day when XDR code doesn't verify v4 opcodes.
5129 			 */
5130 			op = OP_ILLEGAL;
5131 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5132 
5133 			rfs4_op_illegal(argop, resop, req, &cs);
5134 			cs.cont = FALSE;
5135 		}
5136 
5137 		/*
5138 		 * If not at last op, and if we are to stop, then
5139 		 * compact the results array.
5140 		 */
5141 		if ((i + 1) < args->array_len && !cs.cont) {
5142 			nfs_resop4 *new_res = kmem_alloc(
5143 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5144 			bcopy(resp->array,
5145 				new_res, (i+1) * sizeof (nfs_resop4));
5146 			kmem_free(resp->array,
5147 				args->array_len * sizeof (nfs_resop4));
5148 
5149 			resp->array_len =  i + 1;
5150 			resp->array = new_res;
5151 		}
5152 	}
5153 
5154 	rw_exit(&exported_lock);
5155 
5156 	if (cs.vp)
5157 		VN_RELE(cs.vp);
5158 	if (cs.saved_vp)
5159 		VN_RELE(cs.saved_vp);
5160 	if (cs.saved_fh.nfs_fh4_val)
5161 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5162 
5163 	if (cs.basecr)
5164 		crfree(cs.basecr);
5165 	if (cs.cr)
5166 		crfree(cs.cr);
5167 }
5168 
5169 /*
5170  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5171  * XXX zero out the tag and array values. Need to investigate why the
5172  * XXX calls occur, but at least prevent the panic for now.
5173  */
5174 void
5175 rfs4_compound_free(COMPOUND4res *resp)
5176 {
5177 	uint_t i;
5178 
5179 	if (resp->tag.utf8string_val) {
5180 		UTF8STRING_FREE(resp->tag)
5181 	}
5182 
5183 	for (i = 0; i < resp->array_len; i++) {
5184 		nfs_resop4 *resop;
5185 		uint_t op;
5186 
5187 		resop = &resp->array[i];
5188 		op = (uint_t)resop->resop;
5189 		if (op < rfsv4disp_cnt) {
5190 			(*rfsv4disptab[op].dis_resfree)(resop);
5191 		}
5192 	}
5193 	if (resp->array != NULL) {
5194 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5195 	}
5196 }
5197 
5198 /*
5199  * Process the value of the compound request rpc flags, as a bit-AND
5200  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5201  */
5202 void
5203 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5204 {
5205 	int i;
5206 	int flag = RPC_ALL;
5207 
5208 	for (i = 0; flag && i < args->array_len; i++) {
5209 		uint_t op;
5210 
5211 		op = (uint_t)args->array[i].argop;
5212 
5213 		if (op < rfsv4disp_cnt)
5214 			flag &= rfsv4disptab[op].dis_flags;
5215 		else
5216 			flag = 0;
5217 	}
5218 	*flagp = flag;
5219 }
5220 
5221 nfsstat4
5222 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5223 {
5224 	nfsstat4 e;
5225 
5226 	rfs4_dbe_lock(cp->dbe);
5227 
5228 	if (cp->sysidt != LM_NOSYSID) {
5229 		*sp = cp->sysidt;
5230 		e = NFS4_OK;
5231 
5232 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5233 		*sp = cp->sysidt;
5234 		e = NFS4_OK;
5235 
5236 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5237 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5238 	} else
5239 		e = NFS4ERR_DELAY;
5240 
5241 	rfs4_dbe_unlock(cp->dbe);
5242 	return (e);
5243 }
5244 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log one line describing a lock request - the caller's
 * label, the operation, the lock type, its offset and length and the
 * pid.  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	const char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
		str, op, type,
		(longlong_t)flk->l_start,
		flk->l_len ? (longlong_t)flk->l_len : ~0LL,
		flk->l_pid);
}

/*
 * Wrapped in do/while so that the macro acts as a single statement and
 * cannot capture an "else" that follows it at the call site.
 */
#define	LOCK_PRINT(d, s, t, f) \
	do { if (d) lock_print(s, t, f); } while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
5281 
5282 /*ARGSUSED*/
5283 static bool_t
5284 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5285 {
5286 	return (TRUE);
5287 }
5288 
5289 /*
5290  * Look up the pathname using the vp in cs as the directory vnode.
5291  * cs->vp will be the vnode for the file on success
5292  */
5293 
5294 static nfsstat4
5295 rfs4_lookup(component4 *component, struct svc_req *req,
5296 	    struct compound_state *cs)
5297 {
5298 	char *nm;
5299 	uint32_t len;
5300 	nfsstat4 status;
5301 
5302 	if (cs->vp == NULL) {
5303 		return (NFS4ERR_NOFILEHANDLE);
5304 	}
5305 	if (cs->vp->v_type != VDIR) {
5306 		return (NFS4ERR_NOTDIR);
5307 	}
5308 
5309 	if (!utf8_dir_verify(component))
5310 		return (NFS4ERR_INVAL);
5311 
5312 	nm = utf8_to_fn(component, &len, NULL);
5313 	if (nm == NULL) {
5314 		return (NFS4ERR_INVAL);
5315 	}
5316 
5317 	if (len > MAXNAMELEN) {
5318 		kmem_free(nm, len);
5319 		return (NFS4ERR_NAMETOOLONG);
5320 	}
5321 
5322 	status = do_rfs4_op_lookup(nm, len, req, cs);
5323 
5324 	kmem_free(nm, len);
5325 
5326 	return (status);
5327 }
5328 
5329 static nfsstat4
5330 rfs4_lookupfile(component4 *component, struct svc_req *req,
5331 		struct compound_state *cs, uint32_t access,
5332 		change_info4 *cinfo)
5333 {
5334 	nfsstat4 status;
5335 	vnode_t *dvp = cs->vp;
5336 	vattr_t bva, ava, fva;
5337 	int error;
5338 
5339 	/* Get "before" change value */
5340 	bva.va_mask = AT_CTIME|AT_SEQ;
5341 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5342 	if (error)
5343 		return (puterrno4(error));
5344 
5345 	/* rfs4_lookup may VN_RELE directory */
5346 	VN_HOLD(dvp);
5347 
5348 	status = rfs4_lookup(component, req, cs);
5349 	if (status != NFS4_OK) {
5350 		VN_RELE(dvp);
5351 		return (status);
5352 	}
5353 
5354 	/*
5355 	 * Get "after" change value, if it fails, simply return the
5356 	 * before value.
5357 	 */
5358 	ava.va_mask = AT_CTIME|AT_SEQ;
5359 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5360 		ava.va_ctime = bva.va_ctime;
5361 		ava.va_seq = 0;
5362 	}
5363 	VN_RELE(dvp);
5364 
5365 	/*
5366 	 * Validate the file is a file
5367 	 */
5368 	fva.va_mask = AT_TYPE|AT_MODE;
5369 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5370 	if (error)
5371 		return (puterrno4(error));
5372 
5373 	if (fva.va_type != VREG) {
5374 		if (fva.va_type == VDIR)
5375 			return (NFS4ERR_ISDIR);
5376 		if (fva.va_type == VLNK)
5377 			return (NFS4ERR_SYMLINK);
5378 		return (NFS4ERR_INVAL);
5379 	}
5380 
5381 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5382 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5383 
5384 	/*
5385 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5386 	 * cinfo.atomic = TRUE only if we have
5387 	 * non-zero va_seq's, and they have not changed.
5388 	 */
5389 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5390 		cinfo->atomic = TRUE;
5391 	else
5392 		cinfo->atomic = FALSE;
5393 
5394 	/* Check for mandatory locking */
5395 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5396 	return (check_open_access(access, cs, req));
5397 }
5398 
5399 static nfsstat4
5400 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5401 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5402 {
5403 	int error;
5404 	nfsstat4 status = NFS4_OK;
5405 	vattr_t va;
5406 
5407 tryagain:
5408 
5409 	/*
5410 	 * The file open mode used is VWRITE.  If the client needs
5411 	 * some other semantic, then it should do the access checking
5412 	 * itself.  It would have been nice to have the file open mode
5413 	 * passed as part of the arguments.
5414 	 */
5415 
5416 	*created = TRUE;
5417 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5418 
5419 	if (error) {
5420 		*created = FALSE;
5421 
5422 		/*
5423 		 * If we got something other than file already exists
5424 		 * then just return this error.  Otherwise, we got
5425 		 * EEXIST.  If we were doing a GUARDED create, then
5426 		 * just return this error.  Otherwise, we need to
5427 		 * make sure that this wasn't a duplicate of an
5428 		 * exclusive create request.
5429 		 *
5430 		 * The assumption is made that a non-exclusive create
5431 		 * request will never return EEXIST.
5432 		 */
5433 
5434 		if (error != EEXIST || mode == GUARDED4) {
5435 			status = puterrno4(error);
5436 			return (status);
5437 		}
5438 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5439 
5440 		if (error) {
5441 			/*
5442 			 * We couldn't find the file that we thought that
5443 			 * we just created.  So, we'll just try creating
5444 			 * it again.
5445 			 */
5446 			if (error == ENOENT)
5447 				goto tryagain;
5448 
5449 			status = puterrno4(error);
5450 			return (status);
5451 		}
5452 
5453 		VN_SETPATH(rootdir, dvp, *vpp, nm, strlen(nm));
5454 
5455 		if (mode == UNCHECKED4) {
5456 			/* existing object must be regular file */
5457 			if ((*vpp)->v_type != VREG) {
5458 				if ((*vpp)->v_type == VDIR)
5459 					status = NFS4ERR_ISDIR;
5460 				else if ((*vpp)->v_type == VLNK)
5461 					status = NFS4ERR_SYMLINK;
5462 				else
5463 					status = NFS4ERR_INVAL;
5464 				VN_RELE(*vpp);
5465 				return (status);
5466 			}
5467 
5468 			return (NFS4_OK);
5469 		}
5470 
5471 		/* Check for duplicate request */
5472 		ASSERT(mtime != 0);
5473 		va.va_mask = AT_MTIME;
5474 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5475 		if (!error) {
5476 			/* We found the file */
5477 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5478 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5479 				/* but its not our creation */
5480 				VN_RELE(*vpp);
5481 				return (NFS4ERR_EXIST);
5482 			}
5483 			*created = TRUE; /* retrans of create == created */
5484 			return (NFS4_OK);
5485 		}
5486 		VN_RELE(*vpp);
5487 		return (NFS4ERR_EXIST);
5488 	}
5489 
5490 	return (NFS4_OK);
5491 }
5492 
5493 static nfsstat4
5494 check_open_access(uint32_t access,
5495 		struct compound_state *cs, struct svc_req *req)
5496 {
5497 	int error;
5498 	vnode_t *vp;
5499 	bool_t readonly;
5500 	cred_t *cr = cs->cr;
5501 
5502 	/* For now we don't allow mandatory locking as per V2/V3 */
5503 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5504 		return (NFS4ERR_ACCESS);
5505 	}
5506 
5507 	vp = cs->vp;
5508 	ASSERT(cr != NULL && vp->v_type == VREG);
5509 
5510 	/*
5511 	 * If the file system is exported read only and we are trying
5512 	 * to open for write, then return NFS4ERR_ROFS
5513 	 */
5514 
5515 	readonly = rdonly4(cs->exi, cs->vp, req);
5516 
5517 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5518 		return (NFS4ERR_ROFS);
5519 
5520 	if (access & OPEN4_SHARE_ACCESS_READ) {
5521 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5522 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5523 			return (NFS4ERR_ACCESS);
5524 		}
5525 	}
5526 
5527 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5528 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5529 		if (error)
5530 			return (NFS4ERR_ACCESS);
5531 	}
5532 
5533 	return (NFS4_OK);
5534 }
5535 
5536 static nfsstat4
5537 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5538 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5539 {
5540 	struct nfs4_svgetit_arg sarg;
5541 	struct nfs4_ntov_table ntov;
5542 
5543 	bool_t ntov_table_init = FALSE;
5544 	struct statvfs64 sb;
5545 	nfsstat4 status;
5546 	vnode_t *vp;
5547 	vattr_t bva, ava, iva, cva, *vap;
5548 	vnode_t *dvp;
5549 	timespec32_t *mtime;
5550 	char *nm = NULL;
5551 	uint_t buflen;
5552 	bool_t created;
5553 	bool_t setsize = FALSE;
5554 	len_t reqsize;
5555 	int error;
5556 	bool_t trunc;
5557 	caller_context_t ct;
5558 	component4 *component;
5559 
5560 	sarg.sbp = &sb;
5561 
5562 	dvp = cs->vp;
5563 
5564 	/* Check if the file system is read only */
5565 	if (rdonly4(cs->exi, dvp, req))
5566 		return (NFS4ERR_ROFS);
5567 
5568 	/*
5569 	 * Get the last component of path name in nm. cs will reference
5570 	 * the including directory on success.
5571 	 */
5572 	component = &args->open_claim4_u.file;
5573 	if (!utf8_dir_verify(component))
5574 		return (NFS4ERR_INVAL);
5575 
5576 	nm = utf8_to_fn(component, &buflen, NULL);
5577 
5578 	if (nm == NULL)
5579 		return (NFS4ERR_RESOURCE);
5580 
5581 	if (buflen > MAXNAMELEN) {
5582 		kmem_free(nm, buflen);
5583 		return (NFS4ERR_NAMETOOLONG);
5584 	}
5585 
5586 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5587 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5588 	if (error) {
5589 		kmem_free(nm, buflen);
5590 		return (puterrno4(error));
5591 	}
5592 
5593 	if (bva.va_type != VDIR) {
5594 		kmem_free(nm, buflen);
5595 		return (NFS4ERR_NOTDIR);
5596 	}
5597 
5598 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5599 
5600 	switch (args->mode) {
5601 	case GUARDED4:
5602 		/*FALLTHROUGH*/
5603 	case UNCHECKED4:
5604 		nfs4_ntov_table_init(&ntov);
5605 		ntov_table_init = TRUE;
5606 
5607 		*attrset = 0;
5608 		status = do_rfs4_set_attrs(attrset,
5609 					&args->createhow4_u.createattrs,
5610 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5611 
5612 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5613 		    sarg.vap->va_type != VREG) {
5614 			if (sarg.vap->va_type == VDIR)
5615 				status = NFS4ERR_ISDIR;
5616 			else if (sarg.vap->va_type == VLNK)
5617 				status = NFS4ERR_SYMLINK;
5618 			else
5619 				status = NFS4ERR_INVAL;
5620 		}
5621 
5622 		if (status != NFS4_OK) {
5623 			kmem_free(nm, buflen);
5624 			nfs4_ntov_table_free(&ntov, &sarg);
5625 			*attrset = 0;
5626 			return (status);
5627 		}
5628 
5629 		vap = sarg.vap;
5630 		vap->va_type = VREG;
5631 		vap->va_mask |= AT_TYPE;
5632 
5633 		if ((vap->va_mask & AT_MODE) == 0) {
5634 			vap->va_mask |= AT_MODE;
5635 			vap->va_mode = (mode_t)0600;
5636 		}
5637 
5638 		if (vap->va_mask & AT_SIZE) {
5639 
5640 			/* Disallow create with a non-zero size */
5641 
5642 			if ((reqsize = sarg.vap->va_size) != 0) {
5643 				kmem_free(nm, buflen);
5644 				nfs4_ntov_table_free(&ntov, &sarg);
5645 				*attrset = 0;
5646 				return (NFS4ERR_INVAL);
5647 			}
5648 			setsize = TRUE;
5649 		}
5650 		break;
5651 
5652 	case EXCLUSIVE4:
5653 		/* prohibit EXCL create of named attributes */
5654 		if (dvp->v_flag & V_XATTRDIR) {
5655 			kmem_free(nm, buflen);
5656 			*attrset = 0;
5657 			return (NFS4ERR_INVAL);
5658 		}
5659 
5660 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5661 		cva.va_type = VREG;
5662 		/*
5663 		 * Ensure no time overflows. Assumes underlying
5664 		 * filesystem supports at least 32 bits.
5665 		 * Truncate nsec to usec resolution to allow valid
5666 		 * compares even if the underlying filesystem truncates.
5667 		 */
5668 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5669 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5670 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5671 		cva.va_mode = (mode_t)0;
5672 		vap = &cva;
5673 		break;
5674 	}
5675 
5676 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5677 						cs->cr, &vp, &created);
5678 	kmem_free(nm, buflen);
5679 
5680 	if (status != NFS4_OK) {
5681 		if (ntov_table_init)
5682 			nfs4_ntov_table_free(&ntov, &sarg);
5683 		*attrset = 0;
5684 		return (status);
5685 	}
5686 
5687 	trunc = (setsize && !created);
5688 
5689 	if (args->mode != EXCLUSIVE4) {
5690 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5691 
5692 		/*
5693 		 * True verification that object was created with correct
5694 		 * attrs is impossible.  The attrs could have been changed
5695 		 * immediately after object creation.  If attributes did
5696 		 * not verify, the only recourse for the server is to
5697 		 * destroy the object.  Maybe if some attrs (like gid)
5698 		 * are set incorrectly, the object should be destroyed;
5699 		 * however, seems bad as a default policy.  Do we really
5700 		 * want to destroy an object over one of the times not
5701 		 * verifying correctly?  For these reasons, the server
5702 		 * currently sets bits in attrset for createattrs
5703 		 * that were set; however, no verification is done.
5704 		 *
5705 		 * vmask_to_nmask accounts for vattr bits set on create
5706 		 *	[do_rfs4_set_attrs() only sets resp bits for
5707 		 *	 non-vattr/vfs bits.]
5708 		 * Mask off any bits we set by default so as not to return
5709 		 * more attrset bits than were requested in createattrs
5710 		 */
5711 		if (created) {
5712 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5713 			*attrset &= createmask;
5714 		} else {
5715 			/*
5716 			 * We did not create the vnode (we tried but it
5717 			 * already existed).  In this case, the only createattr
5718 			 * that the spec allows the server to set is size,
5719 			 * and even then, it can only be set if it is 0.
5720 			 */
5721 			*attrset = 0;
5722 			if (trunc)
5723 				*attrset = FATTR4_SIZE_MASK;
5724 		}
5725 	}
5726 	if (ntov_table_init)
5727 		nfs4_ntov_table_free(&ntov, &sarg);
5728 
5729 	/*
5730 	 * Get the initial "after" sequence number, if it fails,
5731 	 * set to zero, time to before.
5732 	 */
5733 	iva.va_mask = AT_CTIME|AT_SEQ;
5734 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5735 		iva.va_seq = 0;
5736 		iva.va_ctime = bva.va_ctime;
5737 	}
5738 
5739 	/*
5740 	 * create_vnode attempts to create the file exclusive,
5741 	 * if it already exists the VOP_CREATE will fail and
5742 	 * may not increase va_seq. It is atomic if
5743 	 * we haven't changed the directory, but if it has changed
5744 	 * we don't know what changed it.
5745 	 */
5746 	if (!created) {
5747 		if (bva.va_seq && iva.va_seq &&
5748 			bva.va_seq == iva.va_seq)
5749 			cinfo->atomic = TRUE;
5750 		else
5751 			cinfo->atomic = FALSE;
5752 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
5753 	} else {
5754 		/*
5755 		 * The entry was created, we need to sync the
5756 		 * directory metadata.
5757 		 */
5758 		(void) VOP_FSYNC(dvp, 0, cs->cr);
5759 
5760 		/*
5761 		 * Get "after" change value, if it fails, simply return the
5762 		 * before value.
5763 		 */
5764 		ava.va_mask = AT_CTIME|AT_SEQ;
5765 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5766 			ava.va_ctime = bva.va_ctime;
5767 			ava.va_seq = 0;
5768 		}
5769 
5770 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5771 
5772 		/*
5773 		 * The cinfo->atomic = TRUE only if we have
5774 		 * non-zero va_seq's, and it has incremented by exactly one
5775 		 * during the create_vnode and it didn't
5776 		 * change during the VOP_FSYNC.
5777 		 */
5778 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
5779 				iva.va_seq == (bva.va_seq + 1) &&
5780 				iva.va_seq == ava.va_seq)
5781 			cinfo->atomic = TRUE;
5782 		else
5783 			cinfo->atomic = FALSE;
5784 	}
5785 
5786 	/* Check for mandatory locking and that the size gets set. */
5787 	cva.va_mask = AT_MODE;
5788 	if (setsize)
5789 		cva.va_mask |= AT_SIZE;
5790 
5791 	/* Assume the worst */
5792 	cs->mandlock = TRUE;
5793 
5794 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
5795 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
5796 
5797 		/*
5798 		 * Truncate the file if necessary; this would be
5799 		 * the case for create over an existing file.
5800 		 */
5801 
5802 		if (trunc) {
5803 			int in_crit = 0;
5804 			rfs4_file_t *fp;
5805 			bool_t create = FALSE;
5806 
5807 			/*
5808 			 * We are writing over an existing file.
5809 			 * Check to see if we need to recall a delegation.
5810 			 */
5811 			rfs4_hold_deleg_policy();
5812 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
5813 				if (rfs4_check_delegated_byfp(FWRITE, fp,
5814 					(reqsize == 0), FALSE, FALSE,
5815 							&clientid)) {
5816 
5817 					rfs4_file_rele(fp);
5818 					rfs4_rele_deleg_policy();
5819 					VN_RELE(vp);
5820 					*attrset = 0;
5821 					return (NFS4ERR_DELAY);
5822 				}
5823 				rfs4_file_rele(fp);
5824 			}
5825 			rfs4_rele_deleg_policy();
5826 
5827 			if (nbl_need_check(vp)) {
5828 				in_crit = 1;
5829 
5830 				ASSERT(reqsize == 0);
5831 
5832 				nbl_start_crit(vp, RW_READER);
5833 				if (nbl_conflict(vp, NBL_WRITE, 0,
5834 						cva.va_size, 0)) {
5835 					in_crit = 0;
5836 					nbl_end_crit(vp);
5837 					VN_RELE(vp);
5838 					*attrset = 0;
5839 					return (NFS4ERR_ACCESS);
5840 				}
5841 			}
5842 			ct.cc_sysid = 0;
5843 			ct.cc_pid = 0;
5844 			ct.cc_caller_id = nfs4_srv_caller_id;
5845 
5846 			cva.va_mask = AT_SIZE;
5847 			cva.va_size = reqsize;
5848 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
5849 			if (in_crit)
5850 				nbl_end_crit(vp);
5851 		}
5852 	}
5853 
5854 	error = makefh4(&cs->fh, vp, cs->exi);
5855 
5856 	/*
5857 	 * Force modified data and metadata out to stable storage.
5858 	 */
5859 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
5860 
5861 	if (error) {
5862 		VN_RELE(vp);
5863 		*attrset = 0;
5864 		return (puterrno4(error));
5865 	}
5866 
5867 	/* if parent dir is attrdir, set namedattr fh flag */
5868 	if (dvp->v_flag & V_XATTRDIR)
5869 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
5870 
5871 	if (cs->vp)
5872 		VN_RELE(cs->vp);
5873 
5874 	cs->vp = vp;
5875 
5876 	/*
5877 	 * if we did not create the file, we will need to check
5878 	 * the access bits on the file
5879 	 */
5880 
5881 	if (!created) {
5882 		if (setsize)
5883 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
5884 		status = check_open_access(args->share_access, cs, req);
5885 		if (status != NFS4_OK)
5886 			*attrset = 0;
5887 	}
5888 	return (status);
5889 }
5890 
5891 /*ARGSUSED*/
5892 static void
5893 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
5894 		rfs4_openowner_t *oo, delegreq_t deleg,
5895 		uint32_t access, uint32_t deny,
5896 		OPEN4res *resp)
5897 {
5898 	/* XXX Currently not using req  */
5899 	rfs4_state_t *state;
5900 	rfs4_file_t *file;
5901 	bool_t screate = TRUE;
5902 	bool_t fcreate = TRUE;
5903 	uint32_t amodes;
5904 	uint32_t dmodes;
5905 	rfs4_deleg_state_t *dsp;
5906 	struct shrlock shr;
5907 	struct shr_locowner shr_loco;
5908 	sysid_t sysid;
5909 	nfsstat4 status;
5910 	int fflags = 0;
5911 	int recall = 0;
5912 	int err;
5913 
5914 	/* get the file struct and hold a lock on it during initial open */
5915 	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
5916 	if (file == NULL) {
5917 		NFS4_DEBUG(rfs4_debug,
5918 			(CE_NOTE, "rfs4_do_open: can't find file"));
5919 		resp->status = NFS4ERR_SERVERFAULT;
5920 		return;
5921 	}
5922 
5923 	state = rfs4_findstate_by_owner_file(oo, file, &screate);
5924 	if (state == NULL) {
5925 		NFS4_DEBUG(rfs4_debug,
5926 			(CE_NOTE, "rfs4_do_open: can't find state"));
5927 		resp->status = NFS4ERR_RESOURCE;
5928 		/* No need to keep any reference */
5929 		rfs4_file_rele_withunlock(file);
5930 		return;
5931 	}
5932 
5933 	/*
5934 	 * Check for conflicts in deny and access before checking for
5935 	 * conflicts in delegation.  We don't want to recall a
5936 	 * delegation based on an open that will eventually fail based
5937 	 * on shares modes.
5938 	 */
5939 
5940 	shr.s_access = (short)access;
5941 	shr.s_deny = (short)deny;
5942 	shr.s_pid = rfs4_dbe_getid(oo->dbe);
5943 
5944 	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
5945 		resp->status = status;
5946 		rfs4_file_rele(file);
5947 		/* Not a fully formed open; "close" it */
5948 		if (screate == TRUE)
5949 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5950 		rfs4_state_rele(state);
5951 		return;
5952 	}
5953 	shr.s_sysid = sysid;
5954 	shr_loco.sl_pid = shr.s_pid;
5955 	shr_loco.sl_id = shr.s_sysid;
5956 	shr.s_owner = (caddr_t)&shr_loco;
5957 	shr.s_own_len = sizeof (shr_loco);
5958 
5959 	fflags = 0;
5960 	if (access & OPEN4_SHARE_ACCESS_READ)
5961 		fflags |= FREAD;
5962 	if (access & OPEN4_SHARE_ACCESS_WRITE)
5963 		fflags |= FWRITE;
5964 
5965 	if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {
5966 
5967 		resp->status = err == EAGAIN ?
5968 			NFS4ERR_SHARE_DENIED : puterrno4(err);
5969 
5970 		rfs4_file_rele(file);
5971 		/* Not a fully formed open; "close" it */
5972 		if (screate == TRUE)
5973 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
5974 		rfs4_state_rele(state);
5975 		return;
5976 	}
5977 
5978 	rfs4_dbe_lock(state->dbe);
5979 	rfs4_dbe_lock(file->dbe);
5980 
5981 	/*
5982 	 * Calculate the new deny and access mode that this open is adding to
5983 	 * the file for this open owner;
5984 	 */
5985 	dmodes = (deny & ~state->share_deny);
5986 	amodes = (access & ~state->share_access);
5987 
5988 	/*
5989 	 * Check to see if this file is delegated and if so, if a
5990 	 * recall needs to be done.
5991 	 */
5992 	if (rfs4_check_recall(state, access)) {
5993 		rfs4_dbe_unlock(file->dbe);
5994 		rfs4_dbe_unlock(state->dbe);
5995 		rfs4_recall_deleg(file, FALSE, state->owner->client);
5996 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
5997 		rfs4_dbe_lock(state->dbe);
5998 		rfs4_dbe_lock(file->dbe);
5999 		/* Let's see if the delegation was returned */
6000 		if (rfs4_check_recall(state, access)) {
6001 			rfs4_dbe_unlock(file->dbe);
6002 			rfs4_dbe_unlock(state->dbe);
6003 			rfs4_file_rele(file);
6004 			rfs4_update_lease(state->owner->client);
6005 			/* recalculate flags to match what was added */
6006 			fflags = 0;
6007 			if (amodes & OPEN4_SHARE_ACCESS_READ)
6008 				fflags |= FREAD;
6009 			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6010 				fflags |= FWRITE;
6011 			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
6012 			/* Not a fully formed open; "close" it */
6013 			if (screate == TRUE)
6014 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
6015 			rfs4_state_rele(state);
6016 			resp->status = NFS4ERR_DELAY;
6017 			return;
6018 		}
6019 	}
6020 
6021 	if (dmodes & OPEN4_SHARE_DENY_READ)
6022 		file->deny_read++;
6023 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
6024 		file->deny_write++;
6025 	file->share_deny |= deny;
6026 	state->share_deny |= deny;
6027 
6028 	if (amodes & OPEN4_SHARE_ACCESS_READ)
6029 		file->access_read++;
6030 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6031 		file->access_write++;
6032 	file->share_access |= access;
6033 	state->share_access |= access;
6034 
6035 	/*
6036 	 * Check for delegation here. if the deleg argument is not
6037 	 * DELEG_ANY, then this is a reclaim from a client and
6038 	 * we must honor the delegation requested. If necessary we can
6039 	 * set the recall flag.
6040 	 */
6041 
6042 	dsp = rfs4_grant_delegation(deleg, state, &recall);
6043 
6044 	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);
6045 
6046 	next_stateid(&state->stateid);
6047 
6048 	resp->stateid = state->stateid.stateid;
6049 
6050 	rfs4_dbe_unlock(file->dbe);
6051 	rfs4_dbe_unlock(state->dbe);
6052 
6053 	if (dsp) {
6054 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6055 		rfs4_deleg_state_rele(dsp);
6056 	}
6057 
6058 	rfs4_file_rele(file);
6059 	rfs4_state_rele(state);
6060 
6061 	resp->status = NFS4_OK;
6062 }
6063 
6064 /*ARGSUSED*/
6065 static void
6066 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6067 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6068 {
6069 	change_info4 *cinfo = &resp->cinfo;
6070 	bitmap4 *attrset = &resp->attrset;
6071 
6072 	if (args->opentype == OPEN4_NOCREATE)
6073 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6074 					req, cs, args->share_access, cinfo);
6075 	else {
6076 		/* inhibit delegation grants during exclusive create */
6077 
6078 		if (args->mode == EXCLUSIVE4)
6079 			rfs4_disable_delegation();
6080 
6081 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6082 					oo->client->clientid);
6083 	}
6084 
6085 	if (resp->status == NFS4_OK) {
6086 
6087 		/* cs->vp cs->fh now reference the desired file */
6088 
6089 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6090 						args->share_deny, resp);
6091 
6092 		/*
6093 		 * If rfs4_createfile set attrset, we must
6094 		 * clear this attrset before the response is copied.
6095 		 */
6096 		if (resp->status != NFS4_OK && resp->attrset) {
6097 			resp->attrset = 0;
6098 		}
6099 	}
6100 	else
6101 		*cs->statusp = resp->status;
6102 
6103 	if (args->mode == EXCLUSIVE4)
6104 		rfs4_enable_delegation();
6105 }
6106 
6107 /*ARGSUSED*/
6108 static void
6109 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6110 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6111 {
6112 	change_info4 *cinfo = &resp->cinfo;
6113 	vattr_t va;
6114 	vtype_t v_type = cs->vp->v_type;
6115 	int error = 0;
6116 
6117 	/* Verify that we have a regular file */
6118 	if (v_type != VREG) {
6119 		if (v_type == VDIR)
6120 			resp->status = NFS4ERR_ISDIR;
6121 		else if (v_type == VLNK)
6122 			resp->status = NFS4ERR_SYMLINK;
6123 		else
6124 			resp->status = NFS4ERR_INVAL;
6125 		return;
6126 	}
6127 
6128 	va.va_mask = AT_MODE|AT_UID;
6129 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6130 	if (error) {
6131 		resp->status = puterrno4(error);
6132 		return;
6133 	}
6134 
6135 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6136 
6137 	/*
6138 	 * Check if we have access to the file, Note the the file
6139 	 * could have originally been open UNCHECKED or GUARDED
6140 	 * with mode bits that will now fail, but there is nothing
6141 	 * we can really do about that except in the case that the
6142 	 * owner of the file is the one requesting the open.
6143 	 */
6144 	if (crgetuid(cs->cr) != va.va_uid) {
6145 		resp->status = check_open_access(args->share_access, cs, req);
6146 		if (resp->status != NFS4_OK) {
6147 			return;
6148 		}
6149 	}
6150 
6151 	/*
6152 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6153 	 */
6154 	cinfo->before = 0;
6155 	cinfo->after = 0;
6156 	cinfo->atomic = FALSE;
6157 
6158 	rfs4_do_open(cs, req, oo,
6159 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6160 		args->share_access, args->share_deny, resp);
6161 }
6162 
6163 static void
6164 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6165 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6166 {
6167 	int error;
6168 	nfsstat4 status;
6169 	stateid4 stateid =
6170 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6171 	rfs4_deleg_state_t *dsp;
6172 
6173 	/*
6174 	 * Find the state info from the stateid and confirm that the
6175 	 * file is delegated.  If the state openowner is the same as
6176 	 * the supplied openowner we're done. If not, get the file
6177 	 * info from the found state info. Use that file info to
6178 	 * create the state for this lock owner. Note solaris doen't
6179 	 * really need the pathname to find the file. We may want to
6180 	 * lookup the pathname and make sure that the vp exist and
6181 	 * matches the vp in the file structure. However it is
6182 	 * possible that the pathname nolonger exists (local process
6183 	 * unlinks the file), so this may not be that useful.
6184 	 */
6185 
6186 	status = rfs4_get_deleg_state(&stateid, &dsp);
6187 	if (status != NFS4_OK) {
6188 		resp->status = status;
6189 		return;
6190 	}
6191 
6192 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6193 
6194 	/*
6195 	 * New lock owner, create state. Since this was probably called
6196 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6197 	 */
6198 
6199 	ASSERT(cs->vp != NULL);
6200 	VN_RELE(cs->vp);
6201 	VN_HOLD(dsp->finfo->vp);
6202 	cs->vp = dsp->finfo->vp;
6203 
6204 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6205 		rfs4_deleg_state_rele(dsp);
6206 		*cs->statusp = resp->status = puterrno4(error);
6207 		return;
6208 	}
6209 
6210 	/* Mark progress for delegation returns */
6211 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6212 	rfs4_deleg_state_rele(dsp);
6213 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6214 				args->share_access, args->share_deny, resp);
6215 }
6216 
6217 /*ARGSUSED*/
6218 static void
6219 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6220 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6221 {
6222 	/*
6223 	 * Lookup the pathname, it must already exist since this file
6224 	 * was delegated.
6225 	 *
6226 	 * Find the file and state info for this vp and open owner pair.
6227 	 *	check that they are in fact delegated.
6228 	 *	check that the state access and deny modes are the same.
6229 	 *
6230 	 * Return the delgation possibly seting the recall flag.
6231 	 */
6232 	rfs4_file_t *file;
6233 	rfs4_state_t *state;
6234 	bool_t create = FALSE;
6235 	bool_t dcreate = FALSE;
6236 	rfs4_deleg_state_t *dsp;
6237 	nfsace4 *ace;
6238 
6239 
6240 	/* Note we ignore oflags */
6241 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6242 				req, cs, args->share_access, &resp->cinfo);
6243 
6244 	if (resp->status != NFS4_OK) {
6245 		return;
6246 	}
6247 
6248 	/* get the file struct and hold a lock on it during initial open */
6249 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6250 	if (file == NULL) {
6251 		NFS4_DEBUG(rfs4_debug,
6252 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6253 		resp->status = NFS4ERR_SERVERFAULT;
6254 		return;
6255 	}
6256 
6257 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6258 	if (state == NULL) {
6259 		NFS4_DEBUG(rfs4_debug,
6260 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6261 		resp->status = NFS4ERR_SERVERFAULT;
6262 		rfs4_file_rele_withunlock(file);
6263 		return;
6264 	}
6265 
6266 	rfs4_dbe_lock(state->dbe);
6267 	rfs4_dbe_lock(file->dbe);
6268 	if (args->share_access != state->share_access ||
6269 			args->share_deny != state->share_deny ||
6270 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6271 		NFS4_DEBUG(rfs4_debug,
6272 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6273 		rfs4_dbe_unlock(file->dbe);
6274 		rfs4_dbe_unlock(state->dbe);
6275 		rfs4_file_rele(file);
6276 		rfs4_state_rele(state);
6277 		resp->status = NFS4ERR_SERVERFAULT;
6278 		return;
6279 	}
6280 	rfs4_dbe_unlock(file->dbe);
6281 	rfs4_dbe_unlock(state->dbe);
6282 
6283 	dsp = rfs4_finddeleg(state, &dcreate);
6284 	if (dsp == NULL) {
6285 		rfs4_state_rele(state);
6286 		rfs4_file_rele(file);
6287 		resp->status = NFS4ERR_SERVERFAULT;
6288 		return;
6289 	}
6290 
6291 	next_stateid(&state->stateid);
6292 
6293 	resp->stateid = state->stateid.stateid;
6294 
6295 	resp->delegation.delegation_type = dsp->dtype;
6296 
6297 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6298 		open_read_delegation4 *rv =
6299 			&resp->delegation.open_delegation4_u.read;
6300 
6301 		rv->stateid = dsp->delegid.stateid;
6302 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6303 		ace = &rv->permissions;
6304 	} else {
6305 		open_write_delegation4 *rv =
6306 			&resp->delegation.open_delegation4_u.write;
6307 
6308 		rv->stateid = dsp->delegid.stateid;
6309 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6310 		ace = &rv->permissions;
6311 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6312 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6313 	}
6314 
6315 	/* XXX For now */
6316 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6317 	ace->flag = 0;
6318 	ace->access_mask = 0;
6319 	ace->who.utf8string_len = 0;
6320 	ace->who.utf8string_val = 0;
6321 
6322 	rfs4_deleg_state_rele(dsp);
6323 	rfs4_state_rele(state);
6324 	rfs4_file_rele(file);
6325 }
6326 
/*
 * Verdict of a sequence id check, as produced by rfs4_check_seqid().
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last reply */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
6332 
6333 /*
6334  * Generic function for sequence number checks.
6335  */
6336 static rfs4_chkseq_t
6337 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6338 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6339 {
6340 	/* Same sequence ids and matching operations? */
6341 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6342 		if (copyres == TRUE) {
6343 			rfs4_free_reply(resop);
6344 			rfs4_copy_reply(resop, lastop);
6345 		}
6346 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6347 			"Replayed SEQID %d\n", seqid));
6348 		return (NFS4_CHKSEQ_REPLAY);
6349 	}
6350 
6351 	/* If the incoming sequence is not the next expected then it is bad */
6352 	if (rqst_seq != seqid + 1) {
6353 		if (rqst_seq == seqid) {
6354 			NFS4_DEBUG(rfs4_debug,
6355 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6356 				"but last op was %d current op is %d\n",
6357 				lastop->resop, resop->resop));
6358 			return (NFS4_CHKSEQ_BAD);
6359 		}
6360 		NFS4_DEBUG(rfs4_debug,
6361 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6362 				rqst_seq, seqid));
6363 		return (NFS4_CHKSEQ_BAD);
6364 	}
6365 
6366 	/* Everything okay -- next expected */
6367 	return (NFS4_CHKSEQ_OKAY);
6368 }
6369 
6370 
6371 static rfs4_chkseq_t
6372 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6373 {
6374 	rfs4_chkseq_t rc;
6375 
6376 	rfs4_dbe_lock(op->dbe);
6377 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6378 	rfs4_dbe_unlock(op->dbe);
6379 
6380 	if (rc == NFS4_CHKSEQ_OKAY)
6381 		rfs4_update_lease(op->client);
6382 
6383 	return (rc);
6384 }
6385 
6386 static rfs4_chkseq_t
6387 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6388 	nfs_resop4 *resop)
6389 {
6390 	rfs4_chkseq_t rc;
6391 
6392 	rfs4_dbe_lock(op->dbe);
6393 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6394 		olo_seqid, resop, FALSE);
6395 	rfs4_dbe_unlock(op->dbe);
6396 
6397 	return (rc);
6398 }
6399 
6400 static rfs4_chkseq_t
6401 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6402 {
6403 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6404 
6405 	rfs4_dbe_lock(lp->dbe);
6406 	if (!lp->skip_seqid_check)
6407 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6408 			seqid, resop, TRUE);
6409 	rfs4_dbe_unlock(lp->dbe);
6410 
6411 	return (rc);
6412 }
6413 
6414 static void
6415 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6416 	    struct svc_req *req, struct compound_state *cs)
6417 {
6418 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6419 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6420 	open_owner4 *owner = &args->owner;
6421 	open_claim_type4 claim = args->claim;
6422 	rfs4_client_t *cp;
6423 	rfs4_openowner_t *oo;
6424 	bool_t create;
6425 	bool_t replay = FALSE;
6426 	int can_reclaim;
6427 
6428 
6429 	if (cs->vp == NULL) {
6430 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6431 		return;
6432 	}
6433 
6434 	/*
6435 	 * Need to check clientid and lease expiration first based on
6436 	 * error ordering and incrementing sequence id.
6437 	 */
6438 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6439 	if (cp == NULL) {
6440 		*cs->statusp = resp->status =
6441 			rfs4_check_clientid(&owner->clientid, 0);
6442 		return;
6443 	}
6444 
6445 	if (rfs4_lease_expired(cp)) {
6446 		rfs4_client_close(cp);
6447 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6448 		return;
6449 	}
6450 	can_reclaim = cp->can_reclaim;
6451 
6452 	/*
6453 	 * Find the open_owner for use from this point forward.  Take
6454 	 * care in updating the sequence id based on the type of error
6455 	 * being returned.
6456 	 */
6457 retry:
6458 	create = TRUE;
6459 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6460 	if (oo == NULL) {
6461 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6462 		rfs4_client_rele(cp);
6463 		return;
6464 	}
6465 
6466 	/* Hold off access to the sequence space while the open is done */
6467 	rfs4_sw_enter(&oo->oo_sw);
6468 
6469 	/*
6470 	 * If the open_owner existed before at the server, then check
6471 	 * the sequence id.
6472 	 */
6473 	if (!create && !oo->postpone_confirm) {
6474 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6475 		case NFS4_CHKSEQ_BAD:
6476 			if ((args->seqid > oo->open_seqid) &&
6477 				oo->need_confirm) {
6478 				rfs4_free_opens(oo, TRUE, FALSE);
6479 				rfs4_sw_exit(&oo->oo_sw);
6480 				rfs4_openowner_rele(oo);
6481 				goto retry;
6482 			}
6483 			resp->status = NFS4ERR_BAD_SEQID;
6484 			goto out;
6485 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6486 			replay = TRUE;
6487 			goto out;
6488 		default:
6489 			break;
6490 		}
6491 
6492 		/*
6493 		 * Sequence was ok and open owner exists
6494 		 * check to see if we have yet to see an
6495 		 * open_confirm.
6496 		 */
6497 		if (oo->need_confirm) {
6498 			rfs4_free_opens(oo, TRUE, FALSE);
6499 			rfs4_sw_exit(&oo->oo_sw);
6500 			rfs4_openowner_rele(oo);
6501 			goto retry;
6502 		}
6503 	}
6504 	/* Grace only applies to regular-type OPENs */
6505 	if (rfs4_clnt_in_grace(cp) &&
6506 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6507 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6508 		goto out;
6509 	}
6510 
6511 	/*
6512 	 * If previous state at the server existed then can_reclaim
6513 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6514 	 * client.
6515 	 */
6516 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6517 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6518 		goto out;
6519 	}
6520 
6521 
6522 	/*
6523 	 * Reject the open if the client has missed the grace period
6524 	 */
6525 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6526 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6527 		goto out;
6528 	}
6529 
6530 	/* Couple of up-front bookkeeping items */
6531 	if (oo->need_confirm) {
6532 		/*
6533 		 * If this is a reclaim OPEN then we should not ask
6534 		 * for a confirmation of the open_owner per the
6535 		 * protocol specification.
6536 		 */
6537 		if (claim == CLAIM_PREVIOUS)
6538 			oo->need_confirm = FALSE;
6539 		else
6540 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6541 	}
6542 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6543 
6544 	/*
6545 	 * If there is an unshared filesystem mounted on this vnode,
6546 	 * do not allow to open/create in this directory.
6547 	 */
6548 	if (vn_ismntpt(cs->vp)) {
6549 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6550 		goto out;
6551 	}
6552 
6553 	/*
6554 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6555 	 * deny can be READ, WRITE, BOTH, or NONE.
6556 	 * bits not defined for access/deny are invalid.
6557 	 */
6558 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6559 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6560 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6561 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6562 		goto out;
6563 	}
6564 
6565 
6566 	/*
6567 	 * make sure attrset is zero before response is built.
6568 	 */
6569 	resp->attrset = 0;
6570 
6571 	switch (claim) {
6572 	case CLAIM_NULL:
6573 		rfs4_do_opennull(cs, req, args, oo, resp);
6574 	    break;
6575 	case CLAIM_PREVIOUS:
6576 		rfs4_do_openprev(cs, req, args, oo, resp);
6577 	    break;
6578 	case CLAIM_DELEGATE_CUR:
6579 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6580 	    break;
6581 	case CLAIM_DELEGATE_PREV:
6582 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6583 	    break;
6584 	default:
6585 		resp->status = NFS4ERR_INVAL;
6586 		break;
6587 	}
6588 
6589 out:
6590 	rfs4_client_rele(cp);
6591 
6592 	/* Catch sequence id handling here to make it a little easier */
6593 	switch (resp->status) {
6594 	case NFS4ERR_BADXDR:
6595 	case NFS4ERR_BAD_SEQID:
6596 	case NFS4ERR_BAD_STATEID:
6597 	case NFS4ERR_NOFILEHANDLE:
6598 	case NFS4ERR_RESOURCE:
6599 	case NFS4ERR_STALE_CLIENTID:
6600 	case NFS4ERR_STALE_STATEID:
6601 		/*
6602 		 * The protocol states that if any of these errors are
6603 		 * being returned, the sequence id should not be
6604 		 * incremented.  Any other return requires an
6605 		 * increment.
6606 		 */
6607 		break;
6608 	default:
6609 		/* Always update the lease in this case */
6610 		rfs4_update_lease(oo->client);
6611 
6612 		/* Regular response - copy the result */
6613 		if (!replay)
6614 			rfs4_update_open_resp(oo, resop, &cs->fh);
6615 
6616 		/*
6617 		 * REPLAY case: Only if the previous response was OK
6618 		 * do we copy the filehandle.  If not OK, no
6619 		 * filehandle to copy.
6620 		 */
6621 		if (replay == TRUE &&
6622 		    resp->status == NFS4_OK &&
6623 		    oo->reply_fh.nfs_fh4_val) {
6624 			/*
6625 			 * If this is a replay, we must restore the
6626 			 * current filehandle/vp to that of what was
6627 			 * returned originally.  Try our best to do
6628 			 * it.
6629 			 */
6630 			nfs_fh4_fmt_t *fh_fmtp =
6631 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6632 
6633 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6634 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6635 
6636 			if (cs->exi == NULL) {
6637 				resp->status = NFS4ERR_STALE;
6638 				goto finish;
6639 			}
6640 
6641 			VN_RELE(cs->vp);
6642 
6643 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6644 				&resp->status);
6645 
6646 			if (cs->vp == NULL)
6647 				goto finish;
6648 
6649 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6650 		}
6651 
6652 		/*
6653 		 * If this was a replay, no need to update the
6654 		 * sequence id. If the open_owner was not created on
6655 		 * this pass, then update.  The first use of an
6656 		 * open_owner will not bump the sequence id.
6657 		 */
6658 		if (replay == FALSE && !create)
6659 			rfs4_update_open_sequence(oo);
6660 		/*
6661 		 * If the client is receiving an error and the
6662 		 * open_owner needs to be confirmed, there is no way
6663 		 * to notify the client of this fact ignoring the fact
6664 		 * that the server has no method of returning a
6665 		 * stateid to confirm.  Therefore, the server needs to
6666 		 * mark this open_owner in a way as to avoid the
6667 		 * sequence id checking the next time the client uses
6668 		 * this open_owner.
6669 		 */
6670 		if (resp->status != NFS4_OK && oo->need_confirm)
6671 			oo->postpone_confirm = TRUE;
6672 		/*
6673 		 * If OK response then clear the postpone flag and
6674 		 * reset the sequence id to keep in sync with the
6675 		 * client.
6676 		 */
6677 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6678 			oo->postpone_confirm = FALSE;
6679 			oo->open_seqid = args->seqid;
6680 		}
6681 		break;
6682 	}
6683 
6684 finish:
6685 	*cs->statusp = resp->status;
6686 
6687 	rfs4_sw_exit(&oo->oo_sw);
6688 	rfs4_openowner_rele(oo);
6689 }
6690 
6691 /*ARGSUSED*/
6692 void
6693 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6694 		    struct svc_req *req, struct compound_state *cs)
6695 {
6696 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6697 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6698 	rfs4_state_t *sp;
6699 	nfsstat4 status;
6700 
6701 	if (cs->vp == NULL) {
6702 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6703 		return;
6704 	}
6705 
6706 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6707 	if (status != NFS4_OK) {
6708 		*cs->statusp = resp->status = status;
6709 		return;
6710 	}
6711 
6712 	/* Ensure specified filehandle matches */
6713 	if (cs->vp != sp->finfo->vp) {
6714 		rfs4_state_rele(sp);
6715 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6716 		return;
6717 	}
6718 
6719 	/* hold off other access to open_owner while we tinker */
6720 	rfs4_sw_enter(&sp->owner->oo_sw);
6721 
6722 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6723 	case NFS4_CHECK_STATEID_OKAY:
6724 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6725 			resop) != 0) {
6726 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6727 			break;
6728 		}
6729 		/*
6730 		 * If it is the appropriate stateid and determined to
6731 		 * be "OKAY" then this means that the stateid does not
6732 		 * need to be confirmed and the client is in error for
6733 		 * sending an OPEN_CONFIRM.
6734 		 */
6735 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6736 		break;
6737 	case NFS4_CHECK_STATEID_OLD:
6738 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6739 		break;
6740 	case NFS4_CHECK_STATEID_BAD:
6741 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6742 		break;
6743 	case NFS4_CHECK_STATEID_EXPIRED:
6744 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6745 		break;
6746 	case NFS4_CHECK_STATEID_CLOSED:
6747 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6748 		break;
6749 	case NFS4_CHECK_STATEID_REPLAY:
6750 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6751 		case NFS4_CHKSEQ_OKAY:
6752 			/*
6753 			 * This is replayed stateid; if seqid matches
6754 			 * next expected, then client is using wrong seqid.
6755 			 */
6756 			/* fall through */
6757 		case NFS4_CHKSEQ_BAD:
6758 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6759 			break;
6760 		case NFS4_CHKSEQ_REPLAY:
6761 			/*
6762 			 * Note this case is the duplicate case so
6763 			 * resp->status is already set.
6764 			 */
6765 			*cs->statusp = resp->status;
6766 			rfs4_update_lease(sp->owner->client);
6767 			break;
6768 		}
6769 		break;
6770 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6771 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6772 			resop) != NFS4_CHKSEQ_OKAY) {
6773 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6774 			break;
6775 		}
6776 		*cs->statusp = resp->status = NFS4_OK;
6777 
6778 		next_stateid(&sp->stateid);
6779 		resp->open_stateid = sp->stateid.stateid;
6780 		sp->owner->need_confirm = FALSE;
6781 		rfs4_update_lease(sp->owner->client);
6782 		rfs4_update_open_sequence(sp->owner);
6783 		rfs4_update_open_resp(sp->owner, resop, NULL);
6784 		break;
6785 	default:
6786 		ASSERT(FALSE);
6787 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6788 		break;
6789 	}
6790 	rfs4_sw_exit(&sp->owner->oo_sw);
6791 	rfs4_state_rele(sp);
6792 }
6793 
6794 /*ARGSUSED*/
6795 void
6796 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
6797 		    struct svc_req *req, struct compound_state *cs)
6798 {
6799 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
6800 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
6801 	uint32_t access = args->share_access;
6802 	uint32_t deny = args->share_deny;
6803 	nfsstat4 status;
6804 	rfs4_state_t *sp;
6805 	rfs4_file_t *fp;
6806 
6807 	if (cs->vp == NULL) {
6808 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6809 		return;
6810 	}
6811 
6812 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6813 	if (status != NFS4_OK) {
6814 		*cs->statusp = resp->status = status;
6815 		return;
6816 	}
6817 
6818 	/* Ensure specified filehandle matches */
6819 	if (cs->vp != sp->finfo->vp) {
6820 		rfs4_state_rele(sp);
6821 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6822 		return;
6823 	}
6824 
6825 	/* hold off other access to open_owner while we tinker */
6826 	rfs4_sw_enter(&sp->owner->oo_sw);
6827 
6828 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6829 	case NFS4_CHECK_STATEID_OKAY:
6830 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6831 			resop) != NFS4_CHKSEQ_OKAY) {
6832 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6833 			goto end;
6834 		}
6835 		break;
6836 	case NFS4_CHECK_STATEID_OLD:
6837 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6838 		goto end;
6839 	case NFS4_CHECK_STATEID_BAD:
6840 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6841 		goto end;
6842 	case NFS4_CHECK_STATEID_EXPIRED:
6843 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6844 		goto end;
6845 	case NFS4_CHECK_STATEID_CLOSED:
6846 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6847 		goto end;
6848 	case NFS4_CHECK_STATEID_UNCONFIRMED:
6849 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6850 		goto end;
6851 	case NFS4_CHECK_STATEID_REPLAY:
6852 		/* Check the sequence id for the open owner */
6853 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
6854 		case NFS4_CHKSEQ_OKAY:
6855 			/*
6856 			 * This is replayed stateid; if seqid matches
6857 			 * next expected, then client is using wrong seqid.
6858 			 */
6859 			/* fall through */
6860 		case NFS4_CHKSEQ_BAD:
6861 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6862 			goto end;
6863 		case NFS4_CHKSEQ_REPLAY:
6864 			/*
6865 			 * Note this case is the duplicate case so
6866 			 * resp->status is already set.
6867 			 */
6868 			*cs->statusp = resp->status;
6869 			rfs4_update_lease(sp->owner->client);
6870 			goto end;
6871 		}
6872 		break;
6873 	default:
6874 		ASSERT(FALSE);
6875 		break;
6876 	}
6877 
6878 	rfs4_dbe_lock(sp->dbe);
6879 	/*
6880 	 * Check that the new access modes and deny modes are valid.
6881 	 * Check that no invalid bits are set.
6882 	 */
6883 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
6884 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
6885 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6886 		rfs4_update_open_sequence(sp->owner);
6887 		rfs4_dbe_unlock(sp->dbe);
6888 		goto end;
6889 	}
6890 
6891 	/*
6892 	 * The new modes must be a subset of the current modes and
6893 	 * the access must specify at least one mode. To test that
6894 	 * the new mode is a subset of the current modes we bitwise
6895 	 * AND them together and check that the result equals the new
6896 	 * mode. For example:
6897 	 * New mode, access == R and current mode, sp->share_access  == RW
6898 	 * access & sp->share_access == R == access, so the new access mode
6899 	 * is valid. Consider access == RW, sp->share_access = R
6900 	 * access & sp->share_access == R != access, so the new access mode
6901 	 * is invalid.
6902 	 */
6903 	if ((access & sp->share_access) != access ||
6904 	    (deny & sp->share_deny) != deny ||
6905 	    (access &
6906 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
6907 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6908 		rfs4_update_open_sequence(sp->owner);
6909 		rfs4_dbe_unlock(sp->dbe);
6910 		goto end;
6911 	}
6912 
6913 	/*
6914 	 * Release any share locks associated with this stateID.
6915 	 * Strictly speaking, this violates the spec because the
6916 	 * spec effectively requires that open downgrade be atomic.
6917 	 * At present, fs_shrlock does not have this capability.
6918 	 */
6919 	rfs4_dbe_unlock(sp->dbe);
6920 	rfs4_unshare(sp);
6921 	rfs4_dbe_lock(sp->dbe);
6922 
6923 	fp = sp->finfo;
6924 	rfs4_dbe_lock(fp->dbe);
6925 
6926 	/*
6927 	 * If the current mode has deny read and the new mode
6928 	 * does not, decrement the number of deny read mode bits
6929 	 * and if it goes to zero turn off the deny read bit
6930 	 * on the file.
6931 	 */
6932 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
6933 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
6934 		fp->deny_read--;
6935 		if (fp->deny_read == 0)
6936 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
6937 	}
6938 
6939 	/*
6940 	 * If the current mode has deny write and the new mode
6941 	 * does not, decrement the number of deny write mode bits
6942 	 * and if it goes to zero turn off the deny write bit
6943 	 * on the file.
6944 	 */
6945 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
6946 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
6947 		fp->deny_write--;
6948 		if (fp->deny_write == 0)
6949 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
6950 	}
6951 
6952 	/*
6953 	 * If the current mode has access read and the new mode
6954 	 * does not, decrement the number of access read mode bits
6955 	 * and if it goes to zero turn off the access read bit
6956 	 * on the file.
6957 	 */
6958 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
6959 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
6960 		fp->access_read--;
6961 		if (fp->access_read == 0)
6962 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
6963 	}
6964 
6965 	/*
6966 	 * If the current mode has access write and the new mode
6967 	 * does not, decrement the number of access write mode bits
6968 	 * and if it goes to zero turn off the access write bit
6969 	 * on the file.
6970 	 */
6971 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
6972 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
6973 		fp->access_write--;
6974 		if (fp->access_write == 0)
6975 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
6976 	}
6977 
6978 	/* Set the new access and deny modes */
6979 	sp->share_access = access;
6980 	sp->share_deny = deny;
6981 	/* Check that the file is still accessible */
6982 	ASSERT(fp->share_access);
6983 
6984 	rfs4_dbe_unlock(fp->dbe);
6985 
6986 	rfs4_dbe_unlock(sp->dbe);
6987 	if ((status = rfs4_share(sp)) != NFS4_OK) {
6988 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
6989 		rfs4_update_open_sequence(sp->owner);
6990 		goto end;
6991 	}
6992 
6993 	rfs4_dbe_lock(sp->dbe);
6994 
6995 	/* Update the stateid */
6996 	next_stateid(&sp->stateid);
6997 	resp->open_stateid = sp->stateid.stateid;
6998 
6999 	rfs4_dbe_unlock(sp->dbe);
7000 
7001 	*cs->statusp = resp->status = NFS4_OK;
7002 	/* Update the lease */
7003 	rfs4_update_lease(sp->owner->client);
7004 	/* And the sequence */
7005 	rfs4_update_open_sequence(sp->owner);
7006 	rfs4_update_open_resp(sp->owner, resop, NULL);
7007 
7008 end:
7009 	rfs4_sw_exit(&sp->owner->oo_sw);
7010 	rfs4_state_rele(sp);
7011 }
7012 
7013 /*
7014  * The logic behind this function is detailed in the NFSv4 RFC in the
7015  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7016  * that section for explicit guidance to server behavior for
7017  * SETCLIENTID.
7018  */
7019 void
7020 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7021 		    struct svc_req *req, struct compound_state *cs)
7022 {
7023 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7024 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7025 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7026 	bool_t create = TRUE;
7027 	char *addr, *netid;
7028 	int len;
7029 
7030 retry:
7031 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7032 
7033 	/*
7034 	 * In search of an EXISTING client matching the incoming
7035 	 * request to establish a new client identifier at the server
7036 	 */
7037 	create = TRUE;
7038 	cp = rfs4_findclient(&args->client, &create, NULL);
7039 
7040 	/* Should never happen */
7041 	ASSERT(cp != NULL);
7042 
7043 	if (cp == NULL) {
7044 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7045 		return;
7046 	}
7047 
7048 	/*
7049 	 * Easiest case. Client identifier is newly created and is
7050 	 * unconfirmed.  Also note that for this case, no other
7051 	 * entries exist for the client identifier.  Nothing else to
7052 	 * check.  Just setup the response and respond.
7053 	 */
7054 	if (create) {
7055 		*cs->statusp = res->status = NFS4_OK;
7056 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7057 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7058 							cp->confirm_verf;
7059 		/* Setup callback information; CB_NULL confirmation later */
7060 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7061 
7062 		rfs4_client_rele(cp);
7063 		return;
7064 	}
7065 
7066 	/*
7067 	 * An existing, confirmed client may exist but it may not have
7068 	 * been active for at least one lease period.  If so, then
7069 	 * "close" the client and create a new client identifier
7070 	 */
7071 	if (rfs4_lease_expired(cp)) {
7072 		rfs4_client_close(cp);
7073 		goto retry;
7074 	}
7075 
7076 	if (cp->need_confirm == TRUE)
7077 		cp_unconfirmed = cp;
7078 	else
7079 		cp_confirmed = cp;
7080 
7081 	cp = NULL;
7082 
7083 	/*
7084 	 * We have a confirmed client, now check for an
7085 	 * unconfimred entry
7086 	 */
7087 	if (cp_confirmed) {
7088 		/* If creds don't match then client identifier is inuse */
7089 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7090 			rfs4_cbinfo_t *cbp;
7091 			/*
7092 			 * Some one else has established this client
7093 			 * id. Try and say * who they are. We will use
7094 			 * the call back address supplied by * the
7095 			 * first client.
7096 			 */
7097 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7098 
7099 			addr = netid = NULL;
7100 
7101 			cbp = &cp_confirmed->cbinfo;
7102 			if (cbp->cb_callback.cb_location.r_addr &&
7103 			    cbp->cb_callback.cb_location.r_netid) {
7104 				cb_client4 *cbcp = &cbp->cb_callback;
7105 
7106 				len = strlen(cbcp->cb_location.r_addr)+1;
7107 				addr = kmem_alloc(len, KM_SLEEP);
7108 				bcopy(cbcp->cb_location.r_addr, addr, len);
7109 				len = strlen(cbcp->cb_location.r_netid)+1;
7110 				netid = kmem_alloc(len, KM_SLEEP);
7111 				bcopy(cbcp->cb_location.r_netid, netid, len);
7112 			}
7113 
7114 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7115 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7116 
7117 			rfs4_client_rele(cp_confirmed);
7118 		}
7119 
7120 		/*
7121 		 * Confirmed, creds match, and verifier matches; must
7122 		 * be an update of the callback info
7123 		 */
7124 		if (cp_confirmed->nfs_client.verifier ==
7125 						args->client.verifier) {
7126 			/* Setup callback information */
7127 			rfs4_client_setcb(cp_confirmed, &args->callback,
7128 						args->callback_ident);
7129 
7130 			/* everything okay -- move ahead */
7131 			*cs->statusp = res->status = NFS4_OK;
7132 			res->SETCLIENTID4res_u.resok4.clientid =
7133 				cp_confirmed->clientid;
7134 
7135 			/* update the confirm_verifier and return it */
7136 			rfs4_client_scv_next(cp_confirmed);
7137 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7138 						cp_confirmed->confirm_verf;
7139 
7140 			rfs4_client_rele(cp_confirmed);
7141 			return;
7142 		}
7143 
7144 		/*
7145 		 * Creds match but the verifier doesn't.  Must search
7146 		 * for an unconfirmed client that would be replaced by
7147 		 * this request.
7148 		 */
7149 		create = FALSE;
7150 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7151 						cp_confirmed);
7152 	}
7153 
7154 	/*
7155 	 * At this point, we have taken care of the brand new client
7156 	 * struct, INUSE case, update of an existing, and confirmed
7157 	 * client struct.
7158 	 */
7159 
7160 	/*
7161 	 * check to see if things have changed while we originally
7162 	 * picked up the client struct.  If they have, then return and
7163 	 * retry the processing of this SETCLIENTID request.
7164 	 */
7165 	if (cp_unconfirmed) {
7166 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7167 		if (!cp_unconfirmed->need_confirm) {
7168 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7169 			rfs4_client_rele(cp_unconfirmed);
7170 			if (cp_confirmed)
7171 				rfs4_client_rele(cp_confirmed);
7172 			goto retry;
7173 		}
7174 		/* do away with the old unconfirmed one */
7175 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7176 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7177 		rfs4_client_rele(cp_unconfirmed);
7178 		cp_unconfirmed = NULL;
7179 	}
7180 
7181 	/*
7182 	 * This search will temporarily hide the confirmed client
7183 	 * struct while a new client struct is created as the
7184 	 * unconfirmed one.
7185 	 */
7186 	create = TRUE;
7187 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7188 
7189 	ASSERT(newcp != NULL);
7190 
7191 	if (newcp == NULL) {
7192 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7193 		rfs4_client_rele(cp_confirmed);
7194 		return;
7195 	}
7196 
7197 	/*
7198 	 * If one was not created, then a similar request must be in
7199 	 * process so release and start over with this one
7200 	 */
7201 	if (create != TRUE) {
7202 		rfs4_client_rele(newcp);
7203 		if (cp_confirmed)
7204 			rfs4_client_rele(cp_confirmed);
7205 		goto retry;
7206 	}
7207 
7208 	*cs->statusp = res->status = NFS4_OK;
7209 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7210 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7211 							newcp->confirm_verf;
7212 	/* Setup callback information; CB_NULL confirmation later */
7213 	rfs4_client_setcb(newcp, &args->callback,
7214 				args->callback_ident);
7215 
7216 	newcp->cp_confirmed = cp_confirmed;
7217 
7218 	rfs4_client_rele(newcp);
7219 }
7220 
7221 /*ARGSUSED*/
7222 void
7223 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7224 			    struct svc_req *req, struct compound_state *cs)
7225 {
7226 	SETCLIENTID_CONFIRM4args *args =
7227 		&argop->nfs_argop4_u.opsetclientid_confirm;
7228 	SETCLIENTID_CONFIRM4res *res =
7229 		&resop->nfs_resop4_u.opsetclientid_confirm;
7230 	rfs4_client_t *cp, *cptoclose = NULL;
7231 
7232 	*cs->statusp = res->status = NFS4_OK;
7233 
7234 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7235 
7236 	if (cp == NULL) {
7237 		*cs->statusp = res->status =
7238 			rfs4_check_clientid(&args->clientid, 1);
7239 		return;
7240 	}
7241 
7242 	if (!creds_ok(cp, req, cs)) {
7243 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7244 		rfs4_client_rele(cp);
7245 		return;
7246 	}
7247 
7248 	/* If the verifier doesn't match, the record doesn't match */
7249 	if (cp->confirm_verf != args->setclientid_confirm) {
7250 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7251 		rfs4_client_rele(cp);
7252 		return;
7253 	}
7254 
7255 	rfs4_dbe_lock(cp->dbe);
7256 	cp->need_confirm = FALSE;
7257 	if (cp->cp_confirmed) {
7258 		cptoclose = cp->cp_confirmed;
7259 		cptoclose->ss_remove = 1;
7260 		cp->cp_confirmed = NULL;
7261 	}
7262 
7263 	/*
7264 	 * Record clientid in stable storage
7265 	 */
7266 	rfs4_ss_clid(cp, req);
7267 
7268 	rfs4_dbe_unlock(cp->dbe);
7269 
7270 	if (cptoclose)
7271 		/* don't need to rele, client_close does it */
7272 		rfs4_client_close(cptoclose);
7273 
7274 	/* If needed, initiate CB_NULL call for callback path */
7275 	rfs4_deleg_cb_check(cp);
7276 	rfs4_update_lease(cp);
7277 
7278 	/*
7279 	 * Update the client's associated server instance, if it's changed
7280 	 * since the client was created.
7281 	 */
7282 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7283 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7284 
7285 	/*
7286 	 * Check to see if client can perform reclaims
7287 	 */
7288 	rfs4_ss_chkclid(cp);
7289 
7290 	rfs4_client_rele(cp);
7291 }
7292 
7293 
7294 /*ARGSUSED*/
7295 void
7296 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7297 	    struct svc_req *req, struct compound_state *cs)
7298 {
7299 	/* XXX Currently not using req arg */
7300 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7301 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7302 	rfs4_state_t *sp;
7303 	nfsstat4 status;
7304 
7305 	if (cs->vp == NULL) {
7306 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7307 		return;
7308 	}
7309 
7310 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7311 	if (status != NFS4_OK) {
7312 		*cs->statusp = resp->status = status;
7313 		return;
7314 	}
7315 
7316 	/* Ensure specified filehandle matches */
7317 	if (cs->vp != sp->finfo->vp) {
7318 		rfs4_state_rele(sp);
7319 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7320 		return;
7321 	}
7322 
7323 	/* hold off other access to open_owner while we tinker */
7324 	rfs4_sw_enter(&sp->owner->oo_sw);
7325 
7326 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7327 	case NFS4_CHECK_STATEID_OKAY:
7328 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7329 			resop) != NFS4_CHKSEQ_OKAY) {
7330 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7331 			goto end;
7332 		}
7333 		break;
7334 	case NFS4_CHECK_STATEID_OLD:
7335 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7336 		goto end;
7337 	case NFS4_CHECK_STATEID_BAD:
7338 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7339 		goto end;
7340 	case NFS4_CHECK_STATEID_EXPIRED:
7341 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7342 		goto end;
7343 	case NFS4_CHECK_STATEID_CLOSED:
7344 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7345 		goto end;
7346 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7347 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7348 		goto end;
7349 	case NFS4_CHECK_STATEID_REPLAY:
7350 		/* Check the sequence id for the open owner */
7351 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7352 		case NFS4_CHKSEQ_OKAY:
7353 			/*
7354 			 * This is replayed stateid; if seqid matches
7355 			 * next expected, then client is using wrong seqid.
7356 			 */
7357 			/* FALL THROUGH */
7358 		case NFS4_CHKSEQ_BAD:
7359 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7360 			goto end;
7361 		case NFS4_CHKSEQ_REPLAY:
7362 			/*
7363 			 * Note this case is the duplicate case so
7364 			 * resp->status is already set.
7365 			 */
7366 			*cs->statusp = resp->status;
7367 			rfs4_update_lease(sp->owner->client);
7368 			goto end;
7369 		}
7370 		break;
7371 	default:
7372 		ASSERT(FALSE);
7373 		break;
7374 	}
7375 
7376 	rfs4_dbe_lock(sp->dbe);
7377 
7378 	/* Update the stateid. */
7379 	next_stateid(&sp->stateid);
7380 	resp->open_stateid = sp->stateid.stateid;
7381 
7382 	rfs4_dbe_unlock(sp->dbe);
7383 
7384 	rfs4_update_lease(sp->owner->client);
7385 	rfs4_update_open_sequence(sp->owner);
7386 	rfs4_update_open_resp(sp->owner, resop, NULL);
7387 
7388 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7389 
7390 	*cs->statusp = resp->status = status;
7391 
7392 end:
7393 	rfs4_sw_exit(&sp->owner->oo_sw);
7394 	rfs4_state_rele(sp);
7395 }
7396 
7397 /*
7398  * Manage the counts on the file struct and close all file locks
7399  */
7400 /*ARGSUSED*/
7401 void
7402 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7403 	bool_t close_of_client)
7404 {
7405 	rfs4_file_t *fp = sp->finfo;
7406 	rfs4_lo_state_t *lsp;
7407 	struct shrlock shr;
7408 	struct shr_locowner shr_loco;
7409 	int fflags, s_access, s_deny;
7410 
7411 	fflags = s_access = s_deny = 0;
7412 	/*
7413 	 * Decrement the count for each access and deny bit that this
7414 	 * state has contributed to the file. If the file counts go to zero
7415 	 * clear the appropriate bit in the appropriate mask.
7416 	 */
7417 
7418 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7419 		fp->access_read--;
7420 		fflags |= FREAD;
7421 		s_access |= F_RDACC;
7422 		if (fp->access_read == 0)
7423 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7424 	}
7425 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7426 		fp->access_write--;
7427 		fflags |= FWRITE;
7428 		s_access |= F_WRACC;
7429 		if (fp->access_write == 0)
7430 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7431 	}
7432 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7433 		fp->deny_read--;
7434 		s_deny |= F_RDDNY;
7435 		if (fp->deny_read == 0)
7436 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7437 	}
7438 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7439 		fp->deny_write--;
7440 		s_deny |= F_WRDNY;
7441 		if (fp->deny_write == 0)
7442 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7443 	}
7444 
7445 	/*
7446 	 * If this call is part of the larger closing down of client
7447 	 * state then it is just easier to release all locks
7448 	 * associated with this client instead of going through each
7449 	 * individual file and cleaning locks there.
7450 	 */
7451 	if (close_of_client) {
7452 		if (sp->owner->client->unlksys_completed == FALSE &&
7453 		    sp->lockownerlist.next->lsp != NULL &&
7454 			sp->owner->client->sysidt != LM_NOSYSID) {
7455 			/* Is the PxFS kernel module loaded? */
7456 			if (lm_remove_file_locks != NULL) {
7457 				int new_sysid;
7458 
7459 				/* Encode the cluster nodeid in new sysid */
7460 				new_sysid = sp->owner->client->sysidt;
7461 				lm_set_nlmid_flk(&new_sysid);
7462 
7463 				/*
7464 				 * This PxFS routine removes file locks for a
7465 				 * client over all nodes of a cluster.
7466 				 */
7467 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7468 				    "lm_remove_file_locks(sysid=0x%x)\n",
7469 				    new_sysid));
7470 				(*lm_remove_file_locks)(new_sysid);
7471 			} else {
7472 				struct flock64 flk;
7473 
7474 				/* Release all locks for this client */
7475 				flk.l_type = F_UNLKSYS;
7476 				flk.l_whence = 0;
7477 				flk.l_start = 0;
7478 				flk.l_len = 0;
7479 				flk.l_sysid = sp->owner->client->sysidt;
7480 				flk.l_pid = 0;
7481 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7482 				    F_REMOTELOCK | FREAD | FWRITE,
7483 				    (u_offset_t)0, NULL, CRED());
7484 			}
7485 
7486 			sp->owner->client->unlksys_completed = TRUE;
7487 		}
7488 	}
7489 
7490 	/*
7491 	 * Release all locks on this file by this lock owner or at
7492 	 * least mark the locks as having been released
7493 	 */
7494 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7495 		lsp = lsp->lockownerlist.next->lsp) {
7496 
7497 		lsp->locks_cleaned = TRUE;
7498 
7499 		/* Was this already taken care of above? */
7500 		if (!close_of_client &&
7501 		    sp->owner->client->sysidt != LM_NOSYSID)
7502 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7503 				lsp->locker->client->sysidt);
7504 	}
7505 
7506 	/*
7507 	 * Release any shrlocks associated with this open state ID.
7508 	 * This must be done before the rfs4_state gets marked closed.
7509 	 */
7510 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7511 		shr.s_access = s_access;
7512 		shr.s_deny = s_deny;
7513 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7514 		shr.s_sysid = sp->owner->client->sysidt;
7515 		shr_loco.sl_pid = shr.s_pid;
7516 		shr_loco.sl_id = shr.s_sysid;
7517 		shr.s_owner = (caddr_t)&shr_loco;
7518 		shr.s_own_len = sizeof (shr_loco);
7519 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7520 	}
7521 }
7522 
7523 /*
7524  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7525  */
7526 static nfsstat4
7527 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7528 {
7529 	rfs4_lockowner_t *lo;
7530 	rfs4_client_t *cp;
7531 	uint32_t len;
7532 
7533 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7534 	if (lo != NULL) {
7535 		cp = lo->client;
7536 		if (rfs4_lease_expired(cp)) {
7537 			rfs4_lockowner_rele(lo);
7538 			rfs4_dbe_hold(cp->dbe);
7539 			rfs4_client_close(cp);
7540 			return (NFS4ERR_EXPIRED);
7541 		}
7542 		dp->owner.clientid = lo->owner.clientid;
7543 		len = lo->owner.owner_len;
7544 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7545 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7546 		dp->owner.owner_len = len;
7547 		rfs4_lockowner_rele(lo);
7548 		goto finish;
7549 	}
7550 
7551 	/*
7552 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7553 	 * of the client id contain the boot time for a NFS4 lock. So we
7554 	 * fabricate and identity by setting clientid to the sysid, and
7555 	 * the lock owner to the pid.
7556 	 */
7557 	dp->owner.clientid = flk->l_sysid;
7558 	len = sizeof (pid_t);
7559 	dp->owner.owner_len = len;
7560 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7561 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7562 finish:
7563 	dp->offset = flk->l_start;
7564 	dp->length = flk->l_len;
7565 
7566 	if (flk->l_type == F_RDLCK)
7567 		dp->locktype = READ_LT;
7568 	else if (flk->l_type == F_WRLCK)
7569 		dp->locktype = WRITE_LT;
7570 	else
7571 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7572 
7573 	return (NFS4_OK);
7574 }
7575 
7576 static int
7577 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7578 {
7579 	int error;
7580 	struct flock64 flk;
7581 	int i;
7582 	clock_t delaytime;
7583 
7584 retry:
7585 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7586 
7587 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7588 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7589 		error = VOP_FRLOCK(vp, F_SETLK,
7590 				flock, flag, (u_offset_t)0, NULL, cred);
7591 
7592 		if (error != EAGAIN && error != EACCES)
7593 			break;
7594 
7595 		if (i < rfs4_maxlock_tries - 1) {
7596 			delay(delaytime);
7597 			delaytime *= 2;
7598 		}
7599 	}
7600 
7601 	if (error == EAGAIN || error == EACCES) {
7602 		/* Get the owner of the lock */
7603 		flk = *flock;
7604 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7605 		if (VOP_FRLOCK(vp, F_GETLK,
7606 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7607 			if (flk.l_type == F_UNLCK) {
7608 				/* No longer locked, retry */
7609 				goto retry;
7610 			}
7611 			*flock = flk;
7612 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7613 				F_GETLK, &flk);
7614 		}
7615 	}
7616 
7617 	return (error);
7618 }
7619 
7620 /*ARGSUSED*/
7621 static nfsstat4
7622 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7623 	    seqid4 seqid, offset4 offset,
7624 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7625 {
7626 	nfsstat4 status;
7627 	rfs4_lockowner_t *lo = lp->locker;
7628 	rfs4_state_t *sp = lp->state;
7629 	struct flock64 flock;
7630 	int16_t ltype;
7631 	int flag;
7632 	int error;
7633 	sysid_t sysid;
7634 	LOCK4res *lres;
7635 
7636 	if (rfs4_lease_expired(lo->client)) {
7637 		return (NFS4ERR_EXPIRED);
7638 	}
7639 
7640 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7641 		return (status);
7642 
7643 	/* Check for zero length. To lock to end of file use all ones for V4 */
7644 	if (length == 0)
7645 		return (NFS4ERR_INVAL);
7646 	else if (length == (length4)(~0))
7647 		length = 0;		/* Posix to end of file  */
7648 
7649 retry:
7650 	rfs4_dbe_lock(sp->dbe);
7651 
7652 
7653 	if (resop->resop != OP_LOCKU) {
7654 		switch (locktype) {
7655 		case READ_LT:
7656 		case READW_LT:
7657 			if ((sp->share_access
7658 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7659 				rfs4_dbe_unlock(sp->dbe);
7660 
7661 				return (NFS4ERR_OPENMODE);
7662 			}
7663 			ltype = F_RDLCK;
7664 			break;
7665 		case WRITE_LT:
7666 		case WRITEW_LT:
7667 			if ((sp->share_access
7668 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7669 				rfs4_dbe_unlock(sp->dbe);
7670 
7671 				return (NFS4ERR_OPENMODE);
7672 			}
7673 			ltype = F_WRLCK;
7674 			break;
7675 		}
7676 	} else
7677 		ltype = F_UNLCK;
7678 
7679 	flock.l_type = ltype;
7680 	flock.l_whence = 0;		/* SEEK_SET */
7681 	flock.l_start = offset;
7682 	flock.l_len = length;
7683 	flock.l_sysid = sysid;
7684 	flock.l_pid = lp->locker->pid;
7685 
7686 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7687 	if (flock.l_len < 0 || flock.l_start < 0) {
7688 		rfs4_dbe_unlock(sp->dbe);
7689 		return (NFS4ERR_INVAL);
7690 	}
7691 
7692 	/*
7693 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7694 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7695 	 */
7696 	flag = (int)sp->share_access | F_REMOTELOCK;
7697 
7698 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7699 	if (error == 0) {
7700 		rfs4_dbe_lock(lp->dbe);
7701 		next_stateid(&lp->lockid);
7702 		rfs4_dbe_unlock(lp->dbe);
7703 	}
7704 
7705 	rfs4_dbe_unlock(sp->dbe);
7706 
7707 	/*
7708 	 * N.B. We map error values to nfsv4 errors. This is differrent
7709 	 * than puterrno4 routine.
7710 	 */
7711 	switch (error) {
7712 	case 0:
7713 		status = NFS4_OK;
7714 		break;
7715 	case EAGAIN:
7716 	case EACCES:		/* Old value */
7717 		/* Can only get here if op is OP_LOCK */
7718 		ASSERT(resop->resop == OP_LOCK);
7719 		lres = &resop->nfs_resop4_u.oplock;
7720 		status = NFS4ERR_DENIED;
7721 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7722 			== NFS4ERR_EXPIRED)
7723 			goto retry;
7724 		break;
7725 	case ENOLCK:
7726 		status = NFS4ERR_DELAY;
7727 		break;
7728 	case EOVERFLOW:
7729 		status = NFS4ERR_INVAL;
7730 		break;
7731 	case EINVAL:
7732 		status = NFS4ERR_NOTSUPP;
7733 		break;
7734 	default:
7735 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7736 			error);
7737 		status = NFS4ERR_SERVERFAULT;
7738 		break;
7739 	}
7740 
7741 	return (status);
7742 }
7743 
7744 /*ARGSUSED*/
7745 void
7746 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
7747 	    struct svc_req *req, struct compound_state *cs)
7748 {
7749 	/* XXX Currently not using req arg */
7750 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
7751 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
7752 	nfsstat4 status;
7753 	stateid4 *stateid;
7754 	rfs4_lockowner_t *lo;
7755 	rfs4_client_t *cp;
7756 	rfs4_state_t *sp = NULL;
7757 	rfs4_lo_state_t *lsp = NULL;
7758 	bool_t ls_sw_held = FALSE;
7759 	bool_t create = TRUE;
7760 	bool_t lcreate = TRUE;
7761 	bool_t dup_lock = FALSE;
7762 	int rc;
7763 
7764 	if (cs->vp == NULL) {
7765 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7766 		return;
7767 	}
7768 
7769 	if (args->locker.new_lock_owner) {
7770 		/* Create a new lockowner for this instance */
7771 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
7772 
7773 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
7774 
7775 		stateid = &olo->open_stateid;
7776 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
7777 		if (status != NFS4_OK) {
7778 			NFS4_DEBUG(rfs4_debug,
7779 				(CE_NOTE, "Get state failed in lock %d",
7780 				status));
7781 			*cs->statusp = resp->status = status;
7782 			return;
7783 		}
7784 
7785 		/* Ensure specified filehandle matches */
7786 		if (cs->vp != sp->finfo->vp) {
7787 			rfs4_state_rele(sp);
7788 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7789 			return;
7790 		}
7791 
7792 		/* hold off other access to open_owner while we tinker */
7793 		rfs4_sw_enter(&sp->owner->oo_sw);
7794 
7795 		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
7796 		case NFS4_CHECK_STATEID_OLD:
7797 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7798 			goto end;
7799 		case NFS4_CHECK_STATEID_BAD:
7800 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7801 			goto end;
7802 		case NFS4_CHECK_STATEID_EXPIRED:
7803 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7804 			goto end;
7805 		case NFS4_CHECK_STATEID_UNCONFIRMED:
7806 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7807 			goto end;
7808 		case NFS4_CHECK_STATEID_CLOSED:
7809 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7810 			goto end;
7811 		case NFS4_CHECK_STATEID_OKAY:
7812 		case NFS4_CHECK_STATEID_REPLAY:
7813 			switch (rfs4_check_olo_seqid(olo->open_seqid,
7814 				sp->owner, resop)) {
7815 			case NFS4_CHKSEQ_OKAY:
7816 				if (rc == NFS4_CHECK_STATEID_OKAY)
7817 					break;
7818 				/*
7819 				 * This is replayed stateid; if seqid
7820 				 * matches next expected, then client
7821 				 * is using wrong seqid.
7822 				 */
7823 				/* FALLTHROUGH */
7824 			case NFS4_CHKSEQ_BAD:
7825 				*cs->statusp = resp->status =
7826 					NFS4ERR_BAD_SEQID;
7827 				goto end;
7828 			case NFS4_CHKSEQ_REPLAY:
7829 				/* This is a duplicate LOCK request */
7830 				dup_lock = TRUE;
7831 
7832 				/*
7833 				 * For a duplicate we do not want to
7834 				 * create a new lockowner as it should
7835 				 * already exist.
7836 				 * Turn off the lockowner create flag.
7837 				 */
7838 				lcreate = FALSE;
7839 			}
7840 			break;
7841 		}
7842 
7843 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
7844 		if (lo == NULL) {
7845 			NFS4_DEBUG(rfs4_debug,
7846 				(CE_NOTE, "rfs4_op_lock: no lock owner"));
7847 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
7848 			goto end;
7849 		}
7850 
7851 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
7852 		if (lsp == NULL) {
7853 			rfs4_update_lease(sp->owner->client);
7854 			/*
7855 			 * Only update theh open_seqid if this is not
7856 			 * a duplicate request
7857 			 */
7858 			if (dup_lock == FALSE) {
7859 				rfs4_update_open_sequence(sp->owner);
7860 			}
7861 
7862 			NFS4_DEBUG(rfs4_debug,
7863 				(CE_NOTE, "rfs4_op_lock: no state"));
7864 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7865 			rfs4_update_open_resp(sp->owner, resop, NULL);
7866 			rfs4_lockowner_rele(lo);
7867 			goto end;
7868 		}
7869 
7870 		/*
7871 		 * This is the new_lock_owner branch and the client is
7872 		 * supposed to be associating a new lock_owner with
7873 		 * the open file at this point.  If we find that a
7874 		 * lock_owner/state association already exists and a
7875 		 * successful LOCK request was returned to the client,
7876 		 * an error is returned to the client since this is
7877 		 * not appropriate.  The client should be using the
7878 		 * existing lock_owner branch.
7879 		 */
7880 		if (dup_lock == FALSE && create == FALSE) {
7881 			if (lsp->lock_completed == TRUE) {
7882 				*cs->statusp =
7883 					resp->status = NFS4ERR_BAD_SEQID;
7884 				rfs4_lockowner_rele(lo);
7885 				goto end;
7886 			}
7887 		}
7888 
7889 		rfs4_update_lease(sp->owner->client);
7890 
7891 		/*
7892 		 * Only update theh open_seqid if this is not
7893 		 * a duplicate request
7894 		 */
7895 		if (dup_lock == FALSE) {
7896 			rfs4_update_open_sequence(sp->owner);
7897 		}
7898 
7899 		/*
7900 		 * If this is a duplicate lock request, just copy the
7901 		 * previously saved reply and return.
7902 		 */
7903 		if (dup_lock == TRUE) {
7904 			/* verify that lock_seqid's match */
7905 			if (lsp->seqid != olo->lock_seqid) {
7906 				NFS4_DEBUG(rfs4_debug,
7907 				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
7908 				"lsp->seqid=%d old->seqid=%d",
7909 				lsp->seqid, olo->lock_seqid));
7910 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7911 			} else {
7912 				rfs4_copy_reply(resop, lsp->reply);
7913 				/*
7914 				 * Make sure to copy the just
7915 				 * retrieved reply status into the
7916 				 * overall compound status
7917 				 */
7918 				*cs->statusp = resp->status;
7919 			}
7920 			rfs4_lockowner_rele(lo);
7921 			goto end;
7922 		}
7923 
7924 		rfs4_dbe_lock(lsp->dbe);
7925 
7926 		/* Make sure to update the lock sequence id */
7927 		lsp->seqid = olo->lock_seqid;
7928 
7929 		NFS4_DEBUG(rfs4_debug,
7930 			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));
7931 
7932 		/*
7933 		 * This is used to signify the newly created lockowner
7934 		 * stateid and its sequence number.  The checks for
7935 		 * sequence number and increment don't occur on the
7936 		 * very first lock request for a lockowner.
7937 		 */
7938 		lsp->skip_seqid_check = TRUE;
7939 
7940 		/* hold off other access to lsp while we tinker */
7941 		rfs4_sw_enter(&lsp->ls_sw);
7942 		ls_sw_held = TRUE;
7943 
7944 		rfs4_dbe_unlock(lsp->dbe);
7945 
7946 		rfs4_lockowner_rele(lo);
7947 	} else {
7948 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
7949 		/* get lsp and hold the lock on the underlying file struct */
7950 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
7951 		    != NFS4_OK) {
7952 			*cs->statusp = resp->status = status;
7953 			return;
7954 		}
7955 		create = FALSE;	/* We didn't create lsp */
7956 
7957 		/* Ensure specified filehandle matches */
7958 		if (cs->vp != lsp->state->finfo->vp) {
7959 			rfs4_lo_state_rele(lsp, TRUE);
7960 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7961 			return;
7962 		}
7963 
7964 		/* hold off other access to lsp while we tinker */
7965 		rfs4_sw_enter(&lsp->ls_sw);
7966 		ls_sw_held = TRUE;
7967 
7968 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
7969 		/*
7970 		 * The stateid looks like it was okay (expected to be
7971 		 * the next one)
7972 		 */
7973 		case NFS4_CHECK_STATEID_OKAY:
7974 			/*
7975 			 * The sequence id is now checked.  Determine
7976 			 * if this is a replay or if it is in the
7977 			 * expected (next) sequence.  In the case of a
7978 			 * replay, there are two replay conditions
7979 			 * that may occur.  The first is the normal
7980 			 * condition where a LOCK is done with a
7981 			 * NFS4_OK response and the stateid is
7982 			 * updated.  That case is handled below when
7983 			 * the stateid is identified as a REPLAY.  The
7984 			 * second is the case where an error is
7985 			 * returned, like NFS4ERR_DENIED, and the
7986 			 * sequence number is updated but the stateid
7987 			 * is not updated.  This second case is dealt
7988 			 * with here.  So it may seem odd that the
7989 			 * stateid is okay but the sequence id is a
7990 			 * replay but it is okay.
7991 			 */
7992 			switch (rfs4_check_lock_seqid(
7993 				args->locker.locker4_u.lock_owner.lock_seqid,
7994 				lsp, resop)) {
7995 			case NFS4_CHKSEQ_REPLAY:
7996 				if (resp->status != NFS4_OK) {
7997 					/*
7998 					 * Here is our replay and need
7999 					 * to verify that the last
8000 					 * response was an error.
8001 					 */
8002 					*cs->statusp = resp->status;
8003 					goto end;
8004 				}
8005 				/*
8006 				 * This is done since the sequence id
8007 				 * looked like a replay but it didn't
8008 				 * pass our check so a BAD_SEQID is
8009 				 * returned as a result.
8010 				 */
8011 				/*FALLTHROUGH*/
8012 			case NFS4_CHKSEQ_BAD:
8013 				*cs->statusp = resp->status =
8014 					NFS4ERR_BAD_SEQID;
8015 				goto end;
8016 			case NFS4_CHKSEQ_OKAY:
8017 				/* Everything looks okay move ahead */
8018 				break;
8019 			}
8020 			break;
8021 		case NFS4_CHECK_STATEID_OLD:
8022 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8023 			goto end;
8024 		case NFS4_CHECK_STATEID_BAD:
8025 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8026 			goto end;
8027 		case NFS4_CHECK_STATEID_EXPIRED:
8028 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8029 			goto end;
8030 		case NFS4_CHECK_STATEID_CLOSED:
8031 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8032 			goto end;
8033 		case NFS4_CHECK_STATEID_REPLAY:
8034 			switch (rfs4_check_lock_seqid(
8035 				args->locker.locker4_u.lock_owner.lock_seqid,
8036 				lsp, resop)) {
8037 			case NFS4_CHKSEQ_OKAY:
8038 				/*
8039 				 * This is a replayed stateid; if
8040 				 * seqid matches the next expected,
8041 				 * then client is using wrong seqid.
8042 				 */
8043 			case NFS4_CHKSEQ_BAD:
8044 				*cs->statusp = resp->status =
8045 					NFS4ERR_BAD_SEQID;
8046 				goto end;
8047 			case NFS4_CHKSEQ_REPLAY:
8048 				rfs4_update_lease(lsp->locker->client);
8049 				*cs->statusp = status = resp->status;
8050 				goto end;
8051 			}
8052 			break;
8053 		default:
8054 			ASSERT(FALSE);
8055 			break;
8056 		}
8057 
8058 		rfs4_update_lock_sequence(lsp);
8059 		rfs4_update_lease(lsp->locker->client);
8060 	}
8061 
8062 	/*
8063 	 * NFS4 only allows locking on regular files, so
8064 	 * verify type of object.
8065 	 */
8066 	if (cs->vp->v_type != VREG) {
8067 		if (cs->vp->v_type == VDIR)
8068 			status = NFS4ERR_ISDIR;
8069 		else
8070 			status = NFS4ERR_INVAL;
8071 		goto out;
8072 	}
8073 
8074 	cp = lsp->state->owner->client;
8075 
8076 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8077 		status = NFS4ERR_GRACE;
8078 		goto out;
8079 	}
8080 
8081 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
8082 		status = NFS4ERR_NO_GRACE;
8083 		goto out;
8084 	}
8085 
8086 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8087 		status = NFS4ERR_NO_GRACE;
8088 		goto out;
8089 	}
8090 
8091 	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
8092 		cs->deleg = TRUE;
8093 
8094 	status = rfs4_do_lock(lsp, args->locktype,
8095 				args->locker.locker4_u.lock_owner.lock_seqid,
8096 				args->offset,
8097 				args->length, cs->cr, resop);
8098 
8099 out:
8100 	lsp->skip_seqid_check = FALSE;
8101 
8102 	*cs->statusp = resp->status = status;
8103 
8104 	if (status == NFS4_OK) {
8105 		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
8106 		lsp->lock_completed = TRUE;
8107 	}
8108 	/*
8109 	 * Only update the "OPEN" response here if this was a new
8110 	 * lock_owner
8111 	 */
8112 	if (sp)
8113 		rfs4_update_open_resp(sp->owner, resop, NULL);
8114 
8115 	rfs4_update_lock_resp(lsp, resop);
8116 
8117 end:
8118 	if (lsp) {
8119 		if (ls_sw_held)
8120 			rfs4_sw_exit(&lsp->ls_sw);
8121 		/*
8122 		 * If an sp obtained, then the lsp does not represent
8123 		 * a lock on the file struct.
8124 		 */
8125 		if (sp != NULL)
8126 			rfs4_lo_state_rele(lsp, FALSE);
8127 		else
8128 			rfs4_lo_state_rele(lsp, TRUE);
8129 	}
8130 	if (sp) {
8131 		rfs4_sw_exit(&sp->owner->oo_sw);
8132 		rfs4_state_rele(sp);
8133 	}
8134 }
8135 
8136 /* free function for LOCK/LOCKT */
8137 static void
8138 lock_denied_free(nfs_resop4 *resop)
8139 {
8140 	LOCK4denied *dp = NULL;
8141 
8142 	switch (resop->resop) {
8143 	case OP_LOCK:
8144 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8145 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8146 		break;
8147 	case OP_LOCKT:
8148 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8149 			dp = &resop->nfs_resop4_u.oplockt.denied;
8150 		break;
8151 	default:
8152 		break;
8153 	}
8154 
8155 	if (dp)
8156 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8157 }
8158 
8159 /*ARGSUSED*/
8160 void
8161 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8162 	    struct svc_req *req, struct compound_state *cs)
8163 {
8164 	/* XXX Currently not using req arg */
8165 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8166 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8167 	nfsstat4 status;
8168 	stateid4 *stateid = &args->lock_stateid;
8169 	rfs4_lo_state_t *lsp;
8170 
8171 	if (cs->vp == NULL) {
8172 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8173 		return;
8174 	}
8175 
8176 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8177 		*cs->statusp = resp->status = status;
8178 		return;
8179 	}
8180 
8181 	/* Ensure specified filehandle matches */
8182 	if (cs->vp != lsp->state->finfo->vp) {
8183 		rfs4_lo_state_rele(lsp, TRUE);
8184 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8185 		return;
8186 	}
8187 
8188 	/* hold off other access to lsp while we tinker */
8189 	rfs4_sw_enter(&lsp->ls_sw);
8190 
8191 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8192 	case NFS4_CHECK_STATEID_OKAY:
8193 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8194 		    != NFS4_CHKSEQ_OKAY) {
8195 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8196 			goto end;
8197 		}
8198 		break;
8199 	case NFS4_CHECK_STATEID_OLD:
8200 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8201 		goto end;
8202 	case NFS4_CHECK_STATEID_BAD:
8203 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8204 		goto end;
8205 	case NFS4_CHECK_STATEID_EXPIRED:
8206 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8207 		goto end;
8208 	case NFS4_CHECK_STATEID_CLOSED:
8209 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8210 		goto end;
8211 	case NFS4_CHECK_STATEID_REPLAY:
8212 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8213 		case NFS4_CHKSEQ_OKAY:
8214 				/*
8215 				 * This is a replayed stateid; if
8216 				 * seqid matches the next expected,
8217 				 * then client is using wrong seqid.
8218 				 */
8219 		case NFS4_CHKSEQ_BAD:
8220 			*cs->statusp = resp->status =
8221 				NFS4ERR_BAD_SEQID;
8222 			goto end;
8223 		case NFS4_CHKSEQ_REPLAY:
8224 			rfs4_update_lease(lsp->locker->client);
8225 			*cs->statusp = status = resp->status;
8226 			goto end;
8227 		}
8228 		break;
8229 	default:
8230 		ASSERT(FALSE);
8231 		break;
8232 	}
8233 
8234 	rfs4_update_lock_sequence(lsp);
8235 	rfs4_update_lease(lsp->locker->client);
8236 
8237 	/*
8238 	 * NFS4 only allows locking on regular files, so
8239 	 * verify type of object.
8240 	 */
8241 	if (cs->vp->v_type != VREG) {
8242 		if (cs->vp->v_type == VDIR)
8243 			status = NFS4ERR_ISDIR;
8244 		else
8245 			status = NFS4ERR_INVAL;
8246 		goto out;
8247 	}
8248 
8249 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8250 		status = NFS4ERR_GRACE;
8251 		goto out;
8252 	}
8253 
8254 	status = rfs4_do_lock(lsp, args->locktype,
8255 			    args->seqid, args->offset,
8256 			    args->length, cs->cr, resop);
8257 
8258 out:
8259 	*cs->statusp = resp->status = status;
8260 
8261 	if (status == NFS4_OK)
8262 		resp->lock_stateid = lsp->lockid.stateid;
8263 
8264 	rfs4_update_lock_resp(lsp, resop);
8265 
8266 end:
8267 	rfs4_sw_exit(&lsp->ls_sw);
8268 	rfs4_lo_state_rele(lsp, TRUE);
8269 }
8270 
8271 /*
8272  * LOCKT is a best effort routine, the client can not be guaranteed that
8273  * the status return is still in effect by the time the reply is received.
8274  * They are numerous race conditions in this routine, but we are not required
8275  * and can not be accurate.
8276  */
8277 /*ARGSUSED*/
8278 void
8279 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8280 	    struct svc_req *req, struct compound_state *cs)
8281 {
8282 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8283 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8284 	rfs4_lockowner_t *lo;
8285 	rfs4_client_t *cp;
8286 	bool_t create = FALSE;
8287 	struct flock64 flk;
8288 	int error;
8289 	int flag = FREAD | FWRITE;
8290 	int ltype;
8291 	length4 posix_length;
8292 	sysid_t sysid;
8293 	pid_t pid;
8294 
8295 	if (cs->vp == NULL) {
8296 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8297 		return;
8298 	}
8299 
8300 	/*
8301 	 * NFS4 only allows locking on regular files, so
8302 	 * verify type of object.
8303 	 */
8304 	if (cs->vp->v_type != VREG) {
8305 		if (cs->vp->v_type == VDIR)
8306 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8307 		else
8308 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8309 		return;
8310 	}
8311 
8312 	/*
8313 	 * Check out the clientid to ensure the server knows about it
8314 	 * so that we correctly inform the client of a server reboot.
8315 	 */
8316 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8317 	    == NULL) {
8318 		*cs->statusp = resp->status =
8319 			rfs4_check_clientid(&args->owner.clientid, 0);
8320 		return;
8321 	}
8322 	if (rfs4_lease_expired(cp)) {
8323 		rfs4_client_close(cp);
8324 		/*
8325 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8326 		 * other operations do on this check so STALE_CLIENTID
8327 		 * is returned instead
8328 		 */
8329 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8330 		return;
8331 	}
8332 
8333 	if (rfs4_clnt_in_grace(cp)) {
8334 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8335 		return;
8336 	}
8337 	rfs4_client_rele(cp);
8338 
8339 	resp->status = NFS4_OK;
8340 
8341 	switch (args->locktype) {
8342 	case READ_LT:
8343 	case READW_LT:
8344 		ltype = F_RDLCK;
8345 		break;
8346 	case WRITE_LT:
8347 	case WRITEW_LT:
8348 		ltype = F_WRLCK;
8349 		break;
8350 	}
8351 
8352 	posix_length = args->length;
8353 	/* Check for zero length. To lock to end of file use all ones for V4 */
8354 	if (posix_length == 0) {
8355 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8356 		return;
8357 	} else if (posix_length == (length4)(~0)) {
8358 		posix_length = 0;	/* Posix to end of file  */
8359 	}
8360 
8361 	/* Find or create a lockowner */
8362 	lo = rfs4_findlockowner(&args->owner, &create);
8363 
8364 	if (lo) {
8365 		pid = lo->pid;
8366 		if ((resp->status =
8367 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8368 		goto out;
8369 	} else {
8370 		pid = 0;
8371 		sysid = lockt_sysid;
8372 	}
8373 retry:
8374 	flk.l_type = ltype;
8375 	flk.l_whence = 0;		/* SEEK_SET */
8376 	flk.l_start = args->offset;
8377 	flk.l_len = posix_length;
8378 	flk.l_sysid = sysid;
8379 	flk.l_pid = pid;
8380 	flag |= F_REMOTELOCK;
8381 
8382 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8383 
8384 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8385 	if (flk.l_len < 0 || flk.l_start < 0) {
8386 		resp->status = NFS4ERR_INVAL;
8387 		goto out;
8388 	}
8389 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8390 	    NULL, cs->cr);
8391 
8392 	/*
8393 	 * N.B. We map error values to nfsv4 errors. This is differrent
8394 	 * than puterrno4 routine.
8395 	 */
8396 	switch (error) {
8397 	case 0:
8398 		if (flk.l_type == F_UNLCK)
8399 			resp->status = NFS4_OK;
8400 		else {
8401 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8402 				goto retry;
8403 			resp->status = NFS4ERR_DENIED;
8404 		}
8405 		break;
8406 	case EOVERFLOW:
8407 		resp->status = NFS4ERR_INVAL;
8408 		break;
8409 	case EINVAL:
8410 		resp->status = NFS4ERR_NOTSUPP;
8411 		break;
8412 	default:
8413 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8414 			error);
8415 		resp->status = NFS4ERR_SERVERFAULT;
8416 		break;
8417 	}
8418 
8419 out:
8420 	if (lo)
8421 		rfs4_lockowner_rele(lo);
8422 	*cs->statusp = resp->status;
8423 }
8424 
8425 static int
8426 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8427 {
8428 	int err;
8429 
8430 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8431 		return (0);
8432 
8433 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8434 
8435 	NFS4_DEBUG(rfs4_shrlock_debug,
8436 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8437 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8438 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8439 		err));
8440 
8441 	return (err);
8442 }
8443 
8444 static int
8445 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8446 {
8447 	struct shrlock shr;
8448 	struct shr_locowner shr_loco;
8449 	int fflags;
8450 
8451 	fflags = shr.s_access = shr.s_deny = 0;
8452 
8453 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8454 		fflags |= FREAD;
8455 		shr.s_access |= F_RDACC;
8456 	}
8457 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8458 		fflags |= FWRITE;
8459 		shr.s_access |= F_WRACC;
8460 	}
8461 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8462 		shr.s_deny |= F_RDDNY;
8463 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8464 		shr.s_deny |= F_WRDNY;
8465 
8466 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8467 	shr.s_sysid = sp->owner->client->sysidt;
8468 	shr_loco.sl_pid = shr.s_pid;
8469 	shr_loco.sl_id = shr.s_sysid;
8470 	shr.s_owner = (caddr_t)&shr_loco;
8471 	shr.s_own_len = sizeof (shr_loco);
8472 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8473 }
8474 
8475 static int
8476 rfs4_share(rfs4_state_t *sp)
8477 {
8478 	return (rfs4_shrlock(sp, F_SHARE));
8479 }
8480 
8481 void
8482 rfs4_unshare(rfs4_state_t *sp)
8483 {
8484 	(void) rfs4_shrlock(sp, F_UNSHARE);
8485 }
8486