xref: /titanic_41/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 99ebb4ca412cb0a19d77a3899a87c055b9c30fa8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vnode.h>
40 #include <sys/uio.h>
41 #include <sys/errno.h>
42 #include <sys/sysmacros.h>
43 #include <sys/statvfs.h>
44 #include <sys/kmem.h>
45 #include <sys/dirent.h>
46 #include <sys/cmn_err.h>
47 #include <sys/debug.h>
48 #include <sys/systeminfo.h>
49 #include <sys/flock.h>
50 #include <sys/pathname.h>
51 #include <sys/nbmlock.h>
52 #include <sys/share.h>
53 #include <sys/atomic.h>
54 #include <sys/policy.h>
55 #include <sys/fem.h>
56 #include <sys/sdt.h>
57 #include <sys/ddi.h>
58 
59 #include <rpc/types.h>
60 #include <rpc/auth.h>
61 #include <rpc/rpcsec_gss.h>
62 #include <rpc/svc.h>
63 
64 #include <nfs/nfs.h>
65 #include <nfs/export.h>
66 #include <nfs/lm.h>
67 #include <nfs/nfs4.h>
68 
69 #include <sys/strsubr.h>
70 #include <sys/strsun.h>
71 
72 #include <inet/common.h>
73 #include <inet/ip.h>
74 #include <inet/ip6.h>
75 
76 #include <sys/tsol/label.h>
77 #include <sys/tsol/tndb.h>
78 
/*
 * Tunables controlling how lock acquisition is retried.
 */
#define	RFS4_MAXLOCK_TRIES	4	/* Try to get the lock this many times */
#define	RFS4_LOCK_DELAY		10	/* Milliseconds */

static int	rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
static clock_t	rfs4_lock_delay = RFS4_LOCK_DELAY;

/* End of Tunables */

/*
 * Bump the change sequence (bits.chgseq) of the stateid that sp points to,
 * so that a change in the stateid is visible.  The expression evaluates to
 * the incremented value.
 */
#define	next_stateid(sp)	((sp)->bits.chgseq += 1)
90 
/*
 * RFS4_MINLEN_ENTRY4: XDR-encoded size of the smallest possible dirent.
 *	Used to return NFS4ERR_TOOSMALL when a client specifies a maxcount
 *	that is not large enough to hold the smallest possible XDR-encoded
 *	dirent.  The terms of the sum below are, in order:
 *
 *	    sizeof cookie (8 bytes) +
 *	    sizeof name_len (4 bytes) +
 *	    sizeof smallest (padded) name (4 bytes) +
 *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 *	    sizeof attrlist4_len (4 bytes) +
 *	    sizeof next boolean (4 bytes)
 *
 * RFS4_MINLEN_RDDIR4: XDR-encoded size of a READDIR op reply containing
 * the smallest possible entry4 (assumes no attrs requested):
 *
 *	    sizeof nfsstat4 (4 bytes) +
 *	    sizeof verifier4 (8 bytes) +
 *	    sizeof entry4list bool (4 bytes) +
 *	    sizeof entry4 (36 bytes) +
 *	    sizeof eof bool (4 bytes)
 *
 * RFS4_MINLEN_RDDIR_BUF: minimum length of the buffer the server hands to
 *	VOP_READDIR.  Its value is the size of the maximum possible dirent
 *	for Solaris.  DIRENT64_RECLEN() yields the size of the dirent needed
 *	for a given name length, and MAXNAMELEN is the maximum filename
 *	length allowed in Solaris.  The first two DIRENT64_RECLEN() terms
 *	allow for the "." and ".." entries -- a minor tweak to try to
 *	guarantee that the buffer given to VOP_READDIR can hold ".", "..",
 *	and the largest possible Solaris dirent64.
 */
#define	RFS4_MINLEN_ENTRY4	(8 + 4 + 4 + 12 + 4 + 4)
#define	RFS4_MINLEN_RDDIR4	\
	(4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
#define	RFS4_MINLEN_RDDIR_BUF	\
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
125 
/*
 * Estimate how much a dirent64 contributes to the READDIR dircount.
 *
 * It would be better to pad to 4 bytes since that is what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we are given.  Dircount is only a hint anyway.  Currently the
 * Solaris kernel is ASCII only, so there is no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * The result counts three XDR units (cookie: uint64_t, utf8namelen: uint_t)
 * plus the name as padded in the dirent64 (to 8 bytes).
 */
#define	DIRENT64_TO_DIRCOUNT(dp)	\
	(DIRENT64_NAMELEN((dp)->d_reclen) + 3 * BYTES_PER_XDR_UNIT)
141 
142 /*
143  * types of label comparison
144  */
145 #define	EQUALITY_CHECK	0
146 #define	DOMINANCE_CHECK	1
147 
148 time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */
149 
150 static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */
151 
152 u_longlong_t nfs4_srv_caller_id;
153 
154 verifier4	Write4verf;
155 verifier4	Readdir4verf;
156 
157 void		rfs4_init_compound_state(struct compound_state *);
158 
159 static void	nullfree(caddr_t);
160 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
161 			struct compound_state *);
162 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
163 			struct compound_state *);
164 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
165 			struct compound_state *);
166 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 			struct compound_state *);
168 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 			struct compound_state *);
170 static void	rfs4_op_create_free(nfs_resop4 *resop);
171 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
172 				    struct svc_req *, struct compound_state *);
173 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
174 			struct compound_state *);
175 static void	rfs4_op_getattr_free(nfs_resop4 *);
176 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 			struct compound_state *);
178 static void	rfs4_op_getfh_free(nfs_resop4 *);
179 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
180 			struct compound_state *);
181 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 			struct compound_state *);
183 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 			struct compound_state *);
185 static void	lock_denied_free(nfs_resop4 *);
186 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 			struct compound_state *);
188 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 			struct compound_state *);
190 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 			struct compound_state *);
192 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 			struct compound_state *);
194 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
195 				struct svc_req *req, struct compound_state *cs);
196 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
197 			struct compound_state *);
198 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 			struct compound_state *);
200 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
201 			struct svc_req *, struct compound_state *);
202 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
203 			struct svc_req *, struct compound_state *);
204 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
205 			struct compound_state *);
206 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 			struct compound_state *);
208 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
209 			struct compound_state *);
210 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
211 			struct compound_state *);
212 static void	rfs4_op_read_free(nfs_resop4 *);
213 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
214 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
215 			struct compound_state *);
216 static void	rfs4_op_readlink_free(nfs_resop4 *);
217 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
218 			struct svc_req *, struct compound_state *);
219 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 			struct compound_state *);
221 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 			struct compound_state *);
223 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 			struct compound_state *);
225 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 			struct compound_state *);
227 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 			struct compound_state *);
229 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 			struct compound_state *);
231 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 			struct compound_state *);
233 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 			struct compound_state *);
235 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
236 			struct svc_req *, struct compound_state *);
237 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
238 			struct svc_req *req, struct compound_state *);
239 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
240 			struct compound_state *);
241 static void	rfs4_op_secinfo_free(nfs_resop4 *);
242 
243 static nfsstat4 check_open_access(uint32_t,
244 				struct compound_state *, struct svc_req *);
245 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
246 static int	vop_shrlock(vnode_t *, int, struct shrlock *, int);
247 static int 	rfs4_shrlock(rfs4_state_t *, int);
248 static int	rfs4_share(rfs4_state_t *);
249 void rfs4_ss_clid(rfs4_client_t *, struct svc_req *);
250 
251 /*
252  * translation table for attrs
253  */
254 struct nfs4_ntov_table {
255 	union nfs4_attr_u *na;
256 	uint8_t amap[NFS4_MAXNUM_ATTRS];
257 	int attrcnt;
258 	bool_t vfsstat;
259 };
260 
261 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
262 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
263 				    struct nfs4_svgetit_arg *sargp);
264 
265 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
266 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
267 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
268 
269 fem_t	*deleg_rdops;
270 fem_t	*deleg_wrops;
271 
272 rfs4_servinst_t	*rfs4_cur_servinst = NULL;	/* current server instance */
273 kmutex_t	rfs4_servinst_lock;		/* protects linked list */
274 int		rfs4_seen_first_compound;	/* set first time we see one */
275 
276 /*
277  * NFS4 op dispatch table
278  */
279 
280 struct rfsv4disp {
281 	void	(*dis_proc)();		/* proc to call */
282 	void	(*dis_resfree)();	/* frees space allocated by proc */
283 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
284 };
285 
286 static struct rfsv4disp rfsv4disptab[] = {
287 	/*
288 	 * NFS VERSION 4
289 	 */
290 
291 	/* RFS_NULL = 0 */
292 	{rfs4_op_illegal, nullfree, 0},
293 
294 	/* UNUSED = 1 */
295 	{rfs4_op_illegal, nullfree, 0},
296 
297 	/* UNUSED = 2 */
298 	{rfs4_op_illegal, nullfree, 0},
299 
300 	/* OP_ACCESS = 3 */
301 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
302 
303 	/* OP_CLOSE = 4 */
304 	{rfs4_op_close, nullfree, 0},
305 
306 	/* OP_COMMIT = 5 */
307 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
308 
309 	/* OP_CREATE = 6 */
310 	{rfs4_op_create, nullfree, 0},
311 
312 	/* OP_DELEGPURGE = 7 */
313 	{rfs4_op_inval, nullfree, 0},
314 
315 	/* OP_DELEGRETURN = 8 */
316 	{rfs4_op_delegreturn, nullfree, 0},
317 
318 	/* OP_GETATTR = 9 */
319 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
320 
321 	/* OP_GETFH = 10 */
322 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
323 
324 	/* OP_LINK = 11 */
325 	{rfs4_op_link, nullfree, 0},
326 
327 	/* OP_LOCK = 12 */
328 	{rfs4_op_lock, lock_denied_free, 0},
329 
330 	/* OP_LOCKT = 13 */
331 	{rfs4_op_lockt, lock_denied_free, 0},
332 
333 	/* OP_LOCKU = 14 */
334 	{rfs4_op_locku, nullfree, 0},
335 
336 	/* OP_LOOKUP = 15 */
337 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
338 
339 	/* OP_LOOKUPP = 16 */
340 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT|RPC_PUBLICFH_OK)},
341 
342 	/* OP_NVERIFY = 17 */
343 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
344 
345 	/* OP_OPEN = 18 */
346 	{rfs4_op_open, rfs4_free_reply, 0},
347 
348 	/* OP_OPENATTR = 19 */
349 	{rfs4_op_openattr, nullfree, 0},
350 
351 	/* OP_OPEN_CONFIRM = 20 */
352 	{rfs4_op_open_confirm, nullfree, 0},
353 
354 	/* OP_OPEN_DOWNGRADE = 21 */
355 	{rfs4_op_open_downgrade, nullfree, 0},
356 
357 	/* OP_OPEN_PUTFH = 22 */
358 	{rfs4_op_putfh, nullfree, RPC_ALL},
359 
360 	/* OP_PUTPUBFH = 23 */
361 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
362 
363 	/* OP_PUTROOTFH = 24 */
364 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
365 
366 	/* OP_READ = 25 */
367 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
368 
369 	/* OP_READDIR = 26 */
370 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
371 
372 	/* OP_READLINK = 27 */
373 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
374 
375 	/* OP_REMOVE = 28 */
376 	{rfs4_op_remove, nullfree, 0},
377 
378 	/* OP_RENAME = 29 */
379 	{rfs4_op_rename, nullfree, 0},
380 
381 	/* OP_RENEW = 30 */
382 	{rfs4_op_renew, nullfree, 0},
383 
384 	/* OP_RESTOREFH = 31 */
385 	{rfs4_op_restorefh, nullfree, RPC_ALL},
386 
387 	/* OP_SAVEFH = 32 */
388 	{rfs4_op_savefh, nullfree, RPC_ALL},
389 
390 	/* OP_SECINFO = 33 */
391 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
392 
393 	/* OP_SETATTR = 34 */
394 	{rfs4_op_setattr, nullfree, 0},
395 
396 	/* OP_SETCLIENTID = 35 */
397 	{rfs4_op_setclientid, nullfree, 0},
398 
399 	/* OP_SETCLIENTID_CONFIRM = 36 */
400 	{rfs4_op_setclientid_confirm, nullfree, 0},
401 
402 	/* OP_VERIFY = 37 */
403 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
404 
405 	/* OP_WRITE = 38 */
406 	{rfs4_op_write, nullfree, 0},
407 
408 	/* OP_RELEASE_LOCKOWNER = 39 */
409 	{rfs4_op_release_lockowner, nullfree, 0},
410 };
411 
412 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
413 
414 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
415 
#ifdef DEBUG

/* debug knobs; available on DEBUG builds only */
int rfs4_fillone_debug = 0;
int rfs4_shrlock_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable operation names, in the same order as rfsv4disptab, followed
 * by one trailing entry ("rfs4_op_illegal") at index OP_ILLEGAL_IDX.
 * Each name matches the corresponding handler; the entry for
 * OP_SETCLIENTID_CONFIRM used to read "rfs4_op_setclient_confirm".
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
467 
468 void rfs4_ss_chkclid(rfs4_client_t *);
469 
470 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
471 
472 #ifdef	nextdp
473 #undef nextdp
474 #endif
475 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
476 
477 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
478 	VOPNAME_OPEN, deleg_rdopen,
479 	VOPNAME_WRITE, deleg_write,
480 	VOPNAME_SETATTR, deleg_setattr,
481 	VOPNAME_RWLOCK, deleg_rd_rwlock,
482 	VOPNAME_SPACE, deleg_space,
483 	VOPNAME_SETSECATTR, deleg_setsecattr,
484 	VOPNAME_VNEVENT, deleg_vnevent,
485 	NULL, NULL
486 };
487 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
488 	VOPNAME_OPEN, deleg_wropen,
489 	VOPNAME_READ, deleg_read,
490 	VOPNAME_WRITE, deleg_write,
491 	VOPNAME_SETATTR, deleg_setattr,
492 	VOPNAME_RWLOCK, deleg_wr_rwlock,
493 	VOPNAME_SPACE, deleg_space,
494 	VOPNAME_SETSECATTR, deleg_setsecattr,
495 	VOPNAME_VNEVENT, deleg_vnevent,
496 	NULL, NULL
497 };
498 
499 int
500 rfs4_srvrinit(void)
501 {
502 	timespec32_t verf;
503 	int error;
504 	extern void rfs4_attr_init();
505 	extern krwlock_t rfs4_deleg_policy_lock;
506 
507 	/*
508 	 * The following algorithm attempts to find a unique verifier
509 	 * to be used as the write verifier returned from the server
510 	 * to the client.  It is important that this verifier change
511 	 * whenever the server reboots.  Of secondary importance, it
512 	 * is important for the verifier to be unique between two
513 	 * different servers.
514 	 *
515 	 * Thus, an attempt is made to use the system hostid and the
516 	 * current time in seconds when the nfssrv kernel module is
517 	 * loaded.  It is assumed that an NFS server will not be able
518 	 * to boot and then to reboot in less than a second.  If the
519 	 * hostid has not been set, then the current high resolution
520 	 * time is used.  This will ensure different verifiers each
521 	 * time the server reboots and minimize the chances that two
522 	 * different servers will have the same verifier.
523 	 * XXX - this is broken on LP64 kernels.
524 	 */
525 	verf.tv_sec = (time_t)nfs_atoi(hw_serial);
526 	if (verf.tv_sec != 0) {
527 		verf.tv_nsec = gethrestime_sec();
528 	} else {
529 		timespec_t tverf;
530 
531 		gethrestime(&tverf);
532 		verf.tv_sec = (time_t)tverf.tv_sec;
533 		verf.tv_nsec = tverf.tv_nsec;
534 	}
535 
536 	Write4verf = *(uint64_t *)&verf;
537 
538 	rfs4_attr_init();
539 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
540 
541 	/* Used to manage create/destroy of server state */
542 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
543 
544 	/* Used to manage access to server instance linked list */
545 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
546 
547 	/* Used to manage access to rfs4_deleg_policy */
548 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
549 
550 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
551 	if (error != 0) {
552 		rfs4_disable_delegation();
553 	} else {
554 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
555 				&deleg_wrops);
556 		if (error != 0) {
557 			rfs4_disable_delegation();
558 			fem_free(deleg_rdops);
559 		}
560 	}
561 
562 	nfs4_srv_caller_id = fs_new_caller_id();
563 
564 	lockt_sysid = lm_alloc_sysidt();
565 
566 	return (0);
567 }
568 
569 void
570 rfs4_srvrfini(void)
571 {
572 	extern krwlock_t rfs4_deleg_policy_lock;
573 
574 	if (lockt_sysid != LM_NOSYSID) {
575 		lm_free_sysidt(lockt_sysid);
576 		lockt_sysid = LM_NOSYSID;
577 	}
578 
579 	mutex_destroy(&rfs4_deleg_lock);
580 	mutex_destroy(&rfs4_state_lock);
581 	rw_destroy(&rfs4_deleg_policy_lock);
582 
583 	fem_free(deleg_rdops);
584 	fem_free(deleg_wrops);
585 }
586 
587 void
588 rfs4_init_compound_state(struct compound_state *cs)
589 {
590 	bzero(cs, sizeof (*cs));
591 	cs->cont = TRUE;
592 	cs->access = CS_ACCESS_DENIED;
593 	cs->deleg = FALSE;
594 	cs->mandlock = FALSE;
595 	cs->fh.nfs_fh4_val = cs->fhbuf;
596 }
597 
598 void
599 rfs4_grace_start(rfs4_servinst_t *sip)
600 {
601 	time_t now = gethrestime_sec();
602 
603 	rw_enter(&sip->rwlock, RW_WRITER);
604 	sip->start_time = now;
605 	sip->grace_period = rfs4_grace_period;
606 	rw_exit(&sip->rwlock);
607 }
608 
609 /*
610  * returns true if the instance's grace period has never been started
611  */
612 int
613 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
614 {
615 	time_t start_time;
616 
617 	rw_enter(&sip->rwlock, RW_READER);
618 	start_time = sip->start_time;
619 	rw_exit(&sip->rwlock);
620 
621 	return (start_time == 0);
622 }
623 
624 /*
625  * Indicates if server instance is within the
626  * grace period.
627  */
628 int
629 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
630 {
631 	time_t grace_expiry;
632 
633 	rw_enter(&sip->rwlock, RW_READER);
634 	grace_expiry = sip->start_time + sip->grace_period;
635 	rw_exit(&sip->rwlock);
636 
637 	return (gethrestime_sec() < grace_expiry);
638 }
639 
640 int
641 rfs4_clnt_in_grace(rfs4_client_t *cp)
642 {
643 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
644 
645 	return (rfs4_servinst_in_grace(cp->server_instance));
646 }
647 
648 /*
649  * reset all currently active grace periods
650  */
651 void
652 rfs4_grace_reset_all(void)
653 {
654 	rfs4_servinst_t *sip;
655 
656 	mutex_enter(&rfs4_servinst_lock);
657 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
658 		if (rfs4_servinst_in_grace(sip))
659 			rfs4_grace_start(sip);
660 	mutex_exit(&rfs4_servinst_lock);
661 }
662 
663 /*
664  * start any new instances' grace periods
665  */
666 void
667 rfs4_grace_start_new(void)
668 {
669 	rfs4_servinst_t *sip;
670 
671 	mutex_enter(&rfs4_servinst_lock);
672 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
673 		if (rfs4_servinst_grace_new(sip))
674 			rfs4_grace_start(sip);
675 	mutex_exit(&rfs4_servinst_lock);
676 }
677 
678 static rfs4_dss_path_t *
679 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
680 {
681 	size_t len;
682 	rfs4_dss_path_t *dss_path;
683 
684 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
685 
686 	/*
687 	 * Take a copy of the string, since the original may be overwritten.
688 	 * Sadly, no strdup() in the kernel.
689 	 */
690 	/* allow for NUL */
691 	len = strlen(path) + 1;
692 	dss_path->path = kmem_alloc(len, KM_SLEEP);
693 	(void) strlcpy(dss_path->path, path, len);
694 
695 	/* associate with servinst */
696 	dss_path->sip = sip;
697 	dss_path->index = index;
698 
699 	/*
700 	 * Add to list of served paths.
701 	 * No locking required, as we're only ever called at startup.
702 	 */
703 	if (rfs4_dss_pathlist == NULL) {
704 		/* this is the first dss_path_t */
705 
706 		/* needed for insque/remque */
707 		dss_path->next = dss_path->prev = dss_path;
708 
709 		rfs4_dss_pathlist = dss_path;
710 	} else {
711 		insque(dss_path, rfs4_dss_pathlist);
712 	}
713 
714 	return (dss_path);
715 }
716 
717 /*
718  * Create a new server instance, and make it the currently active instance.
719  * Note that starting the grace period too early will reduce the clients'
720  * recovery window.
721  */
722 void
723 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
724 {
725 	unsigned i;
726 	rfs4_servinst_t *sip;
727 	rfs4_oldstate_t *oldstate;
728 
729 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
730 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
731 
732 	sip->start_time = (time_t)0;
733 	sip->grace_period = (time_t)0;
734 	sip->next = NULL;
735 	sip->prev = NULL;
736 
737 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
738 	/*
739 	 * This initial dummy entry is required to setup for insque/remque.
740 	 * It must be skipped over whenever the list is traversed.
741 	 */
742 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
743 	/* insque/remque require initial list entry to be self-terminated */
744 	oldstate->next = oldstate;
745 	oldstate->prev = oldstate;
746 	sip->oldstate = oldstate;
747 
748 
749 	sip->dss_npaths = dss_npaths;
750 	sip->dss_paths = kmem_alloc(dss_npaths *
751 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
752 
753 	for (i = 0; i < dss_npaths; i++) {
754 		sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
755 	}
756 
757 	mutex_enter(&rfs4_servinst_lock);
758 	if (rfs4_cur_servinst != NULL) {
759 		/* add to linked list */
760 		sip->prev = rfs4_cur_servinst;
761 		rfs4_cur_servinst->next = sip;
762 	}
763 	if (start_grace)
764 		rfs4_grace_start(sip);
765 	/* make the new instance "current" */
766 	rfs4_cur_servinst = sip;
767 
768 	mutex_exit(&rfs4_servinst_lock);
769 }
770 
771 /*
772  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
773  * all instances directly.
774  */
775 void
776 rfs4_servinst_destroy_all(void)
777 {
778 	rfs4_servinst_t *sip, *prev, *current;
779 #ifdef DEBUG
780 	int n = 0;
781 #endif
782 
783 	mutex_enter(&rfs4_servinst_lock);
784 	ASSERT(rfs4_cur_servinst != NULL);
785 	current = rfs4_cur_servinst;
786 	rfs4_cur_servinst = NULL;
787 	for (sip = current; sip != NULL; sip = prev) {
788 		prev = sip->prev;
789 		rw_destroy(&sip->rwlock);
790 		if (sip->oldstate)
791 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
792 		if (sip->dss_paths)
793 			kmem_free(sip->dss_paths,
794 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
795 		kmem_free(sip, sizeof (rfs4_servinst_t));
796 #ifdef DEBUG
797 		n++;
798 #endif
799 	}
800 	mutex_exit(&rfs4_servinst_lock);
801 }
802 
803 /*
804  * Assign the current server instance to a client_t.
805  * Should be called with cp->dbe held.
806  */
807 void
808 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
809 {
810 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
811 
812 	/*
813 	 * The lock ensures that if the current instance is in the process
814 	 * of changing, we will see the new one.
815 	 */
816 	mutex_enter(&rfs4_servinst_lock);
817 	cp->server_instance = sip;
818 	mutex_exit(&rfs4_servinst_lock);
819 }
820 
821 rfs4_servinst_t *
822 rfs4_servinst(rfs4_client_t *cp)
823 {
824 	ASSERT(rfs4_dbe_refcnt(cp->dbe) > 0);
825 
826 	return (cp->server_instance);
827 }
828 
829 /* ARGSUSED */
830 static void
831 nullfree(caddr_t resop)
832 {
833 }
834 
835 /*
836  * This is a fall-through for invalid or not implemented (yet) ops
837  */
838 /* ARGSUSED */
839 static void
840 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
841 	struct compound_state *cs)
842 {
843 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
844 }
845 
846 /*
847  * Check if the security flavor, nfsnum, is in the flavor_list.
848  */
849 bool_t
850 in_flavor_list(int nfsnum, int *flavor_list, int count)
851 {
852 	int i;
853 
854 	for (i = 0; i < count; i++) {
855 		if (nfsnum == flavor_list[i])
856 			return (TRUE);
857 	}
858 	return (FALSE);
859 }
860 
861 /*
862  * Used by rfs4_op_secinfo to get the security information from the
863  * export structure associated with the component.
864  */
865 /* ARGSUSED */
866 static nfsstat4
867 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
868 {
869 	int error, different_export = 0;
870 	vnode_t *dvp, *vp, *tvp;
871 	struct exportinfo *exi = NULL;
872 	fid_t fid;
873 	uint_t count, i;
874 	secinfo4 *resok_val;
875 	struct secinfo *secp;
876 	bool_t did_traverse;
877 	int dotdot, walk;
878 
879 	dvp = cs->vp;
880 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
881 
882 	/*
883 	 * If dotdotting, then need to check whether it's above the
884 	 * root of a filesystem, or above an export point.
885 	 */
886 	if (dotdot) {
887 
888 		/*
889 		 * If dotdotting at the root of a filesystem, then
890 		 * need to traverse back to the mounted-on filesystem
891 		 * and do the dotdot lookup there.
892 		 */
893 		if (cs->vp->v_flag & VROOT) {
894 
895 			/*
896 			 * If at the system root, then can
897 			 * go up no further.
898 			 */
899 			if (VN_CMP(dvp, rootdir))
900 				return (puterrno4(ENOENT));
901 
902 			/*
903 			 * Traverse back to the mounted-on filesystem
904 			 */
905 			dvp = untraverse(cs->vp);
906 
907 			/*
908 			 * Set the different_export flag so we remember
909 			 * to pick up a new exportinfo entry for
910 			 * this new filesystem.
911 			 */
912 			different_export = 1;
913 		} else {
914 
915 			/*
916 			 * If dotdotting above an export point then set
917 			 * the different_export to get new export info.
918 			 */
919 			different_export = nfs_exported(cs->exi, cs->vp);
920 		}
921 	}
922 
923 	/*
924 	 * Get the vnode for the component "nm".
925 	 */
926 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr);
927 	if (error)
928 		return (puterrno4(error));
929 
930 	/*
931 	 * If the vnode is in a pseudo filesystem, or if the security flavor
932 	 * used in the request is valid but not an explicitly shared flavor,
933 	 * or the access bit indicates that this is a limited access,
934 	 * check whether this vnode is visible.
935 	 */
936 	if (!different_export &&
937 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
938 	    cs->access & CS_ACCESS_LIMITED)) {
939 		if (! nfs_visible(cs->exi, vp, &different_export)) {
940 			VN_RELE(vp);
941 			return (puterrno4(ENOENT));
942 		}
943 	}
944 
945 	/*
946 	 * If it's a mountpoint, then traverse it.
947 	 */
948 	if (vn_ismntpt(vp)) {
949 		tvp = vp;
950 		if ((error = traverse(&tvp)) != 0) {
951 			VN_RELE(vp);
952 			return (puterrno4(error));
953 		}
954 		/* remember that we had to traverse mountpoint */
955 		did_traverse = TRUE;
956 		vp = tvp;
957 		different_export = 1;
958 	} else if (vp->v_vfsp != dvp->v_vfsp) {
959 		/*
960 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
961 		 * then vp is probably an LOFS object.  We don't need the
962 		 * realvp, we just need to know that we might have crossed
963 		 * a server fs boundary and need to call checkexport4.
964 		 * (LOFS lookup hides server fs mountpoints, and actually calls
965 		 * traverse)
966 		 */
967 		different_export = 1;
968 		did_traverse = FALSE;
969 	}
970 
971 	/*
972 	 * Get the export information for it.
973 	 */
974 	if (different_export) {
975 
976 		bzero(&fid, sizeof (fid));
977 		fid.fid_len = MAXFIDSZ;
978 		error = vop_fid_pseudo(vp, &fid);
979 		if (error) {
980 			VN_RELE(vp);
981 			return (puterrno4(error));
982 		}
983 
984 		if (dotdot)
985 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
986 		else
987 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
988 
989 		if (exi == NULL) {
990 			if (did_traverse == TRUE) {
991 				/*
992 				 * If this vnode is a mounted-on vnode,
993 				 * but the mounted-on file system is not
994 				 * exported, send back the secinfo for
995 				 * the exported node that the mounted-on
996 				 * vnode lives in.
997 				 */
998 				exi = cs->exi;
999 			} else {
1000 				VN_RELE(vp);
1001 				return (puterrno4(EACCES));
1002 			}
1003 		}
1004 	} else {
1005 		exi = cs->exi;
1006 	}
1007 	ASSERT(exi != NULL);
1008 
1009 
1010 	/*
1011 	 * Create the secinfo result based on the security information
1012 	 * from the exportinfo structure (exi).
1013 	 *
1014 	 * Return all flavors for a pseudo node.
1015 	 * For a real export node, return the flavor that the client
1016 	 * has access with.
1017 	 */
1018 	ASSERT(RW_LOCK_HELD(&exported_lock));
1019 	if (PSEUDO(exi)) {
1020 		count = exi->exi_export.ex_seccnt; /* total sec count */
1021 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1022 		secp = exi->exi_export.ex_secinfo;
1023 
1024 		for (i = 0; i < count; i++) {
1025 		    resok_val[i].flavor = secp[i].s_secinfo.sc_rpcnum;
1026 		    if (resok_val[i].flavor == RPCSEC_GSS) {
1027 			rpcsec_gss_info *info;
1028 
1029 			info = &resok_val[i].flavor_info;
1030 			info->qop = secp[i].s_secinfo.sc_qop;
1031 			info->service =
1032 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1033 
1034 			/* get oid opaque data */
1035 			info->oid.sec_oid4_len =
1036 				secp[i].s_secinfo.sc_gss_mech_type->length;
1037 			info->oid.sec_oid4_val =
1038 				kmem_alloc(
1039 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1040 				    KM_SLEEP);
1041 			bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1042 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1043 		    }
1044 		}
1045 		resp->SECINFO4resok_len = count;
1046 		resp->SECINFO4resok_val = resok_val;
1047 	} else {
1048 		int ret_cnt = 0, k = 0;
1049 		int *flavor_list;
1050 
1051 		count = exi->exi_export.ex_seccnt; /* total sec count */
1052 		secp = exi->exi_export.ex_secinfo;
1053 
1054 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1055 		/* find out which flavors to return */
1056 		for (i = 0; i < count; i ++) {
1057 			int access, flavor, perm;
1058 
1059 			flavor = secp[i].s_secinfo.sc_nfsnum;
1060 			perm = secp[i].s_flags;
1061 
1062 			access = nfsauth4_secinfo_access(exi, cs->req,
1063 						flavor, perm);
1064 
1065 			if (! (access & NFSAUTH_DENIED) &&
1066 			    ! (access & NFSAUTH_WRONGSEC)) {
1067 				flavor_list[ret_cnt] = flavor;
1068 				ret_cnt++;
1069 			}
1070 		}
1071 
1072 		/* Create the returning SECINFO value */
1073 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1074 
1075 		for (i = 0; i < count; i++) {
1076 		/* If the flavor is in the flavor list, fill in resok_val. */
1077 		    if (in_flavor_list(secp[i].s_secinfo.sc_nfsnum,
1078 						flavor_list, ret_cnt)) {
1079 			resok_val[k].flavor = secp[i].s_secinfo.sc_rpcnum;
1080 			if (resok_val[k].flavor == RPCSEC_GSS) {
1081 			    rpcsec_gss_info *info;
1082 
1083 			    info = &resok_val[k].flavor_info;
1084 			    info->qop = secp[i].s_secinfo.sc_qop;
1085 			    info->service =
1086 				(rpc_gss_svc_t)secp[i].s_secinfo.sc_service;
1087 
1088 			    /* get oid opaque data */
1089 			    info->oid.sec_oid4_len =
1090 				secp[i].s_secinfo.sc_gss_mech_type->length;
1091 			    info->oid.sec_oid4_val =
1092 				kmem_alloc(
1093 				    secp[i].s_secinfo.sc_gss_mech_type->length,
1094 				    KM_SLEEP);
1095 			    bcopy(secp[i].s_secinfo.sc_gss_mech_type->elements,
1096 				info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1097 			}
1098 			k++;
1099 		    }
1100 		    if (k >= ret_cnt)
1101 			break;
1102 		}
1103 		resp->SECINFO4resok_len = ret_cnt;
1104 		resp->SECINFO4resok_val = resok_val;
1105 		kmem_free(flavor_list, count * sizeof (int));
1106 	}
1107 
1108 	VN_RELE(vp);
1109 	return (NFS4_OK);
1110 }
1111 
1112 /*
1113  * SECINFO (Operation 33): Obtain required security information on
1114  * the component name in the format of (security-mechanism-oid, qop, service)
1115  * triplets.
1116  */
1117 /* ARGSUSED */
1118 static void
1119 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1120 	struct compound_state *cs)
1121 {
1122 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1123 	utf8string *utfnm = &argop->nfs_argop4_u.opsecinfo.name;
1124 	uint_t len;
1125 	char *nm;
1126 
1127 	/*
1128 	 * Current file handle (cfh) should have been set before getting
1129 	 * into this function. If not, return error.
1130 	 */
1131 	if (cs->vp == NULL) {
1132 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1133 		return;
1134 	}
1135 
1136 	if (cs->vp->v_type != VDIR) {
1137 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1138 		return;
1139 	}
1140 
1141 	/*
1142 	 * Verify the component name. If failed, error out, but
1143 	 * do not error out if the component name is a "..".
1144 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1145 	 */
1146 	if (!utf8_dir_verify(utfnm)) {
1147 		if (utfnm->utf8string_len != 2 ||
1148 				utfnm->utf8string_val[0] != '.' ||
1149 				utfnm->utf8string_val[1] != '.') {
1150 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1151 			return;
1152 		}
1153 	}
1154 
1155 	nm = utf8_to_str(utfnm, &len, NULL);
1156 	if (nm == NULL) {
1157 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1158 		return;
1159 	}
1160 
1161 	if (len > MAXNAMELEN) {
1162 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1163 		kmem_free(nm, len);
1164 		return;
1165 	}
1166 
1167 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, nm, resp);
1168 
1169 	kmem_free(nm, len);
1170 }
1171 
1172 /*
1173  * Free SECINFO result.
1174  */
1175 /* ARGSUSED */
1176 static void
1177 rfs4_op_secinfo_free(nfs_resop4 *resop)
1178 {
1179 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1180 	int count, i;
1181 	secinfo4 *resok_val;
1182 
1183 	/* If this is not an Ok result, nothing to free. */
1184 	if (resp->status != NFS4_OK) {
1185 		return;
1186 	}
1187 
1188 	count = resp->SECINFO4resok_len;
1189 	resok_val = resp->SECINFO4resok_val;
1190 
1191 	for (i = 0; i < count; i++) {
1192 	    if (resok_val[i].flavor == RPCSEC_GSS) {
1193 		rpcsec_gss_info *info;
1194 
1195 		info = &resok_val[i].flavor_info;
1196 		kmem_free(info->oid.sec_oid4_val, info->oid.sec_oid4_len);
1197 	    }
1198 	}
1199 	kmem_free(resok_val, count * sizeof (secinfo4));
1200 	resp->SECINFO4resok_len = 0;
1201 	resp->SECINFO4resok_val = NULL;
1202 }
1203 
1204 /*
1205  * do label check on client label and server's file lable.
1206  */
1207 static boolean_t
1208 do_rfs4_label_check(bslabel_t *clabel, vnode_t *vp, int flag)
1209 {
1210 	bslabel_t *slabel;
1211 	ts_label_t *tslabel;
1212 	boolean_t result;
1213 
1214 	if ((tslabel = nfs4_getflabel(vp)) == NULL) {
1215 		return (B_FALSE);
1216 	}
1217 	slabel = label2bslabel(tslabel);
1218 	DTRACE_PROBE4(tx__rfs4__log__info__labelcheck, char *,
1219 	    "comparing server's file label(1) with client label(2) (vp(3))",
1220 	    bslabel_t *, slabel, bslabel_t *, clabel, vnode_t *, vp);
1221 
1222 	if (flag == EQUALITY_CHECK)
1223 		result = blequal(clabel, slabel);
1224 	else
1225 		result = bldominates(clabel, slabel);
1226 	label_rele(tslabel);
1227 	return (result);
1228 }
1229 
1230 /* ARGSUSED */
1231 static void
1232 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1233 	struct compound_state *cs)
1234 {
1235 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1236 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1237 	int error;
1238 	vnode_t *vp;
1239 	struct vattr va;
1240 	int checkwriteperm;
1241 	cred_t *cr = cs->cr;
1242 	bslabel_t *clabel, *slabel;
1243 	ts_label_t *tslabel;
1244 	boolean_t admin_low_client;
1245 
1246 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1247 	if (cs->access == CS_ACCESS_DENIED) {
1248 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1249 		return;
1250 	}
1251 #endif
1252 	if (cs->vp == NULL) {
1253 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1254 		return;
1255 	}
1256 
1257 	ASSERT(cr != NULL);
1258 
1259 	vp = cs->vp;
1260 
1261 	/*
1262 	 * If the file system is exported read only, it is not appropriate
1263 	 * to check write permissions for regular files and directories.
1264 	 * Special files are interpreted by the client, so the underlying
1265 	 * permissions are sent back to the client for interpretation.
1266 	 */
1267 	if (rdonly4(cs->exi, cs->vp, req) &&
1268 		(vp->v_type == VREG || vp->v_type == VDIR))
1269 		checkwriteperm = 0;
1270 	else
1271 		checkwriteperm = 1;
1272 
1273 	/*
1274 	 * XXX
1275 	 * We need the mode so that we can correctly determine access
1276 	 * permissions relative to a mandatory lock file.  Access to
1277 	 * mandatory lock files is denied on the server, so it might
1278 	 * as well be reflected to the server during the open.
1279 	 */
1280 	va.va_mask = AT_MODE;
1281 	error = VOP_GETATTR(vp, &va, 0, cr);
1282 	if (error) {
1283 		*cs->statusp = resp->status = puterrno4(error);
1284 		return;
1285 	}
1286 	resp->access = 0;
1287 	resp->supported = 0;
1288 
1289 	if (is_system_labeled()) {
1290 		ASSERT(req->rq_label != NULL);
1291 		clabel = req->rq_label;
1292 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1293 		    "got client label from request(1)",
1294 		    struct svc_req *, req);
1295 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1296 			if ((tslabel = nfs4_getflabel(vp)) == NULL) {
1297 				*cs->statusp = resp->status = puterrno4(EACCES);
1298 				return;
1299 			}
1300 			slabel = label2bslabel(tslabel);
1301 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1302 			    char *, "got server label(1) for vp(2)",
1303 			    bslabel_t *, slabel, vnode_t *, vp);
1304 
1305 			admin_low_client = B_FALSE;
1306 		} else
1307 			admin_low_client = B_TRUE;
1308 	}
1309 
1310 	if (args->access & ACCESS4_READ) {
1311 		error = VOP_ACCESS(vp, VREAD, 0, cr);
1312 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1313 		    (!is_system_labeled() || admin_low_client ||
1314 		    bldominates(clabel, slabel)))
1315 			resp->access |= ACCESS4_READ;
1316 		resp->supported |= ACCESS4_READ;
1317 	}
1318 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1319 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1320 		if (!error && (!is_system_labeled() || admin_low_client ||
1321 		    bldominates(clabel, slabel)))
1322 			resp->access |= ACCESS4_LOOKUP;
1323 		resp->supported |= ACCESS4_LOOKUP;
1324 	}
1325 	if (checkwriteperm &&
1326 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1327 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1328 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1329 		    (!is_system_labeled() || admin_low_client ||
1330 		    blequal(clabel, slabel)))
1331 			resp->access |=
1332 			    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND));
1333 		resp->supported |= (ACCESS4_MODIFY|ACCESS4_EXTEND);
1334 	}
1335 
1336 	if (checkwriteperm &&
1337 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1338 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
1339 		if (!error && (!is_system_labeled() || admin_low_client ||
1340 		    blequal(clabel, slabel)))
1341 			resp->access |= ACCESS4_DELETE;
1342 		resp->supported |= ACCESS4_DELETE;
1343 	}
1344 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1345 		error = VOP_ACCESS(vp, VEXEC, 0, cr);
1346 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1347 		    (!is_system_labeled() || admin_low_client ||
1348 		    bldominates(clabel, slabel)))
1349 			resp->access |= ACCESS4_EXECUTE;
1350 		resp->supported |= ACCESS4_EXECUTE;
1351 	}
1352 
1353 	if (is_system_labeled() && !admin_low_client)
1354 		label_rele(tslabel);
1355 
1356 	*cs->statusp = resp->status = NFS4_OK;
1357 }
1358 
1359 /* ARGSUSED */
1360 static void
1361 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1362 	struct compound_state *cs)
1363 {
1364 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1365 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1366 	int error;
1367 	vnode_t *vp = cs->vp;
1368 	cred_t *cr = cs->cr;
1369 	vattr_t va;
1370 
1371 	if (vp == NULL) {
1372 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1373 		return;
1374 	}
1375 	if (cs->access == CS_ACCESS_DENIED) {
1376 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1377 		return;
1378 	}
1379 
1380 	if (args->offset + args->count < args->offset) {
1381 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1382 		return;
1383 	}
1384 
1385 	va.va_mask = AT_UID;
1386 	error = VOP_GETATTR(vp, &va, 0, cr);
1387 
1388 	/*
1389 	 * If we can't get the attributes, then we can't do the
1390 	 * right access checking.  So, we'll fail the request.
1391 	 */
1392 	if (error) {
1393 		*cs->statusp = resp->status = puterrno4(error);
1394 		return;
1395 	}
1396 	if (rdonly4(cs->exi, cs->vp, req)) {
1397 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1398 		return;
1399 	}
1400 
1401 	if (vp->v_type != VREG) {
1402 		if (vp->v_type == VDIR)
1403 			resp->status = NFS4ERR_ISDIR;
1404 		else
1405 			resp->status = NFS4ERR_INVAL;
1406 		*cs->statusp = resp->status;
1407 		return;
1408 	}
1409 
1410 	if (crgetuid(cr) != va.va_uid &&
1411 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr))) {
1412 		*cs->statusp = resp->status = puterrno4(error);
1413 		return;
1414 	}
1415 
1416 	error = VOP_PUTPAGE(vp, args->offset, args->count, 0, cr);
1417 	if (!error)
1418 		error = VOP_FSYNC(vp, FNODSYNC, cr);
1419 
1420 	if (error) {
1421 		*cs->statusp = resp->status = puterrno4(error);
1422 		return;
1423 	}
1424 
1425 	*cs->statusp = resp->status = NFS4_OK;
1426 	resp->writeverf = Write4verf;
1427 }
1428 
1429 /*
1430  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1431  * was completed. It does the nfsv4 create for special files.
1432  */
1433 /* ARGSUSED */
1434 static vnode_t *
1435 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1436 	struct compound_state *cs, vattr_t *vap, char *nm)
1437 {
1438 	int error;
1439 	cred_t *cr = cs->cr;
1440 	vnode_t *dvp = cs->vp;
1441 	vnode_t *vp = NULL;
1442 	int mode;
1443 	enum vcexcl excl;
1444 
1445 	switch (args->type) {
1446 	case NF4CHR:
1447 	case NF4BLK:
1448 		if (secpolicy_sys_devices(cr) != 0) {
1449 			*cs->statusp = resp->status = NFS4ERR_PERM;
1450 			return (NULL);
1451 		}
1452 		if (args->type == NF4CHR)
1453 			vap->va_type = VCHR;
1454 		else
1455 			vap->va_type = VBLK;
1456 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1457 					args->ftype4_u.devdata.specdata2);
1458 		vap->va_mask |= AT_RDEV;
1459 		break;
1460 	case NF4SOCK:
1461 		vap->va_type = VSOCK;
1462 		break;
1463 	case NF4FIFO:
1464 		vap->va_type = VFIFO;
1465 		break;
1466 	default:
1467 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1468 		return (NULL);
1469 	}
1470 
1471 	/*
1472 	 * Must specify the mode.
1473 	 */
1474 	if (!(vap->va_mask & AT_MODE)) {
1475 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1476 		return (NULL);
1477 	}
1478 
1479 	excl = EXCL;
1480 
1481 	mode = 0;
1482 
1483 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0);
1484 	if (error) {
1485 		*cs->statusp = resp->status = puterrno4(error);
1486 		return (NULL);
1487 	}
1488 	return (vp);
1489 }
1490 
/*
 * nfsv4 create is used to create non-regular files. For regular files,
 * use nfsv4 open.
 *
 * The object named args->objname is created in the directory given by
 * the current filehandle (cs->vp).  Accepted types are NF4LNK, NF4BLK,
 * NF4CHR, NF4SOCK, NF4FIFO and NF4DIR.
 *
 * On success the new object's filehandle is built into cs->fh, cs->vp
 * is replaced by the new vnode (the previous hold is released),
 * resp->cinfo describes the directory change and resp->attrset holds
 * the createattrs bits that were set.  On failure the error is stored
 * in resp->status and *cs->statusp and resp->attrset is zero.
 */
/* ARGSUSED */
static void
rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
	struct compound_state *cs)
{
	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
	int error;
	/*
	 * Directory attribute snapshots: bva = before the create,
	 * iva = right after the create, iva2 = re-check after the symlink
	 * lookup, ava = after the directory fsync.
	 */
	struct vattr bva, iva, iva2, ava, *vap;
	cred_t *cr = cs->cr;
	vnode_t *dvp = cs->vp;
	vnode_t *vp = NULL;
	char *nm, *lnm;
	uint_t len, llen;
	int syncval = 0;
	struct nfs4_svgetit_arg sarg;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	nfsstat4 status;

	resp->attrset = 0;

	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		return;
	}

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to create an object in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}

	/* Verify that type is correct */
	switch (args->type) {
	case NF4LNK:
	case NF4BLK:
	case NF4CHR:
	case NF4SOCK:
	case NF4FIFO:
	case NF4DIR:
		break;
	default:
		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
		return;
	};

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		return;
	}
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		return;
	}
	if (!utf8_dir_verify(&args->objname)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (rdonly4(cs->exi, cs->vp, req)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		return;
	}

	/*
	 * Name of newly created object
	 */
	nm = utf8_to_fn(&args->objname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		return;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		return;
	}

	resp->attrset = 0;

	/*
	 * Convert the client's createattrs; from here on every exit path
	 * must release the ntov table with nfs4_ntov_table_free().
	 */
	sarg.sbp = &sb;
	nfs4_ntov_table_init(&ntov);

	status = do_rfs4_set_attrs(&resp->attrset,
					&args->createattrs, cs, &sarg,
					&ntov, NFS4ATTR_SETIT);

	/* A create request that sets no vattr attribute is rejected. */
	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
		status = NFS4ERR_INVAL;

	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/* Get "before" change value */
	bva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bva, 0, cr);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)

	vap = sarg.vap;

	/*
	 * Set default initial values for attributes when not specified
	 * in createattrs.
	 */
	if ((vap->va_mask & AT_UID) == 0) {
		vap->va_uid = crgetuid(cr);
		vap->va_mask |= AT_UID;
	}
	if ((vap->va_mask & AT_GID) == 0) {
		vap->va_gid = crgetgid(cr);
		vap->va_mask |= AT_GID;
	}

	/* error is 0 here; each case below leaves it 0 unless a VOP fails. */
	vap->va_mask |= AT_TYPE;
	switch (args->type) {
	case NF4DIR:
		vap->va_type = VDIR;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}
		error = VOP_MKDIR(dvp, nm, vap, &vp, cr);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;
		break;
	case NF4LNK:
		vap->va_type = VLNK;
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0700;	/* default: owner rwx only */
			vap->va_mask |= AT_MODE;
		}

		/*
		 * symlink names must be treated as data
		 */
		lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);

		if (lnm == NULL) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		if (llen > MAXPATHLEN) {
			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
			kmem_free(nm, len);
			kmem_free(lnm, llen);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		error = VOP_SYMLINK(dvp, nm, vap, lnm, cr);
		/* lnm cannot be NULL here (checked above); test is defensive */
		if (lnm != NULL)
			kmem_free(lnm, llen);
		if (error)
			break;

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		/* VOP_SYMLINK returns no vnode; look the new link up. */
		error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error)
			break;

		/*
		 * va_seq is not safe over VOP calls, check it again
		 * if it has changed zero out iva to force atomic = FALSE.
		 */
		iva2.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr) ||
						iva2.va_seq != iva.va_seq)
			iva.va_seq = 0;
		break;
	default:
		/*
		 * probably a special file.
		 */
		if ((vap->va_mask & AT_MODE) == 0) {
			vap->va_mode = 0600;	/* default: owner rw only */
			vap->va_mask |= AT_MODE;
		}
		syncval = FNODSYNC;
		/*
		 * We know this will only generate one VOP call
		 */
		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, nm);

		/* On failure do_rfs4_op_mknod has already set the status. */
		if (vp == NULL) {
			kmem_free(nm, len);
			nfs4_ntov_table_free(&ntov, &sarg);
			resp->attrset = 0;
			return;
		}

		/*
		 * Get the initial "after" sequence number, if it fails,
		 * set to zero
		 */
		iva.va_mask = AT_SEQ;
		if (VOP_GETATTR(dvp, &iva, 0, cs->cr))
			iva.va_seq = 0;

		break;
	}
	kmem_free(nm, len);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * Force modified data and metadata out to stable storage.
	 */
	(void) VOP_FSYNC(dvp, 0, cr);

	/*
	 * NOTE(review): on the success path resp->status has not been
	 * assigned anywhere in this function before this test; it
	 * presumably relies on the caller handing in a result whose status
	 * already reads as NFS4_OK -- confirm against the dispatcher.
	 */
	if (resp->status != NFS4_OK) {
		if (vp != NULL)
			VN_RELE(vp);
		nfs4_ntov_table_free(&ntov, &sarg);
		resp->attrset = 0;
		return;
	}

	/*
	 * Finish setup of cinfo response, "before" value already set.
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	ava.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &ava, 0, cr)) {
		ava.va_ctime = bva.va_ctime;
		ava.va_seq = 0;
	}
	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);

	/*
	 * True verification that object was created with correct
	 * attrs is impossible.  The attrs could have been changed
	 * immediately after object creation.  If attributes did
	 * not verify, the only recourse for the server is to
	 * destroy the object.  Maybe if some attrs (like gid)
	 * are set incorrectly, the object should be destroyed;
	 * however, seems bad as a default policy.  Do we really
	 * want to destroy an object over one of the times not
	 * verifying correctly?  For these reasons, the server
	 * currently sets bits in attrset for createattrs
	 * that were set; however, no verification is done.
	 *
	 * vmask_to_nmask accounts for vattr bits set on create
	 *	[do_rfs4_set_attrs() only sets resp bits for
	 *	 non-vattr/vfs bits.]
	 * Mask off any bits set by default so as not to return
	 * more attrset bits than were requested in createattrs
	 */
	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
	resp->attrset &= args->createattrs.attrmask;
	nfs4_ntov_table_free(&ntov, &sarg);

	/* Build the new object's filehandle into the compound state. */
	error = makefh4(&cs->fh, vp, cs->exi);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
	}

	/*
	 * The cinfo.atomic = TRUE only if we got no errors, we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the creation and it didn't change during the VOP_LOOKUP
	 * or VOP_FSYNC.
	 */
	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
			iva.va_seq == (bva.va_seq + 1) &&
			iva.va_seq == ava.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	/* syncval is FNODSYNC for special files, 0 otherwise. */
	(void) VOP_FSYNC(vp, syncval, cr);

	if (resp->status != NFS4_OK) {
		VN_RELE(vp);
		return;
	}
	/* The new object becomes the current filehandle's vnode. */
	if (cs->vp)
		VN_RELE(cs->vp);

	cs->vp = vp;
	*cs->statusp = resp->status = NFS4_OK;
}
1816 
1817 
1818 /*ARGSUSED*/
1819 static void
1820 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1821 	struct compound_state *cs)
1822 {
1823 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1824 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1825 	rfs4_deleg_state_t *dsp;
1826 	nfsstat4 status;
1827 
1828 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1829 	resp->status = *cs->statusp = status;
1830 	if (status != NFS4_OK)
1831 		return;
1832 
1833 	/* Ensure specified filehandle matches */
1834 	if (cs->vp != dsp->finfo->vp) {
1835 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1836 	} else
1837 		rfs4_return_deleg(dsp, FALSE);
1838 
1839 	rfs4_update_lease(dsp->client);
1840 
1841 	rfs4_deleg_state_rele(dsp);
1842 }
1843 
1844 /*
1845  * Check to see if a given "flavor" is an explicitly shared flavor.
1846  * The assumption of this routine is the "flavor" is already a valid
1847  * flavor in the secinfo list of "exi".
1848  *
1849  *	e.g.
1850  *		# share -o sec=flavor1 /export
1851  *		# share -o sec=flavor2 /export/home
1852  *
1853  *		flavor2 is not an explicitly shared flavor for /export,
1854  *		however it is in the secinfo list for /export thru the
1855  *		server namespace setup.
1856  */
1857 int
1858 is_exported_sec(int flavor, struct exportinfo *exi)
1859 {
1860 	int	i;
1861 	struct secinfo *sp;
1862 
1863 	sp = exi->exi_export.ex_secinfo;
1864 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1865 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1866 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1867 			return (SEC_REF_EXPORTED(&sp[i]));
1868 		}
1869 	}
1870 
1871 	/* Should not reach this point based on the assumption */
1872 	return (0);
1873 }
1874 
1875 /*
1876  * Check if the security flavor used in the request matches what is
1877  * required at the export point or at the root pseudo node (exi_root).
1878  *
1879  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1880  *
1881  */
1882 static int
1883 secinfo_match_or_authnone(struct compound_state *cs)
1884 {
1885 	int	i;
1886 	struct secinfo *sp;
1887 
1888 	/*
1889 	 * Check cs->nfsflavor (from the request) against
1890 	 * the current export data in cs->exi.
1891 	 */
1892 	sp = cs->exi->exi_export.ex_secinfo;
1893 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1894 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1895 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1896 			return (1);
1897 	}
1898 
1899 	return (0);
1900 }
1901 
1902 /*
1903  * Check the access authority for the client and return the correct error.
1904  */
1905 nfsstat4
1906 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1907 {
1908 	int	authres;
1909 
1910 	/*
1911 	 * First, check if the security flavor used in the request
1912 	 * are among the flavors set in the server namespace.
1913 	 */
1914 	if (!secinfo_match_or_authnone(cs)) {
1915 		*cs->statusp = NFS4ERR_WRONGSEC;
1916 		return (*cs->statusp);
1917 	}
1918 
1919 	authres = checkauth4(cs, req);
1920 
1921 	if (authres > 0) {
1922 		*cs->statusp = NFS4_OK;
1923 		if (! (cs->access & CS_ACCESS_LIMITED))
1924 			cs->access = CS_ACCESS_OK;
1925 	} else if (authres == 0) {
1926 		*cs->statusp = NFS4ERR_ACCESS;
1927 	} else if (authres == -2) {
1928 		*cs->statusp = NFS4ERR_WRONGSEC;
1929 	} else {
1930 		*cs->statusp = NFS4ERR_DELAY;
1931 	}
1932 	return (*cs->statusp);
1933 }
1934 
1935 /*
1936  * bitmap4_to_attrmask is called by getattr and readdir.
1937  * It sets up the vattr mask and determines whether vfsstat call is needed
1938  * based on the input bitmap.
1939  * Returns nfsv4 status.
1940  */
1941 static nfsstat4
1942 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
1943 {
1944 	int i;
1945 	uint_t	va_mask;
1946 	struct statvfs64 *sbp = sargp->sbp;
1947 
1948 	sargp->sbp = NULL;
1949 	sargp->flag = 0;
1950 	sargp->rdattr_error = NFS4_OK;
1951 	sargp->mntdfid_set = FALSE;
1952 	if (sargp->cs->vp)
1953 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
1954 					    FH4_ATTRDIR | FH4_NAMEDATTR);
1955 	else
1956 		sargp->xattr = 0;
1957 
1958 	/*
1959 	 * Set rdattr_error_req to true if return error per
1960 	 * failed entry rather than fail the readdir.
1961 	 */
1962 	if (breq & FATTR4_RDATTR_ERROR_MASK)
1963 		sargp->rdattr_error_req = 1;
1964 	else
1965 		sargp->rdattr_error_req = 0;
1966 
1967 	/*
1968 	 * generate the va_mask
1969 	 * Handle the easy cases first
1970 	 */
1971 	switch (breq) {
1972 	case NFS4_NTOV_ATTR_MASK:
1973 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
1974 		return (NFS4_OK);
1975 
1976 	case NFS4_FS_ATTR_MASK:
1977 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
1978 		sargp->sbp = sbp;
1979 		return (NFS4_OK);
1980 
1981 	case NFS4_NTOV_ATTR_CACHE_MASK:
1982 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
1983 		return (NFS4_OK);
1984 
1985 	case FATTR4_LEASE_TIME_MASK:
1986 		sargp->vap->va_mask = 0;
1987 		return (NFS4_OK);
1988 
1989 	default:
1990 		va_mask = 0;
1991 		for (i = 0; i < nfs4_ntov_map_size; i++) {
1992 			if ((breq & nfs4_ntov_map[i].fbit) &&
1993 							nfs4_ntov_map[i].vbit)
1994 				va_mask |= nfs4_ntov_map[i].vbit;
1995 		}
1996 
1997 		/*
1998 		 * Check is vfsstat is needed
1999 		 */
2000 		if (breq & NFS4_FS_ATTR_MASK)
2001 			sargp->sbp = sbp;
2002 
2003 		sargp->vap->va_mask = va_mask;
2004 		return (NFS4_OK);
2005 	}
2006 	/* NOTREACHED */
2007 }
2008 
2009 /*
2010  * bitmap4_get_sysattrs is called by getattr and readdir.
2011  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2012  * Returns nfsv4 status.
2013  */
2014 static nfsstat4
2015 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2016 {
2017 	int error;
2018 	struct compound_state *cs = sargp->cs;
2019 	vnode_t *vp = cs->vp;
2020 
2021 	if (sargp->sbp != NULL) {
2022 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2023 			sargp->sbp = NULL;	/* to identify error */
2024 			return (puterrno4(error));
2025 		}
2026 	}
2027 
2028 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2029 }
2030 
2031 static void
2032 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2033 {
2034 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2035 			KM_SLEEP);
2036 	ntovp->attrcnt = 0;
2037 	ntovp->vfsstat = FALSE;
2038 }
2039 
2040 static void
2041 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2042 	struct nfs4_svgetit_arg *sargp)
2043 {
2044 	int i;
2045 	union nfs4_attr_u *na;
2046 	uint8_t *amap;
2047 
2048 	/*
2049 	 * XXX Should do the same checks for whether the bit is set
2050 	 */
2051 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2052 		i < ntovp->attrcnt; i++, na++, amap++) {
2053 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2054 			NFS4ATTR_FREEIT, sargp, na);
2055 	}
2056 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2057 		/*
2058 		 * xdr_free for getattr will be done later
2059 		 */
2060 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2061 			i < ntovp->attrcnt; i++, na++, amap++) {
2062 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2063 		}
2064 	}
2065 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2066 }
2067 
2068 /*
2069  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2070  */
2071 static nfsstat4
2072 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2073 	struct nfs4_svgetit_arg *sargp)
2074 {
2075 	int error = 0;
2076 	int i, k;
2077 	struct nfs4_ntov_table ntov;
2078 	XDR xdr;
2079 	ulong_t xdr_size;
2080 	char *xdr_attrs;
2081 	nfsstat4 status = NFS4_OK;
2082 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2083 	union nfs4_attr_u *na;
2084 	uint8_t *amap;
2085 
2086 	sargp->op = NFS4ATTR_GETIT;
2087 	sargp->flag = 0;
2088 
2089 	fattrp->attrmask = 0;
2090 	/* if no bits requested, then return empty fattr4 */
2091 	if (breq == 0) {
2092 		fattrp->attrlist4_len = 0;
2093 		fattrp->attrlist4 = NULL;
2094 		return (NFS4_OK);
2095 	}
2096 
2097 	/*
2098 	 * return NFS4ERR_INVAL when client requests write-only attrs
2099 	 */
2100 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2101 		return (NFS4ERR_INVAL);
2102 
2103 	nfs4_ntov_table_init(&ntov);
2104 	na = ntov.na;
2105 	amap = ntov.amap;
2106 
2107 	/*
2108 	 * Now loop to get or verify the attrs
2109 	 */
2110 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2111 		if (breq & nfs4_ntov_map[i].fbit) {
2112 			if ((*nfs4_ntov_map[i].sv_getit)(
2113 				    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2114 
2115 				error = (*nfs4_ntov_map[i].sv_getit)(
2116 						NFS4ATTR_GETIT, sargp, na);
2117 
2118 				/*
2119 				 * Possible error values:
2120 				 * >0 if sv_getit failed to
2121 				 * get the attr; 0 if succeeded;
2122 				 * <0 if rdattr_error and the
2123 				 * attribute cannot be returned.
2124 				 */
2125 				if (error && !(sargp->rdattr_error_req))
2126 					goto done;
2127 				/*
2128 				 * If error then just for entry
2129 				 */
2130 				if (error == 0) {
2131 					fattrp->attrmask |=
2132 						nfs4_ntov_map[i].fbit;
2133 					*amap++ =
2134 						(uint8_t)nfs4_ntov_map[i].nval;
2135 					na++;
2136 					(ntov.attrcnt)++;
2137 				} else if ((error > 0) &&
2138 					(sargp->rdattr_error == NFS4_OK)) {
2139 					sargp->rdattr_error = puterrno4(error);
2140 				}
2141 				error = 0;
2142 			}
2143 		}
2144 	}
2145 
2146 	/*
2147 	 * If rdattr_error was set after the return value for it was assigned,
2148 	 * update it.
2149 	 */
2150 	if (prev_rdattr_error != sargp->rdattr_error) {
2151 		na = ntov.na;
2152 		amap = ntov.amap;
2153 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2154 			k = *amap;
2155 			if (k < FATTR4_RDATTR_ERROR) {
2156 				continue;
2157 			}
2158 			if ((k == FATTR4_RDATTR_ERROR) &&
2159 			    ((*nfs4_ntov_map[k].sv_getit)(
2160 				NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2161 
2162 				(void) (*nfs4_ntov_map[k].sv_getit)(
2163 						NFS4ATTR_GETIT, sargp, na);
2164 			}
2165 			break;
2166 		}
2167 	}
2168 
2169 	xdr_size = 0;
2170 	na = ntov.na;
2171 	amap = ntov.amap;
2172 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2173 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2174 	}
2175 
2176 	fattrp->attrlist4_len = xdr_size;
2177 	if (xdr_size) {
2178 		/* freed by rfs4_op_getattr_free() */
2179 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2180 
2181 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2182 
2183 		na = ntov.na;
2184 		amap = ntov.amap;
2185 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2186 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2187 				cmn_err(CE_WARN, "do_rfs4_op_getattr: xdr "
2188 					"encode of attribute %d failed\n",
2189 					*amap);
2190 				status = NFS4ERR_SERVERFAULT;
2191 				break;
2192 			}
2193 		}
2194 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2195 	} else {
2196 		fattrp->attrlist4 = NULL;
2197 	}
2198 done:
2199 
2200 	nfs4_ntov_table_free(&ntov, sargp);
2201 
2202 	if (error != 0)
2203 		status = puterrno4(error);
2204 
2205 	return (status);
2206 }
2207 
2208 /* ARGSUSED */
2209 static void
2210 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2211 	struct compound_state *cs)
2212 {
2213 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2214 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2215 	struct nfs4_svgetit_arg sarg;
2216 	struct statvfs64 sb;
2217 	nfsstat4 status;
2218 
2219 	if (cs->vp == NULL) {
2220 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2221 		return;
2222 	}
2223 
2224 	if (cs->access == CS_ACCESS_DENIED) {
2225 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2226 		return;
2227 	}
2228 
2229 	sarg.sbp = &sb;
2230 	sarg.cs = cs;
2231 
2232 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2233 	if (status == NFS4_OK) {
2234 		status = bitmap4_get_sysattrs(&sarg);
2235 		if (status == NFS4_OK)
2236 			status = do_rfs4_op_getattr(args->attr_request,
2237 				&resp->obj_attributes, &sarg);
2238 	}
2239 	*cs->statusp = resp->status = status;
2240 }
2241 
2242 static void
2243 rfs4_op_getattr_free(nfs_resop4 *resop)
2244 {
2245 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2246 
2247 	nfs4_fattr4_free(&resp->obj_attributes);
2248 }
2249 
2250 /* ARGSUSED */
2251 static void
2252 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2253 	struct compound_state *cs)
2254 {
2255 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2256 
2257 	if (cs->vp == NULL) {
2258 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2259 		return;
2260 	}
2261 	if (cs->access == CS_ACCESS_DENIED) {
2262 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2263 		return;
2264 	}
2265 
2266 	resp->object.nfs_fh4_val =
2267 		kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2268 	nfs_fh4_copy(&cs->fh, &resp->object);
2269 	*cs->statusp = resp->status = NFS4_OK;
2270 }
2271 
2272 static void
2273 rfs4_op_getfh_free(nfs_resop4 *resop)
2274 {
2275 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2276 
2277 	if (resp->status == NFS4_OK &&
2278 	    resp->object.nfs_fh4_val != NULL) {
2279 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2280 		resp->object.nfs_fh4_val = NULL;
2281 		resp->object.nfs_fh4_len = 0;
2282 	}
2283 }
2284 
2285 /*
2286  * illegal: args: void
2287  *	    res : status (NFS4ERR_OP_ILLEGAL)
2288  */
2289 /* ARGSUSED */
2290 static void
2291 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2292 	struct svc_req *req, struct compound_state *cs)
2293 {
2294 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2295 
2296 	resop->resop = OP_ILLEGAL;
2297 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2298 }
2299 
2300 /*
2301  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2302  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2303  */
2304 /* ARGSUSED */
2305 static void
2306 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2307 	struct compound_state *cs)
2308 {
2309 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2310 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2311 	int error;
2312 	vnode_t *vp;
2313 	vnode_t *dvp;
2314 	struct vattr bdva, idva, adva;
2315 	char *nm;
2316 	uint_t  len;
2317 
2318 	/* SAVED_FH: source object */
2319 	vp = cs->saved_vp;
2320 	if (vp == NULL) {
2321 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2322 		return;
2323 	}
2324 
2325 	/* CURRENT_FH: target directory */
2326 	dvp = cs->vp;
2327 	if (dvp == NULL) {
2328 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2329 		return;
2330 	}
2331 
2332 	/*
2333 	 * If there is a non-shared filesystem mounted on this vnode,
2334 	 * do not allow to link any file in this directory.
2335 	 */
2336 	if (vn_ismntpt(dvp)) {
2337 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2338 		return;
2339 	}
2340 
2341 	if (cs->access == CS_ACCESS_DENIED) {
2342 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2343 		return;
2344 	}
2345 
2346 	/* Check source object's type validity */
2347 	if (vp->v_type == VDIR) {
2348 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2349 		return;
2350 	}
2351 
2352 	/* Check target directory's type */
2353 	if (dvp->v_type != VDIR) {
2354 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2355 		return;
2356 	}
2357 
2358 	if (cs->saved_exi != cs->exi) {
2359 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2360 		return;
2361 	}
2362 
2363 	if (!utf8_dir_verify(&args->newname)) {
2364 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2365 		return;
2366 	}
2367 
2368 	nm = utf8_to_fn(&args->newname, &len, NULL);
2369 	if (nm == NULL) {
2370 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2371 		return;
2372 	}
2373 
2374 	if (len > MAXNAMELEN) {
2375 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2376 		kmem_free(nm, len);
2377 		return;
2378 	}
2379 
2380 	if (rdonly4(cs->exi, cs->vp, req)) {
2381 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2382 		kmem_free(nm, len);
2383 		return;
2384 	}
2385 
2386 	/* Get "before" change value */
2387 	bdva.va_mask = AT_CTIME|AT_SEQ;
2388 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
2389 	if (error) {
2390 		*cs->statusp = resp->status = puterrno4(error);
2391 		kmem_free(nm, len);
2392 		return;
2393 	}
2394 
2395 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2396 
2397 	error = VOP_LINK(dvp, vp, nm, cs->cr);
2398 
2399 	kmem_free(nm, len);
2400 
2401 	/*
2402 	 * Get the initial "after" sequence number, if it fails, set to zero
2403 	 */
2404 	idva.va_mask = AT_SEQ;
2405 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
2406 		idva.va_seq = 0;
2407 
2408 	/*
2409 	 * Force modified data and metadata out to stable storage.
2410 	 */
2411 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
2412 	(void) VOP_FSYNC(dvp, 0, cs->cr);
2413 
2414 	if (error) {
2415 		*cs->statusp = resp->status = puterrno4(error);
2416 		return;
2417 	}
2418 
2419 	/*
2420 	 * Get "after" change value, if it fails, simply return the
2421 	 * before value.
2422 	 */
2423 	adva.va_mask = AT_CTIME|AT_SEQ;
2424 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
2425 		adva.va_ctime = bdva.va_ctime;
2426 		adva.va_seq = 0;
2427 	}
2428 
2429 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2430 
2431 	/*
2432 	 * The cinfo.atomic = TRUE only if we have
2433 	 * non-zero va_seq's, and it has incremented by exactly one
2434 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2435 	 */
2436 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2437 			idva.va_seq == (bdva.va_seq + 1) &&
2438 			idva.va_seq == adva.va_seq)
2439 		resp->cinfo.atomic = TRUE;
2440 	else
2441 		resp->cinfo.atomic = FALSE;
2442 
2443 	*cs->statusp = resp->status = NFS4_OK;
2444 }
2445 
2446 /*
2447  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2448  */
2449 
2450 /* ARGSUSED */
2451 static nfsstat4
2452 do_rfs4_op_lookup(char *nm, uint_t buflen, struct svc_req *req,
2453 	struct compound_state *cs)
2454 {
2455 	int error;
2456 	int different_export = 0;
2457 	vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2458 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2459 	nfsstat4 stat;
2460 	fid_t fid;
2461 	int attrdir, dotdot, walk;
2462 	bool_t is_newvp = FALSE;
2463 
2464 	if (cs->vp->v_flag & V_XATTRDIR) {
2465 		attrdir = 1;
2466 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2467 	} else {
2468 		attrdir = 0;
2469 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2470 	}
2471 
2472 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2473 
2474 	/*
2475 	 * If dotdotting, then need to check whether it's
2476 	 * above the root of a filesystem, or above an
2477 	 * export point.
2478 	 */
2479 	if (dotdot) {
2480 
2481 		/*
2482 		 * If dotdotting at the root of a filesystem, then
2483 		 * need to traverse back to the mounted-on filesystem
2484 		 * and do the dotdot lookup there.
2485 		 */
2486 		if (cs->vp->v_flag & VROOT) {
2487 
2488 			/*
2489 			 * If at the system root, then can
2490 			 * go up no further.
2491 			 */
2492 			if (VN_CMP(cs->vp, rootdir))
2493 				return (puterrno4(ENOENT));
2494 
2495 			/*
2496 			 * Traverse back to the mounted-on filesystem
2497 			 */
2498 			cs->vp = untraverse(cs->vp);
2499 
2500 			/*
2501 			 * Set the different_export flag so we remember
2502 			 * to pick up a new exportinfo entry for
2503 			 * this new filesystem.
2504 			 */
2505 			different_export = 1;
2506 		} else {
2507 
2508 			/*
2509 			 * If dotdotting above an export point then set
2510 			 * the different_export to get new export info.
2511 			 */
2512 			different_export = nfs_exported(cs->exi, cs->vp);
2513 		}
2514 	}
2515 
2516 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr);
2517 	if (error)
2518 		return (puterrno4(error));
2519 
2520 	/*
2521 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2522 	 *
2523 	 * XXX if the vnode is a symlink and it is not visible in
2524 	 * a pseudo filesystem, return ENOENT (not following symlink).
2525 	 * V4 client can not mount such symlink. This is a regression
2526 	 * from V2/V3.
2527 	 *
2528 	 * In the same exported filesystem, if the security flavor used
2529 	 * is not an explicitly shared flavor, limit the view to the visible
2530 	 * list entries only. This is not a WRONGSEC case because it's already
2531 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2532 	 */
2533 	if (!different_export &&
2534 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2535 	    cs->access & CS_ACCESS_LIMITED)) {
2536 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2537 			VN_RELE(vp);
2538 			return (puterrno4(ENOENT));
2539 		}
2540 	}
2541 
2542 	/*
2543 	 * If it's a mountpoint, then traverse it.
2544 	 */
2545 	if (vn_ismntpt(vp)) {
2546 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2547 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2548 
2549 		/*
2550 		 * hold pre_tvp to counteract rele by traverse.  We will
2551 		 * need pre_tvp below if checkexport4 fails
2552 		 */
2553 		VN_HOLD(pre_tvp);
2554 		tvp = vp;
2555 		if ((error = traverse(&tvp)) != 0) {
2556 			VN_RELE(vp);
2557 			VN_RELE(pre_tvp);
2558 			return (puterrno4(error));
2559 		}
2560 		vp = tvp;
2561 		different_export = 1;
2562 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2563 		/*
2564 		 * The vfsp comparison is to handle the case where
2565 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2566 		 * and NFS is unaware of local fs transistions because
2567 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2568 		 * the dir and the obj returned by lookup will have different
2569 		 * vfs ptrs.
2570 		 */
2571 		different_export = 1;
2572 	}
2573 
2574 	if (different_export) {
2575 
2576 		bzero(&fid, sizeof (fid));
2577 		fid.fid_len = MAXFIDSZ;
2578 		error = vop_fid_pseudo(vp, &fid);
2579 		if (error) {
2580 			VN_RELE(vp);
2581 			if (pre_tvp)
2582 				VN_RELE(pre_tvp);
2583 			return (puterrno4(error));
2584 		}
2585 
2586 		if (dotdot)
2587 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2588 		else
2589 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2590 
2591 		if (exi == NULL) {
2592 			if (pre_tvp) {
2593 				/*
2594 				 * If this vnode is a mounted-on vnode,
2595 				 * but the mounted-on file system is not
2596 				 * exported, send back the filehandle for
2597 				 * the mounted-on vnode, not the root of
2598 				 * the mounted-on file system.
2599 				 */
2600 				VN_RELE(vp);
2601 				vp = pre_tvp;
2602 				exi = pre_exi;
2603 			} else {
2604 				VN_RELE(vp);
2605 				return (puterrno4(EACCES));
2606 			}
2607 		} else if (pre_tvp) {
2608 			/* we're done with pre_tvp now. release extra hold */
2609 			VN_RELE(pre_tvp);
2610 		}
2611 
2612 		cs->exi = exi;
2613 
2614 		/*
2615 		 * Now we do a checkauth4. The reason is that
2616 		 * this client/user may not have access to the new
2617 		 * exported file system, and if he does,
2618 		 * the client/user may be mapped to a different uid.
2619 		 *
2620 		 * We start with a new cr, because the checkauth4 done
2621 		 * in the PUT*FH operation over wrote the cred's uid,
2622 		 * gid, etc, and we want the real thing before calling
2623 		 * checkauth4()
2624 		 */
2625 		crfree(cs->cr);
2626 		cs->cr = crdup(cs->basecr);
2627 
2628 		if (cs->vp)
2629 			oldvp = cs->vp;
2630 		cs->vp = vp;
2631 		is_newvp = TRUE;
2632 
2633 		stat = call_checkauth4(cs, req);
2634 		if (stat != NFS4_OK) {
2635 			VN_RELE(cs->vp);
2636 			cs->vp = oldvp;
2637 			return (stat);
2638 		}
2639 	}
2640 
2641 	/*
2642 	 * After various NFS checks, do a label check on the path
2643 	 * component. The label on this path should either be the
2644 	 * global zone's label or a zone's label. We are only
2645 	 * interested in the zone's label because exported files
2646 	 * in global zone is accessible (though read-only) to
2647 	 * clients. The exportability/visibility check is already
2648 	 * done before reaching this code.
2649 	 */
2650 	if (is_system_labeled()) {
2651 		bslabel_t *clabel;
2652 
2653 		ASSERT(req->rq_label != NULL);
2654 		clabel = req->rq_label;
2655 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2656 		    "got client label from request(1)", struct svc_req *, req);
2657 
2658 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2659 			if (!do_rfs4_label_check(clabel, vp, DOMINANCE_CHECK)) {
2660 				error = EACCES;
2661 				goto err_out;
2662 			}
2663 		} else {
2664 			/*
2665 			 * We grant access to admin_low label clients
2666 			 * only if the client is trusted, i.e. also
2667 			 * running Solaris Trusted Extension.
2668 			 */
2669 			struct sockaddr	*ca;
2670 			int		addr_type;
2671 			void		*ipaddr;
2672 			tsol_tpc_t	*tp;
2673 
2674 			ca = (struct sockaddr *)svc_getrpccaller(
2675 			    req->rq_xprt)->buf;
2676 			if (ca->sa_family == AF_INET) {
2677 				addr_type = IPV4_VERSION;
2678 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2679 			} else if (ca->sa_family == AF_INET6) {
2680 				addr_type = IPV6_VERSION;
2681 				ipaddr = &((struct sockaddr_in6 *)
2682 				    ca)->sin6_addr;
2683 			}
2684 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
2685 			if (tp == NULL || tp->tpc_tp.tp_doi !=
2686 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2687 			    SUN_CIPSO) {
2688 				error = EACCES;
2689 				goto err_out;
2690 			}
2691 		}
2692 	}
2693 
2694 	error = makefh4(&cs->fh, vp, cs->exi);
2695 
2696 err_out:
2697 	if (error) {
2698 		if (is_newvp) {
2699 			VN_RELE(cs->vp);
2700 			cs->vp = oldvp;
2701 		} else
2702 			VN_RELE(vp);
2703 		return (puterrno4(error));
2704 	}
2705 
2706 	if (!is_newvp) {
2707 		if (cs->vp)
2708 			VN_RELE(cs->vp);
2709 		cs->vp = vp;
2710 	} else if (oldvp)
2711 		VN_RELE(oldvp);
2712 
2713 	/*
2714 	 * if did lookup on attrdir and didn't lookup .., set named
2715 	 * attr fh flag
2716 	 */
2717 	if (attrdir && ! dotdot)
2718 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2719 
2720 	/* Assume false for now, open proc will set this */
2721 	cs->mandlock = FALSE;
2722 
2723 	return (NFS4_OK);
2724 }
2725 
2726 /* ARGSUSED */
2727 static void
2728 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2729 	struct compound_state *cs)
2730 {
2731 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2732 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2733 	char *nm;
2734 	uint_t len;
2735 
2736 	if (cs->vp == NULL) {
2737 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2738 		return;
2739 	}
2740 
2741 	if (cs->vp->v_type == VLNK) {
2742 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2743 		return;
2744 	}
2745 
2746 	if (cs->vp->v_type != VDIR) {
2747 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2748 		return;
2749 	}
2750 
2751 	if (!utf8_dir_verify(&args->objname)) {
2752 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2753 		return;
2754 	}
2755 
2756 	nm = utf8_to_str(&args->objname, &len, NULL);
2757 	if (nm == NULL) {
2758 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2759 		return;
2760 	}
2761 
2762 	if (len > MAXNAMELEN) {
2763 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2764 		kmem_free(nm, len);
2765 		return;
2766 	}
2767 
2768 	*cs->statusp = resp->status = do_rfs4_op_lookup(nm, len, req, cs);
2769 
2770 	kmem_free(nm, len);
2771 }
2772 
2773 /* ARGSUSED */
2774 static void
2775 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2776 	struct compound_state *cs)
2777 {
2778 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2779 
2780 	if (cs->vp == NULL) {
2781 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2782 		return;
2783 	}
2784 
2785 	if (cs->vp->v_type != VDIR) {
2786 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2787 		return;
2788 	}
2789 
2790 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", 3, req, cs);
2791 
2792 	/*
2793 	 * From NFSV4 Specification, LOOKUPP should not check for
2794 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2795 	 */
2796 	if (resp->status == NFS4ERR_WRONGSEC) {
2797 		*cs->statusp = resp->status = NFS4_OK;
2798 	}
2799 }
2800 
2801 
2802 /*ARGSUSED2*/
2803 static void
2804 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2805 	struct compound_state *cs)
2806 {
2807 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
2808 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
2809 	vnode_t		*avp = NULL;
2810 	int		lookup_flags = LOOKUP_XATTR, error;
2811 	int		exp_ro = 0;
2812 
2813 	if (cs->vp == NULL) {
2814 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2815 		return;
2816 	}
2817 
2818 	/*
2819 	 * Make a couple of checks made by copen()
2820 	 *
2821 	 * Check to make sure underlying fs supports xattrs.  This
2822 	 * is required because solaris filesystem implementations
2823 	 * (UFS/TMPFS) don't enforce the noxattr mount option
2824 	 * in VOP_LOOKUP(LOOKUP_XATTR).  If fs doesn't support this
2825 	 * pathconf cmd or if fs supports cmd but doesn't claim
2826 	 * support for xattr, return NOTSUPP.  It would be better
2827 	 * to use VOP_PATHCONF( _PC_XATTR_ENABLED) for this; however,
2828 	 * that cmd is not available to VOP_PATHCONF interface
2829 	 * (it's only implemented inside pathconf syscall)...
2830 	 *
2831 	 * Verify permission to put attributes on files (access
2832 	 * checks from copen).
2833 	 */
2834 
2835 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0) {
2836 		error = ENOTSUP;
2837 		goto error_out;
2838 	}
2839 
2840 	if ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr) != 0) &&
2841 	    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr) != 0) &&
2842 	    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr) != 0)) {
2843 		error = EACCES;
2844 		goto error_out;
2845 	}
2846 
2847 	/*
2848 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
2849 	 * the file system is exported read-only -- regardless of
2850 	 * createdir flag.  Otherwise the attrdir would be created
2851 	 * (assuming server fs isn't mounted readonly locally).  If
2852 	 * VOP_LOOKUP returns ENOENT in this case, the error will
2853 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
2854 	 * because specfs has no VOP_LOOKUP op, so the macro would
2855 	 * return ENOSYS.  EINVAL is returned by all (current)
2856 	 * Solaris file system implementations when any of their
2857 	 * restrictions are violated (xattr(dir) can't have xattrdir).
2858 	 * Returning NOTSUPP is more appropriate in this case
2859 	 * because the object will never be able to have an attrdir.
2860 	 */
2861 	if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
2862 		lookup_flags |= CREATE_XATTR_DIR;
2863 
2864 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr);
2865 
2866 	if (error) {
2867 		if (error == ENOENT && args->createdir && exp_ro)
2868 			error = EROFS;
2869 		else if (error == EINVAL || error == ENOSYS)
2870 			error = ENOTSUP;
2871 		goto error_out;
2872 	}
2873 
2874 	ASSERT(avp->v_flag & V_XATTRDIR);
2875 
2876 	error = makefh4(&cs->fh, avp, cs->exi);
2877 
2878 	if (error) {
2879 		VN_RELE(avp);
2880 		goto error_out;
2881 	}
2882 
2883 	VN_RELE(cs->vp);
2884 	cs->vp = avp;
2885 
2886 	/*
2887 	 * There is no requirement for an attrdir fh flag
2888 	 * because the attrdir has a vnode flag to distinguish
2889 	 * it from regular (non-xattr) directories.  The
2890 	 * FH4_ATTRDIR flag is set for future sanity checks.
2891 	 */
2892 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
2893 	*cs->statusp = resp->status = NFS4_OK;
2894 	return;
2895 
2896 error_out:
2897 
2898 	*cs->statusp = resp->status = puterrno4(error);
2899 }
2900 
2901 static int
2902 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred)
2903 {
2904 	int error;
2905 	int i;
2906 	clock_t delaytime;
2907 	caller_context_t ct;
2908 
2909 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
2910 
2911 	/*
2912 	 * Don't block on mandatory locks. If this routine returns
2913 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
2914 	 */
2915 	uio->uio_fmode = FNONBLOCK;
2916 
2917 	ct.cc_sysid = 0;
2918 	ct.cc_pid = 0;
2919 	ct.cc_caller_id = nfs4_srv_caller_id;
2920 
2921 	for (i = 0; i < rfs4_maxlock_tries; i++) {
2922 
2923 
2924 		if (direction == FREAD) {
2925 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
2926 			error = VOP_READ(vp, uio, ioflag, cred, &ct);
2927 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
2928 		} else {
2929 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
2930 			error = VOP_WRITE(vp, uio, ioflag, cred, &ct);
2931 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
2932 		}
2933 
2934 		if (error != EAGAIN)
2935 			break;
2936 
2937 		if (i < rfs4_maxlock_tries - 1) {
2938 			delay(delaytime);
2939 			delaytime *= 2;
2940 		}
2941 	}
2942 
2943 	return (error);
2944 }
2945 
2946 /* ARGSUSED */
2947 static void
2948 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2949 	struct compound_state *cs)
2950 {
2951 	READ4args *args = &argop->nfs_argop4_u.opread;
2952 	READ4res *resp = &resop->nfs_resop4_u.opread;
2953 	int error;
2954 	int verror;
2955 	vnode_t *vp;
2956 	struct vattr va;
2957 	struct iovec iov;
2958 	struct uio uio;
2959 	u_offset_t offset;
2960 	bool_t *deleg = &cs->deleg;
2961 	nfsstat4 stat;
2962 	int in_crit = 0;
2963 	mblk_t *mp;
2964 	int alloc_err = 0;
2965 
2966 	vp = cs->vp;
2967 	if (vp == NULL) {
2968 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2969 		return;
2970 	}
2971 	if (cs->access == CS_ACCESS_DENIED) {
2972 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2973 		return;
2974 	}
2975 
2976 	/*
2977 	 * Enter the critical region before calling VOP_RWLOCK
2978 	 * to avoid a deadlock with write requests.
2979 	 */
2980 	if (nbl_need_check(vp)) {
2981 		nbl_start_crit(vp, RW_READER);
2982 		in_crit = 1;
2983 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0)) {
2984 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
2985 			goto out;
2986 		}
2987 	}
2988 
2989 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
2990 					deleg, TRUE)) != NFS4_OK) {
2991 		*cs->statusp = resp->status = stat;
2992 		goto out;
2993 	}
2994 
2995 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
2996 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
2997 
2998 	/*
2999 	 * If we can't get the attributes, then we can't do the
3000 	 * right access checking.  So, we'll fail the request.
3001 	 */
3002 	if (verror) {
3003 		*cs->statusp = resp->status = puterrno4(verror);
3004 		goto out;
3005 	}
3006 
3007 	if (vp->v_type != VREG) {
3008 		*cs->statusp = resp->status =
3009 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3010 		goto out;
3011 	}
3012 
3013 	if (crgetuid(cs->cr) != va.va_uid &&
3014 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr)) &&
3015 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr))) {
3016 		*cs->statusp = resp->status = puterrno4(error);
3017 		goto out;
3018 	}
3019 
3020 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3021 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3022 		goto out;
3023 	}
3024 
3025 	offset = args->offset;
3026 	if (offset >= va.va_size) {
3027 		*cs->statusp = resp->status = NFS4_OK;
3028 		resp->eof = TRUE;
3029 		resp->data_len = 0;
3030 		resp->data_val = NULL;
3031 		resp->mblk = NULL;
3032 		*cs->statusp = resp->status = NFS4_OK;
3033 		goto out;
3034 	}
3035 
3036 	if (args->count == 0) {
3037 		*cs->statusp = resp->status = NFS4_OK;
3038 		resp->eof = FALSE;
3039 		resp->data_len = 0;
3040 		resp->data_val = NULL;
3041 		resp->mblk = NULL;
3042 		goto out;
3043 	}
3044 
3045 	/*
3046 	 * Do not allocate memory more than maximum allowed
3047 	 * transfer size
3048 	 */
3049 	if (args->count > rfs4_tsize(req))
3050 		args->count = rfs4_tsize(req);
3051 
3052 	/*
3053 	 * mp will contain the data to be sent out in the read reply.
3054 	 * It will be freed after the reply has been sent.
3055 	 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple,
3056 	 * so that the call to xdrmblk_putmblk() never fails.
3057 	 * If the first alloc of the requested size fails, then
3058 	 * decrease the size to something more reasonable and wait
3059 	 * for the allocation to occur.
3060 	 */
3061 	mp = allocb(RNDUP(args->count), BPRI_MED);
3062 	if (mp == NULL) {
3063 		if (args->count > MAXBSIZE)
3064 			args->count = MAXBSIZE;
3065 		mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3066 				STR_NOSIG, &alloc_err);
3067 	}
3068 	ASSERT(mp != NULL);
3069 	ASSERT(alloc_err == 0);
3070 
3071 	iov.iov_base = (caddr_t)mp->b_datap->db_base;
3072 	iov.iov_len = args->count;
3073 	uio.uio_iov = &iov;
3074 	uio.uio_iovcnt = 1;
3075 	uio.uio_segflg = UIO_SYSSPACE;
3076 	uio.uio_extflg = UIO_COPY_CACHED;
3077 	uio.uio_loffset = args->offset;
3078 	uio.uio_resid = args->count;
3079 
3080 	error = do_io(FREAD, vp, &uio, 0, cs->cr);
3081 
3082 	va.va_mask = AT_SIZE;
3083 	verror = VOP_GETATTR(vp, &va, 0, cs->cr);
3084 
3085 	if (error) {
3086 		freeb(mp);
3087 		*cs->statusp = resp->status = puterrno4(error);
3088 		goto out;
3089 	}
3090 
3091 	*cs->statusp = resp->status = NFS4_OK;
3092 
3093 	ASSERT(uio.uio_resid >= 0);
3094 	resp->data_len = args->count - uio.uio_resid;
3095 	resp->data_val = (char *)mp->b_datap->db_base;
3096 	resp->mblk = mp;
3097 
3098 	if (!verror && offset + resp->data_len == va.va_size)
3099 		resp->eof = TRUE;
3100 	else
3101 		resp->eof = FALSE;
3102 
3103 out:
3104 	if (in_crit)
3105 		nbl_end_crit(vp);
3106 }
3107 
3108 static void
3109 rfs4_op_read_free(nfs_resop4 *resop)
3110 {
3111 	READ4res *resp = &resop->nfs_resop4_u.opread;
3112 
3113 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3114 		freeb(resp->mblk);
3115 		resp->mblk = NULL;
3116 		resp->data_val = NULL;
3117 		resp->data_len = 0;
3118 	}
3119 }
3120 
3121 static void
3122 rfs4_op_readdir_free(nfs_resop4 *resop)
3123 {
3124 	READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3125 
3126 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3127 		freeb(resp->mblk);
3128 		resp->mblk = NULL;
3129 		resp->data_len = 0;
3130 	}
3131 }
3132 
3133 
3134 /* ARGSUSED */
3135 static void
3136 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3137 	struct compound_state *cs)
3138 {
3139 	PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3140 	int error;
3141 	vnode_t *vp;
3142 	struct exportinfo *exi, *sav_exi;
3143 	nfs_fh4_fmt_t *fh_fmtp;
3144 
3145 	if (cs->vp) {
3146 		VN_RELE(cs->vp);
3147 		cs->vp = NULL;
3148 	}
3149 
3150 	if (cs->cr)
3151 		crfree(cs->cr);
3152 
3153 	cs->cr = crdup(cs->basecr);
3154 
3155 	vp = exi_public->exi_vp;
3156 	if (vp == NULL) {
3157 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3158 		return;
3159 	}
3160 
3161 	error = makefh4(&cs->fh, vp, exi_public);
3162 	if (error != 0) {
3163 		*cs->statusp = resp->status = puterrno4(error);
3164 		return;
3165 	}
3166 	sav_exi = cs->exi;
3167 	if (exi_public == exi_root) {
3168 		/*
3169 		 * No filesystem is actually shared public, so we default
3170 		 * to exi_root. In this case, we must check whether root
3171 		 * is exported.
3172 		 */
3173 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3174 
3175 		/*
3176 		 * if root filesystem is exported, the exportinfo struct that we
3177 		 * should use is what checkexport4 returns, because root_exi is
3178 		 * actually a mostly empty struct.
3179 		 */
3180 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3181 			(fid_t *)&fh_fmtp->fh4_xlen, NULL);
3182 		cs->exi = ((exi != NULL) ? exi : exi_public);
3183 	} else {
3184 		/*
3185 		 * it's a properly shared filesystem
3186 		 */
3187 		cs->exi = exi_public;
3188 	}
3189 
3190 	VN_HOLD(vp);
3191 	cs->vp = vp;
3192 
3193 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3194 		VN_RELE(cs->vp);
3195 		cs->vp = NULL;
3196 		cs->exi = sav_exi;
3197 		return;
3198 	}
3199 
3200 	*cs->statusp = resp->status = NFS4_OK;
3201 }
3202 
3203 /*
3204  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3205  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3206  * or joe have restrictive search permissions, then we shouldn't let
3207  * the client get a file handle. This is easy to enforce. However, we
3208  * don't know what security flavor should be used until we resolve the
3209  * path name. Another complication is uid mapping. If root is
3210  * the user, then it will be mapped to the anonymous user by default,
3211  * but we won't know that till we've resolved the path name. And we won't
3212  * know what the anonymous user is.
3213  * Luckily, SECINFO is specified to take a full filename.
3214  * So what we will have to in rfs4_op_lookup is check that flavor of
3215  * the target object matches that of the request, and if root was the
3216  * caller, check for the root= and anon= options, and if necessary,
3217  * repeat the lookup using the right cred_t. But that's not done yet.
3218  */
3219 /* ARGSUSED */
3220 static void
3221 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3222 	struct compound_state *cs)
3223 {
3224 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3225 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3226 	nfs_fh4_fmt_t *fh_fmtp;
3227 
3228 	if (cs->vp) {
3229 		VN_RELE(cs->vp);
3230 		cs->vp = NULL;
3231 	}
3232 
3233 	if (cs->cr) {
3234 		crfree(cs->cr);
3235 		cs->cr = NULL;
3236 	}
3237 
3238 
3239 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3240 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3241 		return;
3242 	}
3243 
3244 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3245 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3246 				NULL);
3247 
3248 	if (cs->exi == NULL) {
3249 		*cs->statusp = resp->status = NFS4ERR_STALE;
3250 		return;
3251 	}
3252 
3253 	cs->cr = crdup(cs->basecr);
3254 
3255 	ASSERT(cs->cr != NULL);
3256 
3257 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3258 		*cs->statusp = resp->status;
3259 		return;
3260 	}
3261 
3262 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3263 		VN_RELE(cs->vp);
3264 		cs->vp = NULL;
3265 		return;
3266 	}
3267 
3268 	nfs_fh4_copy(&args->object, &cs->fh);
3269 	*cs->statusp = resp->status = NFS4_OK;
3270 	cs->deleg = FALSE;
3271 }
3272 
3273 /* ARGSUSED */
3274 static void
3275 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3276 	struct compound_state *cs)
3277 
3278 {
3279 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3280 	int error;
3281 	fid_t fid;
3282 	struct exportinfo *exi, *sav_exi;
3283 
3284 	if (cs->vp) {
3285 		VN_RELE(cs->vp);
3286 		cs->vp = NULL;
3287 	}
3288 
3289 	if (cs->cr)
3290 		crfree(cs->cr);
3291 
3292 	cs->cr = crdup(cs->basecr);
3293 
3294 	/*
3295 	 * Using rootdir, the system root vnode,
3296 	 * get its fid.
3297 	 */
3298 	bzero(&fid, sizeof (fid));
3299 	fid.fid_len = MAXFIDSZ;
3300 	error = vop_fid_pseudo(rootdir, &fid);
3301 	if (error != 0) {
3302 		*cs->statusp = resp->status = puterrno4(error);
3303 		return;
3304 	}
3305 
3306 	/*
3307 	 * Then use the root fsid & fid it to find out if it's exported
3308 	 *
3309 	 * If the server root isn't exported directly, then
3310 	 * it should at least be a pseudo export based on
3311 	 * one or more exports further down in the server's
3312 	 * file tree.
3313 	 */
3314 	exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3315 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3316 		NFS4_DEBUG(rfs4_debug,
3317 			(CE_WARN, "rfs4_op_putrootfh: export check failure"));
3318 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3319 		return;
3320 	}
3321 
3322 	/*
3323 	 * Now make a filehandle based on the root
3324 	 * export and root vnode.
3325 	 */
3326 	error = makefh4(&cs->fh, rootdir, exi);
3327 	if (error != 0) {
3328 		*cs->statusp = resp->status = puterrno4(error);
3329 		return;
3330 	}
3331 
3332 	sav_exi = cs->exi;
3333 	cs->exi = exi;
3334 
3335 	VN_HOLD(rootdir);
3336 	cs->vp = rootdir;
3337 
3338 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3339 		VN_RELE(rootdir);
3340 		cs->vp = NULL;
3341 		cs->exi = sav_exi;
3342 		return;
3343 	}
3344 
3345 	*cs->statusp = resp->status = NFS4_OK;
3346 	cs->deleg = FALSE;
3347 }
3348 
3349 /*
3350  * A directory entry is a valid nfsv4 entry if
3351  * - it has a non-zero ino
3352  * - it is not a dot or dotdot name
3353  * - it is visible in a pseudo export or in a real export that can
3354  *   only have a limited view.
3355  */
3356 static bool_t
3357 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3358 		int *expseudo, int check_visible)
3359 {
3360 	if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3361 		*expseudo = 0;
3362 		return (FALSE);
3363 	}
3364 
3365 	if (! check_visible) {
3366 		*expseudo = 0;
3367 		return (TRUE);
3368 	}
3369 
3370 	return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3371 }
3372 
3373 /*
3374  * set_rdattr_params sets up the variables used to manage what information
3375  * to get for each directory entry.
3376  */
3377 static nfsstat4
3378 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3379 		bitmap4 attrs, bool_t *need_to_lookup)
3380 {
3381 	uint_t	va_mask;
3382 	nfsstat4 status;
3383 	bitmap4 objbits;
3384 
3385 	status = bitmap4_to_attrmask(attrs, sargp);
3386 	if (status != NFS4_OK) {
3387 		/*
3388 		 * could not even figure attr mask
3389 		 */
3390 		return (status);
3391 	}
3392 	va_mask = sargp->vap->va_mask;
3393 
3394 	/*
3395 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3396 	 * mntdfid_set is set once here, but mounted_on_fileid is
3397 	 * set in main dirent processing loop for each dirent.
3398 	 * The mntdfid_set is a simple optimization that lets the
3399 	 * server attr code avoid work when caller is readdir.
3400 	 */
3401 	sargp->mntdfid_set = TRUE;
3402 
3403 	/*
3404 	 * Lookup entry only if client asked for any of the following:
3405 	 * a) vattr attrs
3406 	 * b) vfs attrs
3407 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3408 	 *    other than mounted_on_fileid (which we can take from dirent)
3409 	 */
3410 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3411 
3412 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3413 		*need_to_lookup = TRUE;
3414 	else
3415 		*need_to_lookup = FALSE;
3416 
3417 	if (sargp->sbp == NULL)
3418 		return (NFS4_OK);
3419 
3420 	/*
3421 	 * If filesystem attrs are requested, get them now from the
3422 	 * directory vp, as most entries will have same filesystem. The only
3423 	 * exception are mounted over entries but we handle
3424 	 * those as we go (XXX mounted over detection not yet implemented).
3425 	 */
3426 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3427 	status = bitmap4_get_sysattrs(sargp);
3428 	sargp->vap->va_mask = va_mask;
3429 
3430 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3431 		/*
3432 		 * Failed to get filesystem attributes.
3433 		 * Return a rdattr_error for each entry, but don't fail.
3434 		 * However, don't get any obj-dependent attrs.
3435 		 */
3436 		sargp->rdattr_error = status;	/* for rdattr_error */
3437 		*need_to_lookup = FALSE;
3438 		/*
3439 		 * At least get fileid for regular readdir output
3440 		 */
3441 		sargp->vap->va_mask &= AT_NODEID;
3442 		status = NFS4_OK;
3443 	}
3444 
3445 	return (status);
3446 }
3447 
3448 /*
3449  * readlink: args: CURRENT_FH.
3450  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3451  */
3452 
3453 /* ARGSUSED */
3454 static void
3455 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3456 	struct compound_state *cs)
3457 {
3458 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3459 	int error;
3460 	vnode_t *vp;
3461 	struct iovec iov;
3462 	struct vattr va;
3463 	struct uio uio;
3464 	char *data;
3465 
3466 	/* CURRENT_FH: directory */
3467 	vp = cs->vp;
3468 	if (vp == NULL) {
3469 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3470 		return;
3471 	}
3472 
3473 	if (cs->access == CS_ACCESS_DENIED) {
3474 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3475 		return;
3476 	}
3477 
3478 	if (vp->v_type == VDIR) {
3479 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
3480 		return;
3481 	}
3482 
3483 	if (vp->v_type != VLNK) {
3484 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3485 		return;
3486 	}
3487 
3488 	va.va_mask = AT_MODE;
3489 	error = VOP_GETATTR(vp, &va, 0, cs->cr);
3490 	if (error) {
3491 		*cs->statusp = resp->status = puterrno4(error);
3492 		return;
3493 	}
3494 
3495 	if (MANDLOCK(vp, va.va_mode)) {
3496 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3497 		return;
3498 	}
3499 
3500 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3501 
3502 	iov.iov_base = data;
3503 	iov.iov_len = MAXPATHLEN;
3504 	uio.uio_iov = &iov;
3505 	uio.uio_iovcnt = 1;
3506 	uio.uio_segflg = UIO_SYSSPACE;
3507 	uio.uio_extflg = UIO_COPY_CACHED;
3508 	uio.uio_loffset = 0;
3509 	uio.uio_resid = MAXPATHLEN;
3510 
3511 	error = VOP_READLINK(vp, &uio, cs->cr);
3512 
3513 	if (error) {
3514 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3515 		*cs->statusp = resp->status = puterrno4(error);
3516 		return;
3517 	}
3518 
3519 	*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3520 
3521 	/*
3522 	 * treat link name as data
3523 	 */
3524 	(void) str_to_utf8(data, &resp->link);
3525 
3526 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3527 	*cs->statusp = resp->status = NFS4_OK;
3528 }
3529 
3530 static void
3531 rfs4_op_readlink_free(nfs_resop4 *resop)
3532 {
3533 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3534 	utf8string *symlink = &resp->link;
3535 
3536 	if (symlink->utf8string_val) {
3537 		UTF8STRING_FREE(*symlink)
3538 	}
3539 }
3540 
3541 /*
3542  * release_lockowner:
3543  *	Release any state associated with the supplied
3544  *	lockowner. Note if any lo_state is holding locks we will not
3545  *	rele that lo_state and thus the lockowner will not be destroyed.
3546  *	A client using lock after the lock owner stateid has been released
3547  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3548  *	to reissue the lock with new_lock_owner set to TRUE.
3549  *	args: lock_owner
3550  *	res:  status
3551  */
3552 /* ARGSUSED */
3553 static void
3554 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3555 	struct svc_req *req, struct compound_state *cs)
3556 {
3557 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3558 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3559 	rfs4_lockowner_t *lo;
3560 	rfs4_openowner_t *oop;
3561 	rfs4_state_t *sp;
3562 	rfs4_lo_state_t *lsp;
3563 	rfs4_client_t *cp;
3564 	bool_t create = FALSE;
3565 	locklist_t *llist;
3566 	sysid_t sysid;
3567 
3568 	/* Make sure there is a clientid around for this request */
3569 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3570 
3571 	if (cp == NULL) {
3572 		*cs->statusp = resp->status =
3573 			rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3574 		return;
3575 	}
3576 	rfs4_client_rele(cp);
3577 
3578 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3579 	if (lo == NULL) {
3580 		*cs->statusp = resp->status = NFS4_OK;
3581 		return;
3582 	}
3583 	ASSERT(lo->client != NULL);
3584 
3585 	/*
3586 	 * Check for EXPIRED client. If so will reap state with in a lease
3587 	 * period or on next set_clientid_confirm step
3588 	 */
3589 	if (rfs4_lease_expired(lo->client)) {
3590 		rfs4_lockowner_rele(lo);
3591 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3592 		return;
3593 	}
3594 
3595 	/*
3596 	 * If no sysid has been assigned, then no locks exist; just return.
3597 	 */
3598 	rfs4_dbe_lock(lo->client->dbe);
3599 	if (lo->client->sysidt == LM_NOSYSID) {
3600 		rfs4_lockowner_rele(lo);
3601 		rfs4_dbe_unlock(lo->client->dbe);
3602 		return;
3603 	}
3604 
3605 	sysid = lo->client->sysidt;
3606 	rfs4_dbe_unlock(lo->client->dbe);
3607 
3608 	/*
3609 	 * Mark the lockowner invalid.
3610 	 */
3611 	rfs4_dbe_hide(lo->dbe);
3612 
3613 	/*
3614 	 * sysid-pid pair should now not be used since the lockowner is
3615 	 * invalid. If the client were to instantiate the lockowner again
3616 	 * it would be assigned a new pid. Thus we can get the list of
3617 	 * current locks.
3618 	 */
3619 
3620 	llist = flk_get_active_locks(sysid, lo->pid);
3621 	/* If we are still holding locks fail */
3622 	if (llist != NULL) {
3623 
3624 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3625 
3626 		flk_free_locklist(llist);
3627 		/*
3628 		 * We need to unhide the lockowner so the client can
3629 		 * try it again. The bad thing here is if the client
3630 		 * has a logic error that took it here in the first place
3631 		 * he probably has lost accounting of the locks that it
3632 		 * is holding. So we may have dangling state until the
3633 		 * open owner state is reaped via close. One scenario
3634 		 * that could possibly occur is that the client has
3635 		 * sent the unlock request(s) in separate threads
3636 		 * and has not waited for the replies before sending the
3637 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
3638 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
3639 		 * reissuing the request.
3640 		 */
3641 		rfs4_dbe_unhide(lo->dbe);
3642 		rfs4_lockowner_rele(lo);
3643 		return;
3644 	}
3645 
3646 	/*
3647 	 * For the corresponding client we need to check each open
3648 	 * owner for any opens that have lockowner state associated
3649 	 * with this lockowner.
3650 	 */
3651 
3652 	rfs4_dbe_lock(lo->client->dbe);
3653 	for (oop = lo->client->openownerlist.next->oop; oop != NULL;
3654 	    oop = oop->openownerlist.next->oop) {
3655 
3656 		rfs4_dbe_lock(oop->dbe);
3657 		for (sp = oop->ownerstateids.next->sp; sp != NULL;
3658 		    sp = sp->ownerstateids.next->sp) {
3659 
3660 			rfs4_dbe_lock(sp->dbe);
3661 			for (lsp = sp->lockownerlist.next->lsp;
3662 			    lsp != NULL; lsp = lsp->lockownerlist.next->lsp) {
3663 				if (lsp->locker == lo) {
3664 					rfs4_dbe_lock(lsp->dbe);
3665 					rfs4_dbe_invalidate(lsp->dbe);
3666 					rfs4_dbe_unlock(lsp->dbe);
3667 				}
3668 			}
3669 			rfs4_dbe_unlock(sp->dbe);
3670 		}
3671 		rfs4_dbe_unlock(oop->dbe);
3672 	}
3673 	rfs4_dbe_unlock(lo->client->dbe);
3674 
3675 	rfs4_lockowner_rele(lo);
3676 
3677 	*cs->statusp = resp->status = NFS4_OK;
3678 }
3679 
3680 /*
3681  * short utility function to lookup a file and recall the delegation
3682  */
3683 static rfs4_file_t *
3684 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
3685 	int *lkup_error, cred_t *cr)
3686 {
3687 	vnode_t *vp;
3688 	rfs4_file_t *fp = NULL;
3689 	bool_t fcreate = FALSE;
3690 	int error;
3691 
3692 	if (vpp)
3693 		*vpp = NULL;
3694 
3695 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr)) == 0) {
3696 		if (vp->v_type == VREG)
3697 			fp = rfs4_findfile(vp, NULL, &fcreate);
3698 		if (vpp)
3699 			*vpp = vp;
3700 		else
3701 			VN_RELE(vp);
3702 	}
3703 
3704 	if (lkup_error)
3705 		*lkup_error = error;
3706 
3707 	return (fp);
3708 }
3709 
3710 /*
3711  * remove: args: CURRENT_FH: directory; name.
3712  *	res: status. If success - CURRENT_FH unchanged, return change_info
3713  *		for directory.
3714  */
3715 /* ARGSUSED */
3716 static void
3717 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3718 	struct compound_state *cs)
3719 {
3720 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
3721 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
3722 	int error;
3723 	vnode_t *dvp, *vp;
3724 	struct vattr bdva, idva, adva;
3725 	char *nm;
3726 	uint_t len;
3727 	rfs4_file_t *fp;
3728 	int in_crit = 0;
3729 	bslabel_t *clabel;
3730 
3731 	/* CURRENT_FH: directory */
3732 	dvp = cs->vp;
3733 	if (dvp == NULL) {
3734 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3735 		return;
3736 	}
3737 
3738 	if (cs->access == CS_ACCESS_DENIED) {
3739 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3740 		return;
3741 	}
3742 
3743 	/*
3744 	 * If there is an unshared filesystem mounted on this vnode,
3745 	 * Do not allow to remove anything in this directory.
3746 	 */
3747 	if (vn_ismntpt(dvp)) {
3748 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3749 		return;
3750 	}
3751 
3752 	if (dvp->v_type != VDIR) {
3753 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3754 		return;
3755 	}
3756 
3757 	if (!utf8_dir_verify(&args->target)) {
3758 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3759 		return;
3760 	}
3761 
3762 	/*
3763 	 * Lookup the file so that we can check if it's a directory
3764 	 */
3765 	nm = utf8_to_fn(&args->target, &len, NULL);
3766 	if (nm == NULL) {
3767 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3768 		return;
3769 	}
3770 
3771 	if (len > MAXNAMELEN) {
3772 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3773 		kmem_free(nm, len);
3774 		return;
3775 	}
3776 
3777 	if (rdonly4(cs->exi, cs->vp, req)) {
3778 		*cs->statusp = resp->status = NFS4ERR_ROFS;
3779 		kmem_free(nm, len);
3780 		return;
3781 	}
3782 
3783 	/*
3784 	 * Lookup the file to determine type and while we are see if
3785 	 * there is a file struct around and check for delegation.
3786 	 * We don't need to acquire va_seq before this lookup, if
3787 	 * it causes an update, cinfo.before will not match, which will
3788 	 * trigger a cache flush even if atomic is TRUE.
3789 	 */
3790 	if (fp = rfs4_lookup_and_findfile(dvp, nm, &vp, &error, cs->cr)) {
3791 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
3792 						NULL)) {
3793 			VN_RELE(vp);
3794 			rfs4_file_rele(fp);
3795 			*cs->statusp = resp->status = NFS4ERR_DELAY;
3796 			kmem_free(nm, len);
3797 			return;
3798 		}
3799 	}
3800 
3801 	/* Didn't find anything to remove */
3802 	if (vp == NULL) {
3803 		*cs->statusp = resp->status = error;
3804 		kmem_free(nm, len);
3805 		return;
3806 	}
3807 
3808 	if (nbl_need_check(vp)) {
3809 		nbl_start_crit(vp, RW_READER);
3810 		in_crit = 1;
3811 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0)) {
3812 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
3813 			kmem_free(nm, len);
3814 			nbl_end_crit(vp);
3815 			VN_RELE(vp);
3816 			if (fp) {
3817 				rfs4_clear_dont_grant(fp);
3818 				rfs4_file_rele(fp);
3819 			}
3820 			return;
3821 		}
3822 	}
3823 
3824 	/* check label before allowing removal */
3825 	if (is_system_labeled()) {
3826 		ASSERT(req->rq_label != NULL);
3827 		clabel = req->rq_label;
3828 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
3829 		    "got client label from request(1)",
3830 		    struct svc_req *, req);
3831 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3832 			if (!do_rfs4_label_check(clabel, vp, EQUALITY_CHECK)) {
3833 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
3834 				kmem_free(nm, len);
3835 				if (in_crit)
3836 					nbl_end_crit(vp);
3837 				VN_RELE(vp);
3838 				if (fp) {
3839 					rfs4_clear_dont_grant(fp);
3840 					rfs4_file_rele(fp);
3841 				}
3842 				return;
3843 			}
3844 		}
3845 	}
3846 
3847 	/* Get dir "before" change value */
3848 	bdva.va_mask = AT_CTIME|AT_SEQ;
3849 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr);
3850 	if (error) {
3851 		*cs->statusp = resp->status = puterrno4(error);
3852 		kmem_free(nm, len);
3853 		return;
3854 	}
3855 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
3856 
3857 	/* Actually do the REMOVE operation */
3858 	if (vp->v_type == VDIR) {
3859 		/*
3860 		 * Can't remove a directory that has a mounted-on filesystem.
3861 		 */
3862 		if (vn_ismntpt(vp)) {
3863 			error = EACCES;
3864 		} else {
3865 			/*
3866 			 * System V defines rmdir to return EEXIST,
3867 			 * not * ENOTEMPTY, if the directory is not
3868 			 * empty.  A System V NFS server needs to map
3869 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
3870 			 * transmit over the wire.
3871 			 */
3872 			if ((error = VOP_RMDIR(dvp, nm, rootdir, cs->cr))
3873 				== EEXIST)
3874 				error = ENOTEMPTY;
3875 		}
3876 	} else {
3877 		if ((error = VOP_REMOVE(dvp, nm, cs->cr)) == 0 &&
3878 			fp != NULL) {
3879 			struct vattr va;
3880 			vnode_t *tvp;
3881 
3882 			rfs4_dbe_lock(fp->dbe);
3883 			tvp = fp->vp;
3884 			if (tvp)
3885 				VN_HOLD(tvp);
3886 			rfs4_dbe_unlock(fp->dbe);
3887 
3888 			if (tvp) {
3889 				/*
3890 				 * This is va_seq safe because we are not
3891 				 * manipulating dvp.
3892 				 */
3893 				va.va_mask = AT_NLINK;
3894 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
3895 					va.va_nlink == 0) {
3896 					/* Remove state on file remove */
3897 					if (in_crit) {
3898 						nbl_end_crit(vp);
3899 						in_crit = 0;
3900 					}
3901 					rfs4_close_all_state(fp);
3902 				}
3903 				VN_RELE(tvp);
3904 			}
3905 		}
3906 	}
3907 
3908 	if (in_crit)
3909 		nbl_end_crit(vp);
3910 	VN_RELE(vp);
3911 
3912 	if (fp) {
3913 		rfs4_clear_dont_grant(fp);
3914 		rfs4_file_rele(fp);
3915 	}
3916 	kmem_free(nm, len);
3917 
3918 	if (error) {
3919 		*cs->statusp = resp->status = puterrno4(error);
3920 		return;
3921 	}
3922 
3923 	/*
3924 	 * Get the initial "after" sequence number, if it fails, set to zero
3925 	 */
3926 	idva.va_mask = AT_SEQ;
3927 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr))
3928 		idva.va_seq = 0;
3929 
3930 	/*
3931 	 * Force modified data and metadata out to stable storage.
3932 	 */
3933 	(void) VOP_FSYNC(dvp, 0, cs->cr);
3934 
3935 	/*
3936 	 * Get "after" change value, if it fails, simply return the
3937 	 * before value.
3938 	 */
3939 	adva.va_mask = AT_CTIME|AT_SEQ;
3940 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr)) {
3941 		adva.va_ctime = bdva.va_ctime;
3942 		adva.va_seq = 0;
3943 	}
3944 
3945 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
3946 
3947 	/*
3948 	 * The cinfo.atomic = TRUE only if we have
3949 	 * non-zero va_seq's, and it has incremented by exactly one
3950 	 * during the VOP_REMOVE/RMDIR and it didn't change during
3951 	 * the VOP_FSYNC.
3952 	 */
3953 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
3954 			idva.va_seq == (bdva.va_seq + 1) &&
3955 			idva.va_seq == adva.va_seq)
3956 		resp->cinfo.atomic = TRUE;
3957 	else
3958 		resp->cinfo.atomic = FALSE;
3959 
3960 	*cs->statusp = resp->status = NFS4_OK;
3961 }
3962 
3963 /*
3964  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
3965  *		oldname and newname.
3966  *	res: status. If success - CURRENT_FH unchanged, return change_info
3967  *		for both from and target directories.
3968  */
3969 /* ARGSUSED */
3970 static void
3971 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3972 	struct compound_state *cs)
3973 {
3974 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
3975 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
3976 	int error;
3977 	vnode_t *odvp;
3978 	vnode_t *ndvp;
3979 	vnode_t *srcvp, *targvp;
3980 	struct vattr obdva, oidva, oadva;
3981 	struct vattr nbdva, nidva, nadva;
3982 	char *onm, *nnm;
3983 	uint_t olen, nlen;
3984 	rfs4_file_t *fp, *sfp;
3985 	int in_crit_src, in_crit_targ;
3986 	int fp_rele_grant_hold, sfp_rele_grant_hold;
3987 	bslabel_t *clabel;
3988 
3989 	fp = sfp = NULL;
3990 	srcvp = targvp = NULL;
3991 	in_crit_src = in_crit_targ = 0;
3992 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
3993 
3994 	/* CURRENT_FH: target directory */
3995 	ndvp = cs->vp;
3996 	if (ndvp == NULL) {
3997 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3998 		return;
3999 	}
4000 
4001 	/* SAVED_FH: from directory */
4002 	odvp = cs->saved_vp;
4003 	if (odvp == NULL) {
4004 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4005 		return;
4006 	}
4007 
4008 	if (cs->access == CS_ACCESS_DENIED) {
4009 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4010 		return;
4011 	}
4012 
4013 	/*
4014 	 * If there is an unshared filesystem mounted on this vnode,
4015 	 * do not allow to rename objects in this directory.
4016 	 */
4017 	if (vn_ismntpt(odvp)) {
4018 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4019 		return;
4020 	}
4021 
4022 	/*
4023 	 * If there is an unshared filesystem mounted on this vnode,
4024 	 * do not allow to rename to this directory.
4025 	 */
4026 	if (vn_ismntpt(ndvp)) {
4027 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4028 		return;
4029 	}
4030 
4031 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4032 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4033 		return;
4034 	}
4035 
4036 	if (cs->saved_exi != cs->exi) {
4037 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4038 		return;
4039 	}
4040 
4041 	if (!utf8_dir_verify(&args->oldname)) {
4042 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4043 		return;
4044 	}
4045 
4046 	if (!utf8_dir_verify(&args->newname)) {
4047 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4048 		return;
4049 	}
4050 
4051 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4052 	if (onm == NULL) {
4053 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4054 		return;
4055 	}
4056 
4057 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4058 	if (nnm == NULL) {
4059 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4060 		kmem_free(onm, olen);
4061 		return;
4062 	}
4063 
4064 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4065 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4066 		kmem_free(onm, olen);
4067 		kmem_free(nnm, nlen);
4068 		return;
4069 	}
4070 
4071 
4072 	if (rdonly4(cs->exi, cs->vp, req)) {
4073 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4074 		kmem_free(onm, olen);
4075 		kmem_free(nnm, nlen);
4076 		return;
4077 	}
4078 
4079 	/* check label of the target dir */
4080 	if (is_system_labeled()) {
4081 		ASSERT(req->rq_label != NULL);
4082 		clabel = req->rq_label;
4083 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4084 		    "got client label from request(1)",
4085 		    struct svc_req *, req);
4086 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4087 			if (!do_rfs4_label_check(clabel, ndvp,
4088 			    EQUALITY_CHECK)) {
4089 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4090 				return;
4091 			}
4092 		}
4093 	}
4094 
4095 	/*
4096 	 * Is the source a file and have a delegation?
4097 	 * We don't need to acquire va_seq before these lookups, if
4098 	 * it causes an update, cinfo.before will not match, which will
4099 	 * trigger a cache flush even if atomic is TRUE.
4100 	 */
4101 	if (sfp = rfs4_lookup_and_findfile(odvp, onm, &srcvp, &error, cs->cr)) {
4102 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4103 						NULL)) {
4104 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4105 			goto err_out;
4106 		}
4107 	}
4108 
4109 	if (srcvp == NULL) {
4110 		*cs->statusp = resp->status = puterrno4(error);
4111 		kmem_free(onm, olen);
4112 		kmem_free(nnm, nlen);
4113 		return;
4114 	}
4115 
4116 	sfp_rele_grant_hold = 1;
4117 
4118 	/* Does the destination exist and a file and have a delegation? */
4119 	if (fp = rfs4_lookup_and_findfile(ndvp, nnm, &targvp, NULL, cs->cr)) {
4120 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4121 						NULL)) {
4122 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4123 			goto err_out;
4124 		}
4125 	}
4126 	fp_rele_grant_hold = 1;
4127 
4128 
4129 	/* Check for NBMAND lock on both source and target */
4130 	if (nbl_need_check(srcvp)) {
4131 		nbl_start_crit(srcvp, RW_READER);
4132 		in_crit_src = 1;
4133 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0)) {
4134 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4135 			goto err_out;
4136 		}
4137 	}
4138 
4139 	if (targvp && nbl_need_check(targvp)) {
4140 		nbl_start_crit(targvp, RW_READER);
4141 		in_crit_targ = 1;
4142 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0)) {
4143 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4144 			goto err_out;
4145 		}
4146 	}
4147 
4148 	/* Get source "before" change value */
4149 	obdva.va_mask = AT_CTIME|AT_SEQ;
4150 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr);
4151 	if (!error) {
4152 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4153 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr);
4154 	}
4155 	if (error) {
4156 		*cs->statusp = resp->status = puterrno4(error);
4157 		goto err_out;
4158 	}
4159 
4160 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4161 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4162 
4163 	if ((error = VOP_RENAME(odvp, onm, ndvp, nnm, cs->cr)) == 0 &&
4164 		fp != NULL) {
4165 		struct vattr va;
4166 		vnode_t *tvp;
4167 
4168 		rfs4_dbe_lock(fp->dbe);
4169 		tvp = fp->vp;
4170 		if (tvp)
4171 			VN_HOLD(tvp);
4172 		rfs4_dbe_unlock(fp->dbe);
4173 
4174 		if (tvp) {
4175 			va.va_mask = AT_NLINK;
4176 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr) &&
4177 				va.va_nlink == 0) {
4178 				/* The file is gone and so should the state */
4179 				if (in_crit_targ) {
4180 					nbl_end_crit(targvp);
4181 					in_crit_targ = 0;
4182 				}
4183 				rfs4_close_all_state(fp);
4184 			}
4185 			VN_RELE(tvp);
4186 		}
4187 	}
4188 	if (error == 0) {
4189 		char *tmp;
4190 
4191 		/* fix the path name for the renamed file */
4192 		mutex_enter(&srcvp->v_lock);
4193 		tmp = srcvp->v_path;
4194 		srcvp->v_path = NULL;
4195 		mutex_exit(&srcvp->v_lock);
4196 		vn_setpath(rootdir, ndvp, srcvp, nnm, nlen - 1);
4197 		if (tmp != NULL)
4198 			kmem_free(tmp, strlen(tmp) + 1);
4199 	}
4200 
4201 	if (in_crit_src)
4202 		nbl_end_crit(srcvp);
4203 	if (srcvp)
4204 		VN_RELE(srcvp);
4205 	if (in_crit_targ)
4206 		nbl_end_crit(targvp);
4207 	if (targvp)
4208 		VN_RELE(targvp);
4209 
4210 	if (sfp) {
4211 		rfs4_clear_dont_grant(sfp);
4212 		rfs4_file_rele(sfp);
4213 	}
4214 	if (fp) {
4215 		rfs4_clear_dont_grant(fp);
4216 		rfs4_file_rele(fp);
4217 	}
4218 
4219 	kmem_free(onm, olen);
4220 	kmem_free(nnm, nlen);
4221 
4222 	/*
4223 	 * Get the initial "after" sequence number, if it fails, set to zero
4224 	 */
4225 	oidva.va_mask = AT_SEQ;
4226 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr))
4227 		oidva.va_seq = 0;
4228 
4229 	nidva.va_mask = AT_SEQ;
4230 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr))
4231 		nidva.va_seq = 0;
4232 
4233 	/*
4234 	 * Force modified data and metadata out to stable storage.
4235 	 */
4236 	(void) VOP_FSYNC(odvp, 0, cs->cr);
4237 	(void) VOP_FSYNC(ndvp, 0, cs->cr);
4238 
4239 	if (error) {
4240 		*cs->statusp = resp->status = puterrno4(error);
4241 		return;
4242 	}
4243 
4244 	/*
4245 	 * Get "after" change values, if it fails, simply return the
4246 	 * before value.
4247 	 */
4248 	oadva.va_mask = AT_CTIME|AT_SEQ;
4249 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr)) {
4250 		oadva.va_ctime = obdva.va_ctime;
4251 		oadva.va_seq = 0;
4252 	}
4253 
4254 	nadva.va_mask = AT_CTIME|AT_SEQ;
4255 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr)) {
4256 		nadva.va_ctime = nbdva.va_ctime;
4257 		nadva.va_seq = 0;
4258 	}
4259 
4260 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4261 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4262 
4263 	/*
4264 	 * The cinfo.atomic = TRUE only if we have
4265 	 * non-zero va_seq's, and it has incremented by exactly one
4266 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4267 	 */
4268 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4269 			oidva.va_seq == (obdva.va_seq + 1) &&
4270 			oidva.va_seq == oadva.va_seq)
4271 		resp->source_cinfo.atomic = TRUE;
4272 	else
4273 		resp->source_cinfo.atomic = FALSE;
4274 
4275 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4276 			nidva.va_seq == (nbdva.va_seq + 1) &&
4277 			nidva.va_seq == nadva.va_seq)
4278 		resp->target_cinfo.atomic = TRUE;
4279 	else
4280 		resp->target_cinfo.atomic = FALSE;
4281 
4282 #ifdef	VOLATILE_FH_TEST
4283 	{
4284 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4285 
4286 	/*
4287 	 * Add the renamed file handle to the volatile rename list
4288 	 */
4289 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4290 		/* file handles may expire on rename */
4291 		vnode_t *vp;
4292 
4293 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4294 		/*
4295 		 * Already know that nnm will be a valid string
4296 		 */
4297 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr);
4298 		kmem_free(nnm, nlen);
4299 		if (!error) {
4300 			add_volrnm_fh(cs->exi, vp);
4301 			VN_RELE(vp);
4302 		}
4303 	}
4304 	}
4305 #endif	/* VOLATILE_FH_TEST */
4306 
4307 	*cs->statusp = resp->status = NFS4_OK;
4308 	return;
4309 
4310 err_out:
4311 	kmem_free(onm, olen);
4312 	kmem_free(nnm, nlen);
4313 
4314 	if (in_crit_src) nbl_end_crit(srcvp);
4315 	if (in_crit_targ) nbl_end_crit(targvp);
4316 	if (targvp) VN_RELE(targvp);
4317 	if (srcvp) VN_RELE(srcvp);
4318 	if (sfp) {
4319 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4320 		rfs4_file_rele(sfp);
4321 	}
4322 	if (fp) {
4323 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4324 		rfs4_file_rele(fp);
4325 	}
4326 }
4327 
4328 /* ARGSUSED */
4329 static void
4330 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4331 	struct compound_state *cs)
4332 {
4333 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4334 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4335 	rfs4_client_t *cp;
4336 
4337 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4338 		*cs->statusp = resp->status =
4339 			rfs4_check_clientid(&args->clientid, 0);
4340 		return;
4341 	}
4342 
4343 	if (rfs4_lease_expired(cp)) {
4344 		rfs4_client_rele(cp);
4345 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4346 		return;
4347 	}
4348 
4349 	rfs4_update_lease(cp);
4350 
4351 	mutex_enter(cp->cbinfo.cb_lock);
4352 	if (cp->cbinfo.cb_notified_of_cb_path_down == FALSE) {
4353 		cp->cbinfo.cb_notified_of_cb_path_down = TRUE;
4354 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4355 	} else {
4356 		*cs->statusp = resp->status = NFS4_OK;
4357 	}
4358 	mutex_exit(cp->cbinfo.cb_lock);
4359 
4360 	rfs4_client_rele(cp);
4361 
4362 }
4363 
4364 /* ARGSUSED */
4365 static void
4366 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4367 	struct compound_state *cs)
4368 {
4369 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4370 
4371 	/* No need to check cs->access - we are not accessing any object */
4372 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4373 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4374 		return;
4375 	}
4376 	if (cs->vp != NULL) {
4377 		VN_RELE(cs->vp);
4378 	}
4379 	cs->vp = cs->saved_vp;
4380 	cs->saved_vp = NULL;
4381 	cs->exi = cs->saved_exi;
4382 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4383 	*cs->statusp = resp->status = NFS4_OK;
4384 	cs->deleg = FALSE;
4385 }
4386 
4387 /* ARGSUSED */
4388 static void
4389 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4390 	struct compound_state *cs)
4391 {
4392 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4393 
4394 	/* No need to check cs->access - we are not accessing any object */
4395 	if (cs->vp == NULL) {
4396 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4397 		return;
4398 	}
4399 	if (cs->saved_vp != NULL) {
4400 		VN_RELE(cs->saved_vp);
4401 	}
4402 	cs->saved_vp = cs->vp;
4403 	VN_HOLD(cs->saved_vp);
4404 	cs->saved_exi = cs->exi;
4405 	/*
4406 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4407 	 * unless necessary.
4408 	 */
4409 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4410 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4411 	}
4412 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4413 	*cs->statusp = resp->status = NFS4_OK;
4414 }
4415 
4416 /*
4417  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4418  * return the bitmap of attrs that were set successfully. It is also
4419  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4420  * always be called only after rfs4_do_set_attrs().
4421  *
4422  * Verify that the attributes are same as the expected ones. sargp->vap
4423  * and sargp->sbp contain the input attributes as translated from fattr4.
4424  *
4425  * This function verifies only the attrs that correspond to a vattr or
4426  * vfsstat struct. That is because of the extra step needed to get the
4427  * corresponding system structs. Other attributes have already been set or
4428  * verified by do_rfs4_set_attrs.
4429  *
4430  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4431  */
4432 static int
4433 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4434 	bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4435 {
4436 	int error, ret_error = 0;
4437 	int i, k;
4438 	uint_t sva_mask = sargp->vap->va_mask;
4439 	uint_t vbit;
4440 	union nfs4_attr_u *na;
4441 	uint8_t *amap;
4442 	bool_t getsb = ntovp->vfsstat;
4443 
4444 	if (sva_mask != 0) {
4445 		/*
4446 		 * Okay to overwrite sargp->vap because we verify based
4447 		 * on the incoming values.
4448 		 */
4449 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4450 				sargp->cs->cr);
4451 		if (ret_error) {
4452 			if (resp == NULL)
4453 				return (ret_error);
4454 			/*
4455 			 * Must return bitmap of successful attrs
4456 			 */
4457 			sva_mask = 0;	/* to prevent checking vap later */
4458 		} else {
4459 			/*
4460 			 * Some file systems clobber va_mask. it is probably
4461 			 * wrong of them to do so, nonethless we practice
4462 			 * defensive coding.
4463 			 * See bug id 4276830.
4464 			 */
4465 			sargp->vap->va_mask = sva_mask;
4466 		}
4467 	}
4468 
4469 	if (getsb) {
4470 		/*
4471 		 * Now get the superblock and loop on the bitmap, as there is
4472 		 * no simple way of translating from superblock to bitmap4.
4473 		 */
4474 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4475 		if (ret_error) {
4476 			if (resp == NULL)
4477 				goto errout;
4478 			getsb = FALSE;
4479 		}
4480 	}
4481 
4482 	/*
4483 	 * Now loop and verify each attribute which getattr returned
4484 	 * whether it's the same as the input.
4485 	 */
4486 	if (resp == NULL && !getsb && (sva_mask == 0))
4487 		goto errout;
4488 
4489 	na = ntovp->na;
4490 	amap = ntovp->amap;
4491 	k = 0;
4492 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4493 		k = *amap;
4494 		ASSERT(nfs4_ntov_map[k].nval == k);
4495 		vbit = nfs4_ntov_map[k].vbit;
4496 
4497 		/*
4498 		 * If vattr attribute but VOP_GETATTR failed, or it's
4499 		 * superblock attribute but VFS_STATVFS failed, skip
4500 		 */
4501 		if (vbit) {
4502 			if ((vbit & sva_mask) == 0)
4503 				continue;
4504 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4505 			continue;
4506 		}
4507 		error = (*nfs4_ntov_map[k].sv_getit)(
4508 				NFS4ATTR_VERIT, sargp, na);
4509 		if (resp != NULL) {
4510 			if (error)
4511 				ret_error = -1;	/* not all match */
4512 			else	/* update response bitmap */
4513 				*resp |= nfs4_ntov_map[k].fbit;
4514 			continue;
4515 		}
4516 		if (error) {
4517 			ret_error = -1;	/* not all match */
4518 			break;
4519 		}
4520 	}
4521 errout:
4522 	return (ret_error);
4523 }
4524 
4525 /*
4526  * Decode the attribute to be set/verified. If the attr requires a sys op
4527  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4528  * call the sv_getit function for it, because the sys op hasn't yet been done.
4529  * Return 0 for success, error code if failed.
4530  *
4531  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
4532  */
4533 static int
4534 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
4535 	int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
4536 {
4537 	int error = 0;
4538 	bool_t set_later;
4539 
4540 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
4541 
4542 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
4543 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
4544 		/*
4545 		 * don't verify yet if a vattr or sb dependent attr,
4546 		 * because we don't have their sys values yet.
4547 		 * Will be done later.
4548 		 */
4549 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
4550 			/*
4551 			 * ACLs are a special case, since setting the MODE
4552 			 * conflicts with setting the ACL.  We delay setting
4553 			 * the ACL until all other attributes have been set.
4554 			 * The ACL gets set in do_rfs4_op_setattr().
4555 			 */
4556 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
4557 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
4558 				    sargp, nap);
4559 				if (error) {
4560 					xdr_free(nfs4_ntov_map[k].xfunc,
4561 					    (caddr_t)nap);
4562 				}
4563 			}
4564 		}
4565 	} else {
4566 #ifdef  DEBUG
4567 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
4568 			"decoding attribute %d\n", k);
4569 #endif
4570 		error = EINVAL;
4571 	}
4572 	if (!error && resp_bval && !set_later) {
4573 		*resp_bval |= nfs4_ntov_map[k].fbit;
4574 	}
4575 
4576 	return (error);
4577 }
4578 
4579 /*
4580  * Set vattr based on incoming fattr4 attrs - used by setattr.
4581  * Set response mask. Ignore any values that are not writable vattr attrs.
4582  */
4583 static nfsstat4
4584 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4585 		struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
4586 		nfs4_attr_cmd_t cmd)
4587 {
4588 	int error = 0;
4589 	int i;
4590 	char *attrs = fattrp->attrlist4;
4591 	uint32_t attrslen = fattrp->attrlist4_len;
4592 	XDR xdr;
4593 	nfsstat4 status = NFS4_OK;
4594 	vnode_t *vp = cs->vp;
4595 	union nfs4_attr_u *na;
4596 	uint8_t *amap;
4597 
4598 #ifndef lint
4599 	/*
4600 	 * Make sure that maximum attribute number can be expressed as an
4601 	 * 8 bit quantity.
4602 	 */
4603 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
4604 #endif
4605 
4606 	if (vp == NULL) {
4607 		if (resp)
4608 			*resp = 0;
4609 		return (NFS4ERR_NOFILEHANDLE);
4610 	}
4611 	if (cs->access == CS_ACCESS_DENIED) {
4612 		if (resp)
4613 			*resp = 0;
4614 		return (NFS4ERR_ACCESS);
4615 	}
4616 
4617 	sargp->op = cmd;
4618 	sargp->cs = cs;
4619 	sargp->flag = 0;	/* may be set later */
4620 	sargp->vap->va_mask = 0;
4621 	sargp->rdattr_error = NFS4_OK;
4622 	sargp->rdattr_error_req = FALSE;
4623 	/* sargp->sbp is set by the caller */
4624 
4625 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
4626 
4627 	na = ntovp->na;
4628 	amap = ntovp->amap;
4629 
4630 	/*
4631 	 * The following loop iterates on the nfs4_ntov_map checking
4632 	 * if the fbit is set in the requested bitmap.
4633 	 * If set then we process the arguments using the
4634 	 * rfs4_fattr4 conversion functions to populate the setattr
4635 	 * vattr and va_mask. Any settable attrs that are not using vattr
4636 	 * will be set in this loop.
4637 	 */
4638 	for (i = 0; i < nfs4_ntov_map_size; i++) {
4639 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
4640 			continue;
4641 		}
4642 		/*
4643 		 * If setattr, must be a writable attr.
4644 		 * If verify/nverify, must be a readable attr.
4645 		 */
4646 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
4647 				    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
4648 			/*
4649 			 * Client tries to set/verify an
4650 			 * unsupported attribute, tries to set
4651 			 * a read only attr or verify a write
4652 			 * only one - error!
4653 			 */
4654 			break;
4655 		}
4656 		/*
4657 		 * Decode the attribute to set/verify
4658 		 */
4659 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
4660 					&xdr, resp ? resp : NULL, na);
4661 		if (error)
4662 			break;
4663 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
4664 		na++;
4665 		(ntovp->attrcnt)++;
4666 		if (nfs4_ntov_map[i].vfsstat)
4667 			ntovp->vfsstat = TRUE;
4668 	}
4669 
4670 	if (error != 0)
4671 		status = (error == ENOTSUP ?	NFS4ERR_ATTRNOTSUPP :
4672 						puterrno4(error));
4673 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
4674 	return (status);
4675 }
4676 
4677 static nfsstat4
4678 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
4679 		stateid4 *stateid)
4680 {
4681 	int error = 0;
4682 	struct nfs4_svgetit_arg sarg;
4683 	bool_t trunc;
4684 
4685 	nfsstat4 status = NFS4_OK;
4686 	cred_t *cr = cs->cr;
4687 	vnode_t *vp = cs->vp;
4688 	struct nfs4_ntov_table ntov;
4689 	struct statvfs64 sb;
4690 	struct vattr bva;
4691 	struct flock64 bf;
4692 	int in_crit = 0;
4693 	uint_t saved_mask = 0;
4694 	caller_context_t ct;
4695 
4696 	*resp = 0;
4697 	sarg.sbp = &sb;
4698 	nfs4_ntov_table_init(&ntov);
4699 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
4700 			NFS4ATTR_SETIT);
4701 	if (status != NFS4_OK) {
4702 		/*
4703 		 * failed set attrs
4704 		 */
4705 		goto done;
4706 	}
4707 	if ((sarg.vap->va_mask == 0) &&
4708 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
4709 		/*
4710 		 * no further work to be done
4711 		 */
4712 		goto done;
4713 	}
4714 
4715 	/*
4716 	 * If we got a request to set the ACL and the MODE, only
4717 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
4718 	 * to change any other bits, along with setting an ACL,
4719 	 * gives NFS4ERR_INVAL.
4720 	 */
4721 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
4722 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
4723 		vattr_t va;
4724 
4725 		va.va_mask = AT_MODE;
4726 		error = VOP_GETATTR(vp, &va, 0, cs->cr);
4727 		if (error) {
4728 			status = puterrno4(error);
4729 			goto done;
4730 		}
4731 		if ((sarg.vap->va_mode ^ va.va_mode) &
4732 		    ~(VSUID | VSGID | VSVTX)) {
4733 			status = NFS4ERR_INVAL;
4734 			goto done;
4735 		}
4736 	}
4737 
4738 	/* Check stateid only if size has been set */
4739 	if (sarg.vap->va_mask & AT_SIZE) {
4740 		trunc = (sarg.vap->va_size == 0);
4741 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
4742 			trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE);
4743 		if (status != NFS4_OK)
4744 			goto done;
4745 	}
4746 
4747 	ct.cc_sysid = 0;
4748 	ct.cc_pid = 0;
4749 	ct.cc_caller_id = nfs4_srv_caller_id;
4750 
4751 	/* XXX start of possible race with delegations */
4752 
4753 	/*
4754 	 * We need to specially handle size changes because it is
4755 	 * possible for the client to create a file with read-only
4756 	 * modes, but with the file opened for writing. If the client
4757 	 * then tries to set the file size, e.g. ftruncate(3C),
4758 	 * fcntl(F_FREESP), the normal access checking done in
4759 	 * VOP_SETATTR would prevent the client from doing it even though
4760 	 * it should be allowed to do so.  To get around this, we do the
4761 	 * access checking for ourselves and use VOP_SPACE which doesn't
4762 	 * do the access checking.
4763 	 * Also the client should not be allowed to change the file
4764 	 * size if there is a conflicting non-blocking mandatory lock in
4765 	 * the region of the change.
4766 	 */
4767 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
4768 		u_offset_t offset;
4769 		ssize_t length;
4770 
4771 		/*
4772 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
4773 		 * before returning, sarg.vap->va_mask is used to
4774 		 * generate the setattr reply bitmap.  We also clear
4775 		 * AT_SIZE below before calling VOP_SPACE.  For both
4776 		 * of these cases, the va_mask needs to be saved here
4777 		 * and restored after calling VOP_SETATTR.
4778 		 */
4779 		saved_mask = sarg.vap->va_mask;
4780 
4781 		/*
4782 		 * Check any possible conflict due to NBMAND locks.
4783 		 * Get into critical region before VOP_GETATTR, so the
4784 		 * size attribute is valid when checking conflicts.
4785 		 */
4786 		if (nbl_need_check(vp)) {
4787 			nbl_start_crit(vp, RW_READER);
4788 			in_crit = 1;
4789 		}
4790 
4791 		bva.va_mask = AT_UID|AT_SIZE;
4792 		if (error = VOP_GETATTR(vp, &bva, 0, cr)) {
4793 			status = puterrno4(error);
4794 			goto done;
4795 		}
4796 
4797 		if (in_crit) {
4798 			if (sarg.vap->va_size < bva.va_size) {
4799 				offset = sarg.vap->va_size;
4800 				length = bva.va_size - sarg.vap->va_size;
4801 			} else {
4802 				offset = bva.va_size;
4803 				length = sarg.vap->va_size - bva.va_size;
4804 			}
4805 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0)) {
4806 				status = NFS4ERR_LOCKED;
4807 				goto done;
4808 			}
4809 		}
4810 
4811 		if (crgetuid(cr) == bva.va_uid) {
4812 			sarg.vap->va_mask &= ~AT_SIZE;
4813 			bf.l_type = F_WRLCK;
4814 			bf.l_whence = 0;
4815 			bf.l_start = (off64_t)sarg.vap->va_size;
4816 			bf.l_len = 0;
4817 			bf.l_sysid = 0;
4818 			bf.l_pid = 0;
4819 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
4820 					(offset_t)sarg.vap->va_size, cr, &ct);
4821 		}
4822 	}
4823 
4824 	if (!error && sarg.vap->va_mask != 0)
4825 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
4826 
4827 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
4828 	if (saved_mask & AT_SIZE)
4829 		sarg.vap->va_mask |= AT_SIZE;
4830 
4831 	/*
4832 	 * If an ACL was being set, it has been delayed until now,
4833 	 * in order to set the mode (via the VOP_SETATTR() above) first.
4834 	 */
4835 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
4836 		int i;
4837 
4838 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
4839 			if (ntov.amap[i] == FATTR4_ACL)
4840 				break;
4841 		if (i < NFS4_MAXNUM_ATTRS) {
4842 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
4843 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
4844 			if (error == 0) {
4845 				*resp |= FATTR4_ACL_MASK;
4846 			} else if (error == ENOTSUP) {
4847 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
4848 				status = NFS4ERR_ATTRNOTSUPP;
4849 				goto done;
4850 			}
4851 		} else {
4852 			NFS4_DEBUG(rfs4_debug,
4853 			    (CE_NOTE, "do_rfs4_op_setattr: "
4854 			    "unable to find ACL in fattr4"));
4855 			error = EINVAL;
4856 		}
4857 	}
4858 
4859 	if (error) {
4860 		status = puterrno4(error);
4861 
4862 		/*
4863 		 * Set the response bitmap when setattr failed.
4864 		 * If VOP_SETATTR partially succeeded, test by doing a
4865 		 * VOP_GETATTR on the object and comparing the data
4866 		 * to the setattr arguments.
4867 		 */
4868 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
4869 	} else {
4870 		/*
4871 		 * Force modified metadata out to stable storage.
4872 		 */
4873 		(void) VOP_FSYNC(vp, FNODSYNC, cr);
4874 		/*
4875 		 * Set response bitmap
4876 		 */
4877 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
4878 	}
4879 
4880 /* Return early and already have a NFSv4 error */
4881 done:
4882 	/*
4883 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
4884 	 * conversion sets both readable and writeable NFS4 attrs
4885 	 * for AT_MTIME and AT_ATIME.  The line below masks out
4886 	 * unrequested attrs from the setattr result bitmap.  This
4887 	 * is placed after the done: label to catch the ATTRNOTSUP
4888 	 * case.
4889 	 */
4890 	*resp &= fattrp->attrmask;
4891 
4892 	if (in_crit)
4893 		nbl_end_crit(vp);
4894 
4895 	nfs4_ntov_table_free(&ntov, &sarg);
4896 
4897 	return (status);
4898 }
4899 
4900 /* ARGSUSED */
4901 static void
4902 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4903 	struct compound_state *cs)
4904 {
4905 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
4906 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
4907 	bslabel_t *clabel;
4908 
4909 	if (cs->vp == NULL) {
4910 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4911 		return;
4912 	}
4913 
4914 	/*
4915 	 * If there is an unshared filesystem mounted on this vnode,
4916 	 * do not allow to setattr on this vnode.
4917 	 */
4918 	if (vn_ismntpt(cs->vp)) {
4919 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4920 		return;
4921 	}
4922 
4923 	resp->attrsset = 0;
4924 
4925 	if (rdonly4(cs->exi, cs->vp, req)) {
4926 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4927 		return;
4928 	}
4929 
4930 	/* check label before setting attributes */
4931 	if (is_system_labeled()) {
4932 		ASSERT(req->rq_label != NULL);
4933 		clabel = req->rq_label;
4934 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
4935 		    "got client label from request(1)",
4936 		    struct svc_req *, req);
4937 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4938 			if (!do_rfs4_label_check(clabel, cs->vp,
4939 			    EQUALITY_CHECK)) {
4940 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4941 				return;
4942 			}
4943 		}
4944 	}
4945 
4946 	*cs->statusp = resp->status =
4947 		do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
4948 			&args->stateid);
4949 }
4950 
4951 /* ARGSUSED */
4952 static void
4953 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4954 	struct compound_state *cs)
4955 {
4956 	/*
4957 	 * verify and nverify are exactly the same, except that nverify
4958 	 * succeeds when some argument changed, and verify succeeds when
4959 	 * when none changed.
4960 	 */
4961 
4962 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
4963 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
4964 
4965 	int error;
4966 	struct nfs4_svgetit_arg sarg;
4967 	struct statvfs64 sb;
4968 	struct nfs4_ntov_table ntov;
4969 
4970 	if (cs->vp == NULL) {
4971 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4972 		return;
4973 	}
4974 
4975 	sarg.sbp = &sb;
4976 	nfs4_ntov_table_init(&ntov);
4977 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
4978 				&sarg, &ntov, NFS4ATTR_VERIT);
4979 	if (resp->status != NFS4_OK) {
4980 		/*
4981 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
4982 		 * so could return -1 for "no match".
4983 		 */
4984 		if (resp->status == -1)
4985 			resp->status = NFS4ERR_NOT_SAME;
4986 		goto done;
4987 	}
4988 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
4989 	switch (error) {
4990 	case 0:
4991 		resp->status = NFS4_OK;
4992 		break;
4993 	case -1:
4994 		resp->status = NFS4ERR_NOT_SAME;
4995 		break;
4996 	default:
4997 		resp->status = puterrno4(error);
4998 		break;
4999 	}
5000 done:
5001 	*cs->statusp = resp->status;
5002 	nfs4_ntov_table_free(&ntov, &sarg);
5003 }
5004 
5005 /* ARGSUSED */
5006 static void
5007 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5008 	struct compound_state *cs)
5009 {
5010 	/*
5011 	 * verify and nverify are exactly the same, except that nverify
5012 	 * succeeds when some argument changed, and verify succeeds when
5013 	 * when none changed.
5014 	 */
5015 
5016 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5017 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5018 
5019 	int error;
5020 	struct nfs4_svgetit_arg sarg;
5021 	struct statvfs64 sb;
5022 	struct nfs4_ntov_table ntov;
5023 
5024 	if (cs->vp == NULL) {
5025 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5026 		return;
5027 	}
5028 	sarg.sbp = &sb;
5029 	nfs4_ntov_table_init(&ntov);
5030 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5031 				&sarg, &ntov, NFS4ATTR_VERIT);
5032 	if (resp->status != NFS4_OK) {
5033 		/*
5034 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5035 		 * so could return -1 for "no match".
5036 		 */
5037 		if (resp->status == -1)
5038 			resp->status = NFS4_OK;
5039 		goto done;
5040 	}
5041 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5042 	switch (error) {
5043 	case 0:
5044 		resp->status = NFS4ERR_SAME;
5045 		break;
5046 	case -1:
5047 		resp->status = NFS4_OK;
5048 		break;
5049 	default:
5050 		resp->status = puterrno4(error);
5051 		break;
5052 	}
5053 done:
5054 	*cs->statusp = resp->status;
5055 	nfs4_ntov_table_free(&ntov, &sarg);
5056 }
5057 
5058 /*
5059  * XXX - This should live in an NFS header file.
5060  */
5061 #define	MAX_IOVECS	12
5062 
5063 /* ARGSUSED */
5064 static void
5065 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5066 	struct compound_state *cs)
5067 {
5068 	WRITE4args  *args = &argop->nfs_argop4_u.opwrite;
5069 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5070 	int error;
5071 	vnode_t *vp;
5072 	struct vattr bva;
5073 	u_offset_t rlimit;
5074 	struct uio uio;
5075 	struct iovec iov[MAX_IOVECS];
5076 	struct iovec *iovp;
5077 	int iovcnt;
5078 	int ioflag;
5079 	cred_t *savecred, *cr;
5080 	bool_t *deleg = &cs->deleg;
5081 	nfsstat4 stat;
5082 	int in_crit = 0;
5083 
5084 	vp = cs->vp;
5085 	if (vp == NULL) {
5086 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5087 		return;
5088 	}
5089 	if (cs->access == CS_ACCESS_DENIED) {
5090 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5091 		return;
5092 	}
5093 
5094 	cr = cs->cr;
5095 
5096 	/*
5097 	 * We have to enter the critical region before calling VOP_RWLOCK
5098 	 * to avoid a deadlock with ufs.
5099 	 */
5100 	if (nbl_need_check(vp)) {
5101 		nbl_start_crit(vp, RW_READER);
5102 		in_crit = 1;
5103 		if (nbl_conflict(vp, NBL_WRITE,
5104 				args->offset, args->data_len, 0)) {
5105 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5106 			goto out;
5107 		}
5108 	}
5109 
5110 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5111 					deleg, TRUE)) != NFS4_OK) {
5112 		*cs->statusp = resp->status = stat;
5113 		goto out;
5114 	}
5115 
5116 	bva.va_mask = AT_MODE | AT_UID;
5117 	error = VOP_GETATTR(vp, &bva, 0, cr);
5118 
5119 	/*
5120 	 * If we can't get the attributes, then we can't do the
5121 	 * right access checking.  So, we'll fail the request.
5122 	 */
5123 	if (error) {
5124 		*cs->statusp = resp->status = puterrno4(error);
5125 		goto out;
5126 	}
5127 
5128 	if (rdonly4(cs->exi, cs->vp, req)) {
5129 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5130 		goto out;
5131 	}
5132 
5133 	if (vp->v_type != VREG) {
5134 		*cs->statusp = resp->status =
5135 			((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5136 		goto out;
5137 	}
5138 
5139 	if (crgetuid(cr) != bva.va_uid &&
5140 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr))) {
5141 		*cs->statusp = resp->status = puterrno4(error);
5142 		goto out;
5143 	}
5144 
5145 	if (MANDLOCK(vp, bva.va_mode)) {
5146 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5147 		goto out;
5148 	}
5149 
5150 	if (args->data_len == 0) {
5151 		*cs->statusp = resp->status = NFS4_OK;
5152 		resp->count = 0;
5153 		resp->committed = args->stable;
5154 		resp->writeverf = Write4verf;
5155 		goto out;
5156 	}
5157 
5158 	if (args->mblk != NULL) {
5159 		mblk_t *m;
5160 		uint_t bytes, round_len;
5161 
5162 		iovcnt = 0;
5163 		bytes = 0;
5164 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5165 		for (m = args->mblk;
5166 		    m != NULL && bytes < round_len;
5167 		    m = m->b_cont) {
5168 			iovcnt++;
5169 			bytes += MBLKL(m);
5170 		}
5171 #ifdef DEBUG
5172 		/* should have ended on an mblk boundary */
5173 		if (bytes != round_len) {
5174 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5175 			    bytes, round_len, args->data_len);
5176 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5177 			    (void *)args->mblk, (void *)m);
5178 			ASSERT(bytes == round_len);
5179 		}
5180 #endif
5181 		if (iovcnt <= MAX_IOVECS) {
5182 			iovp = iov;
5183 		} else {
5184 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5185 		}
5186 		mblk_to_iov(args->mblk, iovcnt, iovp);
5187 	} else {
5188 		iovcnt = 1;
5189 		iovp = iov;
5190 		iovp->iov_base = args->data_val;
5191 		iovp->iov_len = args->data_len;
5192 	}
5193 
5194 	uio.uio_iov = iovp;
5195 	uio.uio_iovcnt = iovcnt;
5196 
5197 	uio.uio_segflg = UIO_SYSSPACE;
5198 	uio.uio_extflg = UIO_COPY_DEFAULT;
5199 	uio.uio_loffset = args->offset;
5200 	uio.uio_resid = args->data_len;
5201 	uio.uio_llimit = curproc->p_fsz_ctl;
5202 	rlimit = uio.uio_llimit - args->offset;
5203 	if (rlimit < (u_offset_t)uio.uio_resid)
5204 		uio.uio_resid = (int)rlimit;
5205 
5206 	if (args->stable == UNSTABLE4)
5207 		ioflag = 0;
5208 	else if (args->stable == FILE_SYNC4)
5209 		ioflag = FSYNC;
5210 	else if (args->stable == DATA_SYNC4)
5211 		ioflag = FDSYNC;
5212 	else {
5213 		if (iovp != iov)
5214 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5215 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5216 		goto out;
5217 	}
5218 
5219 	/*
5220 	 * We're changing creds because VM may fault and we need
5221 	 * the cred of the current thread to be used if quota
5222 	 * checking is enabled.
5223 	 */
5224 	savecred = curthread->t_cred;
5225 	curthread->t_cred = cr;
5226 	error = do_io(FWRITE, vp, &uio, ioflag, cr);
5227 	curthread->t_cred = savecred;
5228 
5229 	if (iovp != iov)
5230 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5231 
5232 	if (error) {
5233 		*cs->statusp = resp->status = puterrno4(error);
5234 		goto out;
5235 	}
5236 
5237 	*cs->statusp = resp->status = NFS4_OK;
5238 	resp->count = args->data_len - uio.uio_resid;
5239 
5240 	if (ioflag == 0)
5241 		resp->committed = UNSTABLE4;
5242 	else
5243 		resp->committed = FILE_SYNC4;
5244 
5245 	resp->writeverf = Write4verf;
5246 
5247 out:
5248 	if (in_crit)
5249 		nbl_end_crit(vp);
5250 }
5251 
5252 
5253 /* XXX put in a header file */
5254 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5255 
5256 void
5257 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5258 	struct svc_req *req, cred_t *cr)
5259 {
5260 	uint_t i;
5261 	struct compound_state cs;
5262 
5263 	rfs4_init_compound_state(&cs);
5264 	/*
5265 	 * Form a reply tag by copying over the reqeuest tag.
5266 	 */
5267 	resp->tag.utf8string_val =
5268 				kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5269 	resp->tag.utf8string_len = args->tag.utf8string_len;
5270 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5271 					resp->tag.utf8string_len);
5272 
5273 	cs.statusp = &resp->status;
5274 
5275 	/*
5276 	 * XXX for now, minorversion should be zero
5277 	 */
5278 	if (args->minorversion != NFS4_MINORVERSION) {
5279 		resp->array_len = 0;
5280 		resp->array = NULL;
5281 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5282 		return;
5283 	}
5284 
5285 	resp->array_len = args->array_len;
5286 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5287 		KM_SLEEP);
5288 
5289 	ASSERT(exi == NULL);
5290 	ASSERT(cr == NULL);
5291 
5292 	cr = crget();
5293 	ASSERT(cr != NULL);
5294 
5295 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5296 		crfree(cr);
5297 		return;
5298 	}
5299 
5300 	cs.basecr = cr;
5301 
5302 	cs.req = req;
5303 
5304 	/*
5305 	 * For now, NFS4 compound processing must be protected by
5306 	 * exported_lock because it can access more than one exportinfo
5307 	 * per compound and share/unshare can now change multiple
5308 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5309 	 * per proc (excluding public exinfo), and exi_count design
5310 	 * is sufficient to protect concurrent execution of NFS2/3
5311 	 * ops along with unexport.  This lock will be removed as
5312 	 * part of the NFSv4 phase 2 namespace redesign work.
5313 	 */
5314 	rw_enter(&exported_lock, RW_READER);
5315 
5316 	/*
5317 	 * If this is the first compound we've seen, we need to start all
5318 	 * new instances' grace periods.
5319 	 */
5320 	if (rfs4_seen_first_compound == 0) {
5321 		rfs4_grace_start_new();
5322 		/*
5323 		 * This must be set after rfs4_grace_start_new(), otherwise
5324 		 * another thread could proceed past here before the former
5325 		 * is finished.
5326 		 */
5327 		rfs4_seen_first_compound = 1;
5328 	}
5329 
5330 	for (i = 0; i < args->array_len && cs.cont; i++) {
5331 		nfs_argop4 *argop;
5332 		nfs_resop4 *resop;
5333 		uint_t op;
5334 
5335 		argop = &args->array[i];
5336 		resop = &resp->array[i];
5337 		resop->resop = argop->argop;
5338 		op = (uint_t)resop->resop;
5339 
5340 		if (op < rfsv4disp_cnt) {
5341 			/*
5342 			 * Count the individual ops here; NULL and COMPOUND
5343 			 * are counted in common_dispatch()
5344 			 */
5345 			rfsproccnt_v4_ptr[op].value.ui64++;
5346 
5347 			NFS4_DEBUG(rfs4_debug > 1,
5348 				(CE_NOTE, "Executing %s", rfs4_op_string[op]));
5349 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5350 			NFS4_DEBUG(rfs4_debug > 1,
5351 				(CE_NOTE, "%s returned %d",
5352 				rfs4_op_string[op], *cs.statusp));
5353 			if (*cs.statusp != NFS4_OK)
5354 				cs.cont = FALSE;
5355 		} else {
5356 			/*
5357 			 * This is effectively dead code since XDR code
5358 			 * will have already returned BADXDR if op doesn't
5359 			 * decode to legal value.  This only done for a
5360 			 * day when XDR code doesn't verify v4 opcodes.
5361 			 */
5362 			op = OP_ILLEGAL;
5363 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5364 
5365 			rfs4_op_illegal(argop, resop, req, &cs);
5366 			cs.cont = FALSE;
5367 		}
5368 
5369 		/*
5370 		 * If not at last op, and if we are to stop, then
5371 		 * compact the results array.
5372 		 */
5373 		if ((i + 1) < args->array_len && !cs.cont) {
5374 			nfs_resop4 *new_res = kmem_alloc(
5375 				(i+1) * sizeof (nfs_resop4), KM_SLEEP);
5376 			bcopy(resp->array,
5377 				new_res, (i+1) * sizeof (nfs_resop4));
5378 			kmem_free(resp->array,
5379 				args->array_len * sizeof (nfs_resop4));
5380 
5381 			resp->array_len =  i + 1;
5382 			resp->array = new_res;
5383 		}
5384 	}
5385 
5386 	rw_exit(&exported_lock);
5387 
5388 	if (cs.vp)
5389 		VN_RELE(cs.vp);
5390 	if (cs.saved_vp)
5391 		VN_RELE(cs.saved_vp);
5392 	if (cs.saved_fh.nfs_fh4_val)
5393 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5394 
5395 	if (cs.basecr)
5396 		crfree(cs.basecr);
5397 	if (cs.cr)
5398 		crfree(cs.cr);
5399 	/*
5400 	 * done with this compound request, free the label
5401 	 */
5402 
5403 	if (req->rq_label != NULL) {
5404 		kmem_free(req->rq_label, sizeof (bslabel_t));
5405 		req->rq_label = NULL;
5406 	}
5407 }
5408 
5409 /*
5410  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5411  * XXX zero out the tag and array values. Need to investigate why the
5412  * XXX calls occur, but at least prevent the panic for now.
5413  */
5414 void
5415 rfs4_compound_free(COMPOUND4res *resp)
5416 {
5417 	uint_t i;
5418 
5419 	if (resp->tag.utf8string_val) {
5420 		UTF8STRING_FREE(resp->tag)
5421 	}
5422 
5423 	for (i = 0; i < resp->array_len; i++) {
5424 		nfs_resop4 *resop;
5425 		uint_t op;
5426 
5427 		resop = &resp->array[i];
5428 		op = (uint_t)resop->resop;
5429 		if (op < rfsv4disp_cnt) {
5430 			(*rfsv4disptab[op].dis_resfree)(resop);
5431 		}
5432 	}
5433 	if (resp->array != NULL) {
5434 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5435 	}
5436 }
5437 
5438 /*
5439  * Process the value of the compound request rpc flags, as a bit-AND
5440  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5441  */
5442 void
5443 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5444 {
5445 	int i;
5446 	int flag = RPC_ALL;
5447 
5448 	for (i = 0; flag && i < args->array_len; i++) {
5449 		uint_t op;
5450 
5451 		op = (uint_t)args->array[i].argop;
5452 
5453 		if (op < rfsv4disp_cnt)
5454 			flag &= rfsv4disptab[op].dis_flags;
5455 		else
5456 			flag = 0;
5457 	}
5458 	*flagp = flag;
5459 }
5460 
5461 nfsstat4
5462 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5463 {
5464 	nfsstat4 e;
5465 
5466 	rfs4_dbe_lock(cp->dbe);
5467 
5468 	if (cp->sysidt != LM_NOSYSID) {
5469 		*sp = cp->sysidt;
5470 		e = NFS4_OK;
5471 
5472 	} else if ((cp->sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5473 		*sp = cp->sysidt;
5474 		e = NFS4_OK;
5475 
5476 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
5477 			"rfs4_client_sysid: allocated 0x%x\n", *sp));
5478 	} else
5479 		e = NFS4ERR_DELAY;
5480 
5481 	rfs4_dbe_unlock(cp->dbe);
5482 	return (e);
5483 }
5484 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debugging aid, compiled only for DEBUG non-lint builds: log one lock
 * request through cmn_err().  str is a caller-supplied prefix, operation
 * is the fcntl-style command and flk the lock description.  A zero
 * l_len is printed as all ones.
 */
static void lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *tname;
	longlong_t length;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		tname = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		tname = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		tname = "F_WRLCK";
	else
		tname = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	length = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
		str, opname, tname,
		(longlong_t)flk->l_start,
		length,
		flk->l_pid);
}

/* LOCK_PRINT(d, ...) calls lock_print() when d is non-zero */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* no-op when lock_print() is not compiled in */
#define	LOCK_PRINT(d, s, t, f)
#endif
5521 
5522 /*ARGSUSED*/
5523 static bool_t
5524 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
5525 {
5526 	return (TRUE);
5527 }
5528 
5529 /*
5530  * Look up the pathname using the vp in cs as the directory vnode.
5531  * cs->vp will be the vnode for the file on success
5532  */
5533 
5534 static nfsstat4
5535 rfs4_lookup(component4 *component, struct svc_req *req,
5536 	    struct compound_state *cs)
5537 {
5538 	char *nm;
5539 	uint32_t len;
5540 	nfsstat4 status;
5541 
5542 	if (cs->vp == NULL) {
5543 		return (NFS4ERR_NOFILEHANDLE);
5544 	}
5545 	if (cs->vp->v_type != VDIR) {
5546 		return (NFS4ERR_NOTDIR);
5547 	}
5548 
5549 	if (!utf8_dir_verify(component))
5550 		return (NFS4ERR_INVAL);
5551 
5552 	nm = utf8_to_fn(component, &len, NULL);
5553 	if (nm == NULL) {
5554 		return (NFS4ERR_INVAL);
5555 	}
5556 
5557 	if (len > MAXNAMELEN) {
5558 		kmem_free(nm, len);
5559 		return (NFS4ERR_NAMETOOLONG);
5560 	}
5561 
5562 	status = do_rfs4_op_lookup(nm, len, req, cs);
5563 
5564 	kmem_free(nm, len);
5565 
5566 	return (status);
5567 }
5568 
5569 static nfsstat4
5570 rfs4_lookupfile(component4 *component, struct svc_req *req,
5571 		struct compound_state *cs, uint32_t access,
5572 		change_info4 *cinfo)
5573 {
5574 	nfsstat4 status;
5575 	vnode_t *dvp = cs->vp;
5576 	vattr_t bva, ava, fva;
5577 	int error;
5578 
5579 	/* Get "before" change value */
5580 	bva.va_mask = AT_CTIME|AT_SEQ;
5581 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5582 	if (error)
5583 		return (puterrno4(error));
5584 
5585 	/* rfs4_lookup may VN_RELE directory */
5586 	VN_HOLD(dvp);
5587 
5588 	status = rfs4_lookup(component, req, cs);
5589 	if (status != NFS4_OK) {
5590 		VN_RELE(dvp);
5591 		return (status);
5592 	}
5593 
5594 	/*
5595 	 * Get "after" change value, if it fails, simply return the
5596 	 * before value.
5597 	 */
5598 	ava.va_mask = AT_CTIME|AT_SEQ;
5599 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
5600 		ava.va_ctime = bva.va_ctime;
5601 		ava.va_seq = 0;
5602 	}
5603 	VN_RELE(dvp);
5604 
5605 	/*
5606 	 * Validate the file is a file
5607 	 */
5608 	fva.va_mask = AT_TYPE|AT_MODE;
5609 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr);
5610 	if (error)
5611 		return (puterrno4(error));
5612 
5613 	if (fva.va_type != VREG) {
5614 		if (fva.va_type == VDIR)
5615 			return (NFS4ERR_ISDIR);
5616 		if (fva.va_type == VLNK)
5617 			return (NFS4ERR_SYMLINK);
5618 		return (NFS4ERR_INVAL);
5619 	}
5620 
5621 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
5622 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
5623 
5624 	/*
5625 	 * It is undefined if VOP_LOOKUP will change va_seq, so
5626 	 * cinfo.atomic = TRUE only if we have
5627 	 * non-zero va_seq's, and they have not changed.
5628 	 */
5629 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
5630 		cinfo->atomic = TRUE;
5631 	else
5632 		cinfo->atomic = FALSE;
5633 
5634 	/* Check for mandatory locking */
5635 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
5636 	return (check_open_access(access, cs, req));
5637 }
5638 
5639 static nfsstat4
5640 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
5641 	    timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
5642 {
5643 	int error;
5644 	nfsstat4 status = NFS4_OK;
5645 	vattr_t va;
5646 
5647 tryagain:
5648 
5649 	/*
5650 	 * The file open mode used is VWRITE.  If the client needs
5651 	 * some other semantic, then it should do the access checking
5652 	 * itself.  It would have been nice to have the file open mode
5653 	 * passed as part of the arguments.
5654 	 */
5655 
5656 	*created = TRUE;
5657 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0);
5658 
5659 	if (error) {
5660 		*created = FALSE;
5661 
5662 		/*
5663 		 * If we got something other than file already exists
5664 		 * then just return this error.  Otherwise, we got
5665 		 * EEXIST.  If we were doing a GUARDED create, then
5666 		 * just return this error.  Otherwise, we need to
5667 		 * make sure that this wasn't a duplicate of an
5668 		 * exclusive create request.
5669 		 *
5670 		 * The assumption is made that a non-exclusive create
5671 		 * request will never return EEXIST.
5672 		 */
5673 
5674 		if (error != EEXIST || mode == GUARDED4) {
5675 			status = puterrno4(error);
5676 			return (status);
5677 		}
5678 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr);
5679 
5680 		if (error) {
5681 			/*
5682 			 * We couldn't find the file that we thought that
5683 			 * we just created.  So, we'll just try creating
5684 			 * it again.
5685 			 */
5686 			if (error == ENOENT)
5687 				goto tryagain;
5688 
5689 			status = puterrno4(error);
5690 			return (status);
5691 		}
5692 
5693 		if (mode == UNCHECKED4) {
5694 			/* existing object must be regular file */
5695 			if ((*vpp)->v_type != VREG) {
5696 				if ((*vpp)->v_type == VDIR)
5697 					status = NFS4ERR_ISDIR;
5698 				else if ((*vpp)->v_type == VLNK)
5699 					status = NFS4ERR_SYMLINK;
5700 				else
5701 					status = NFS4ERR_INVAL;
5702 				VN_RELE(*vpp);
5703 				return (status);
5704 			}
5705 
5706 			return (NFS4_OK);
5707 		}
5708 
5709 		/* Check for duplicate request */
5710 		ASSERT(mtime != 0);
5711 		va.va_mask = AT_MTIME;
5712 		error = VOP_GETATTR(*vpp, &va, 0, cr);
5713 		if (!error) {
5714 			/* We found the file */
5715 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
5716 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
5717 				/* but its not our creation */
5718 				VN_RELE(*vpp);
5719 				return (NFS4ERR_EXIST);
5720 			}
5721 			*created = TRUE; /* retrans of create == created */
5722 			return (NFS4_OK);
5723 		}
5724 		VN_RELE(*vpp);
5725 		return (NFS4ERR_EXIST);
5726 	}
5727 
5728 	return (NFS4_OK);
5729 }
5730 
5731 static nfsstat4
5732 check_open_access(uint32_t access,
5733 		struct compound_state *cs, struct svc_req *req)
5734 {
5735 	int error;
5736 	vnode_t *vp;
5737 	bool_t readonly;
5738 	cred_t *cr = cs->cr;
5739 
5740 	/* For now we don't allow mandatory locking as per V2/V3 */
5741 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
5742 		return (NFS4ERR_ACCESS);
5743 	}
5744 
5745 	vp = cs->vp;
5746 	ASSERT(cr != NULL && vp->v_type == VREG);
5747 
5748 	/*
5749 	 * If the file system is exported read only and we are trying
5750 	 * to open for write, then return NFS4ERR_ROFS
5751 	 */
5752 
5753 	readonly = rdonly4(cs->exi, cs->vp, req);
5754 
5755 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
5756 		return (NFS4ERR_ROFS);
5757 
5758 	if (access & OPEN4_SHARE_ACCESS_READ) {
5759 		if ((VOP_ACCESS(vp, VREAD, 0, cr) != 0) &&
5760 		    (VOP_ACCESS(vp, VEXEC, 0, cr) != 0)) {
5761 			return (NFS4ERR_ACCESS);
5762 		}
5763 	}
5764 
5765 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
5766 		error = VOP_ACCESS(vp, VWRITE, 0, cr);
5767 		if (error)
5768 			return (NFS4ERR_ACCESS);
5769 	}
5770 
5771 	return (NFS4_OK);
5772 }
5773 
5774 static nfsstat4
5775 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
5776 		change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
5777 {
5778 	struct nfs4_svgetit_arg sarg;
5779 	struct nfs4_ntov_table ntov;
5780 
5781 	bool_t ntov_table_init = FALSE;
5782 	struct statvfs64 sb;
5783 	nfsstat4 status;
5784 	vnode_t *vp;
5785 	vattr_t bva, ava, iva, cva, *vap;
5786 	vnode_t *dvp;
5787 	timespec32_t *mtime;
5788 	char *nm = NULL;
5789 	uint_t buflen;
5790 	bool_t created;
5791 	bool_t setsize = FALSE;
5792 	len_t reqsize;
5793 	int error;
5794 	bool_t trunc;
5795 	caller_context_t ct;
5796 	component4 *component;
5797 	bslabel_t *clabel;
5798 
5799 	sarg.sbp = &sb;
5800 
5801 	dvp = cs->vp;
5802 
5803 	/* Check if the file system is read only */
5804 	if (rdonly4(cs->exi, dvp, req))
5805 		return (NFS4ERR_ROFS);
5806 
5807 	/* check the label of including directory */
5808 	if (is_system_labeled()) {
5809 		ASSERT(req->rq_label != NULL);
5810 		clabel = req->rq_label;
5811 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
5812 		    "got client label from request(1)",
5813 		    struct svc_req *, req);
5814 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5815 			if (!do_rfs4_label_check(clabel, dvp, EQUALITY_CHECK)) {
5816 				return (NFS4ERR_ACCESS);
5817 			}
5818 		}
5819 	}
5820 
5821 	/*
5822 	 * Get the last component of path name in nm. cs will reference
5823 	 * the including directory on success.
5824 	 */
5825 	component = &args->open_claim4_u.file;
5826 	if (!utf8_dir_verify(component))
5827 		return (NFS4ERR_INVAL);
5828 
5829 	nm = utf8_to_fn(component, &buflen, NULL);
5830 
5831 	if (nm == NULL)
5832 		return (NFS4ERR_RESOURCE);
5833 
5834 	if (buflen > MAXNAMELEN) {
5835 		kmem_free(nm, buflen);
5836 		return (NFS4ERR_NAMETOOLONG);
5837 	}
5838 
5839 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
5840 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr);
5841 	if (error) {
5842 		kmem_free(nm, buflen);
5843 		return (puterrno4(error));
5844 	}
5845 
5846 	if (bva.va_type != VDIR) {
5847 		kmem_free(nm, buflen);
5848 		return (NFS4ERR_NOTDIR);
5849 	}
5850 
5851 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
5852 
5853 	switch (args->mode) {
5854 	case GUARDED4:
5855 		/*FALLTHROUGH*/
5856 	case UNCHECKED4:
5857 		nfs4_ntov_table_init(&ntov);
5858 		ntov_table_init = TRUE;
5859 
5860 		*attrset = 0;
5861 		status = do_rfs4_set_attrs(attrset,
5862 					&args->createhow4_u.createattrs,
5863 					cs, &sarg, &ntov, NFS4ATTR_SETIT);
5864 
5865 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
5866 		    sarg.vap->va_type != VREG) {
5867 			if (sarg.vap->va_type == VDIR)
5868 				status = NFS4ERR_ISDIR;
5869 			else if (sarg.vap->va_type == VLNK)
5870 				status = NFS4ERR_SYMLINK;
5871 			else
5872 				status = NFS4ERR_INVAL;
5873 		}
5874 
5875 		if (status != NFS4_OK) {
5876 			kmem_free(nm, buflen);
5877 			nfs4_ntov_table_free(&ntov, &sarg);
5878 			*attrset = 0;
5879 			return (status);
5880 		}
5881 
5882 		vap = sarg.vap;
5883 		vap->va_type = VREG;
5884 		vap->va_mask |= AT_TYPE;
5885 
5886 		if ((vap->va_mask & AT_MODE) == 0) {
5887 			vap->va_mask |= AT_MODE;
5888 			vap->va_mode = (mode_t)0600;
5889 		}
5890 
5891 		if (vap->va_mask & AT_SIZE) {
5892 
5893 			/* Disallow create with a non-zero size */
5894 
5895 			if ((reqsize = sarg.vap->va_size) != 0) {
5896 				kmem_free(nm, buflen);
5897 				nfs4_ntov_table_free(&ntov, &sarg);
5898 				*attrset = 0;
5899 				return (NFS4ERR_INVAL);
5900 			}
5901 			setsize = TRUE;
5902 		}
5903 		break;
5904 
5905 	case EXCLUSIVE4:
5906 		/* prohibit EXCL create of named attributes */
5907 		if (dvp->v_flag & V_XATTRDIR) {
5908 			kmem_free(nm, buflen);
5909 			*attrset = 0;
5910 			return (NFS4ERR_INVAL);
5911 		}
5912 
5913 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
5914 		cva.va_type = VREG;
5915 		/*
5916 		 * Ensure no time overflows. Assumes underlying
5917 		 * filesystem supports at least 32 bits.
5918 		 * Truncate nsec to usec resolution to allow valid
5919 		 * compares even if the underlying filesystem truncates.
5920 		 */
5921 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
5922 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
5923 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
5924 		cva.va_mode = (mode_t)0;
5925 		vap = &cva;
5926 		break;
5927 	}
5928 
5929 	status = create_vnode(dvp, nm, vap, args->mode, mtime,
5930 						cs->cr, &vp, &created);
5931 	kmem_free(nm, buflen);
5932 
5933 	if (status != NFS4_OK) {
5934 		if (ntov_table_init)
5935 			nfs4_ntov_table_free(&ntov, &sarg);
5936 		*attrset = 0;
5937 		return (status);
5938 	}
5939 
5940 	trunc = (setsize && !created);
5941 
5942 	if (args->mode != EXCLUSIVE4) {
5943 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
5944 
5945 		/*
5946 		 * True verification that object was created with correct
5947 		 * attrs is impossible.  The attrs could have been changed
5948 		 * immediately after object creation.  If attributes did
5949 		 * not verify, the only recourse for the server is to
5950 		 * destroy the object.  Maybe if some attrs (like gid)
5951 		 * are set incorrectly, the object should be destroyed;
5952 		 * however, seems bad as a default policy.  Do we really
5953 		 * want to destroy an object over one of the times not
5954 		 * verifying correctly?  For these reasons, the server
5955 		 * currently sets bits in attrset for createattrs
5956 		 * that were set; however, no verification is done.
5957 		 *
5958 		 * vmask_to_nmask accounts for vattr bits set on create
5959 		 *	[do_rfs4_set_attrs() only sets resp bits for
5960 		 *	 non-vattr/vfs bits.]
5961 		 * Mask off any bits we set by default so as not to return
5962 		 * more attrset bits than were requested in createattrs
5963 		 */
5964 		if (created) {
5965 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
5966 			*attrset &= createmask;
5967 		} else {
5968 			/*
5969 			 * We did not create the vnode (we tried but it
5970 			 * already existed).  In this case, the only createattr
5971 			 * that the spec allows the server to set is size,
5972 			 * and even then, it can only be set if it is 0.
5973 			 */
5974 			*attrset = 0;
5975 			if (trunc)
5976 				*attrset = FATTR4_SIZE_MASK;
5977 		}
5978 	}
5979 	if (ntov_table_init)
5980 		nfs4_ntov_table_free(&ntov, &sarg);
5981 
5982 	/*
5983 	 * Get the initial "after" sequence number, if it fails,
5984 	 * set to zero, time to before.
5985 	 */
5986 	iva.va_mask = AT_CTIME|AT_SEQ;
5987 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr)) {
5988 		iva.va_seq = 0;
5989 		iva.va_ctime = bva.va_ctime;
5990 	}
5991 
5992 	/*
5993 	 * create_vnode attempts to create the file exclusive,
5994 	 * if it already exists the VOP_CREATE will fail and
5995 	 * may not increase va_seq. It is atomic if
5996 	 * we haven't changed the directory, but if it has changed
5997 	 * we don't know what changed it.
5998 	 */
5999 	if (!created) {
6000 		if (bva.va_seq && iva.va_seq &&
6001 			bva.va_seq == iva.va_seq)
6002 			cinfo->atomic = TRUE;
6003 		else
6004 			cinfo->atomic = FALSE;
6005 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6006 	} else {
6007 		/*
6008 		 * The entry was created, we need to sync the
6009 		 * directory metadata.
6010 		 */
6011 		(void) VOP_FSYNC(dvp, 0, cs->cr);
6012 
6013 		/*
6014 		 * Get "after" change value, if it fails, simply return the
6015 		 * before value.
6016 		 */
6017 		ava.va_mask = AT_CTIME|AT_SEQ;
6018 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr)) {
6019 			ava.va_ctime = bva.va_ctime;
6020 			ava.va_seq = 0;
6021 		}
6022 
6023 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6024 
6025 		/*
6026 		 * The cinfo->atomic = TRUE only if we have
6027 		 * non-zero va_seq's, and it has incremented by exactly one
6028 		 * during the create_vnode and it didn't
6029 		 * change during the VOP_FSYNC.
6030 		 */
6031 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6032 				iva.va_seq == (bva.va_seq + 1) &&
6033 				iva.va_seq == ava.va_seq)
6034 			cinfo->atomic = TRUE;
6035 		else
6036 			cinfo->atomic = FALSE;
6037 	}
6038 
6039 	/* Check for mandatory locking and that the size gets set. */
6040 	cva.va_mask = AT_MODE;
6041 	if (setsize)
6042 		cva.va_mask |= AT_SIZE;
6043 
6044 	/* Assume the worst */
6045 	cs->mandlock = TRUE;
6046 
6047 	if (VOP_GETATTR(vp, &cva, 0, cs->cr) == 0) {
6048 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6049 
6050 		/*
6051 		 * Truncate the file if necessary; this would be
6052 		 * the case for create over an existing file.
6053 		 */
6054 
6055 		if (trunc) {
6056 			int in_crit = 0;
6057 			rfs4_file_t *fp;
6058 			bool_t create = FALSE;
6059 
6060 			/*
6061 			 * We are writing over an existing file.
6062 			 * Check to see if we need to recall a delegation.
6063 			 */
6064 			rfs4_hold_deleg_policy();
6065 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6066 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6067 					(reqsize == 0), FALSE, FALSE,
6068 							&clientid)) {
6069 
6070 					rfs4_file_rele(fp);
6071 					rfs4_rele_deleg_policy();
6072 					VN_RELE(vp);
6073 					*attrset = 0;
6074 					return (NFS4ERR_DELAY);
6075 				}
6076 				rfs4_file_rele(fp);
6077 			}
6078 			rfs4_rele_deleg_policy();
6079 
6080 			if (nbl_need_check(vp)) {
6081 				in_crit = 1;
6082 
6083 				ASSERT(reqsize == 0);
6084 
6085 				nbl_start_crit(vp, RW_READER);
6086 				if (nbl_conflict(vp, NBL_WRITE, 0,
6087 						cva.va_size, 0)) {
6088 					in_crit = 0;
6089 					nbl_end_crit(vp);
6090 					VN_RELE(vp);
6091 					*attrset = 0;
6092 					return (NFS4ERR_ACCESS);
6093 				}
6094 			}
6095 			ct.cc_sysid = 0;
6096 			ct.cc_pid = 0;
6097 			ct.cc_caller_id = nfs4_srv_caller_id;
6098 
6099 			cva.va_mask = AT_SIZE;
6100 			cva.va_size = reqsize;
6101 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6102 			if (in_crit)
6103 				nbl_end_crit(vp);
6104 		}
6105 	}
6106 
6107 	error = makefh4(&cs->fh, vp, cs->exi);
6108 
6109 	/*
6110 	 * Force modified data and metadata out to stable storage.
6111 	 */
6112 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr);
6113 
6114 	if (error) {
6115 		VN_RELE(vp);
6116 		*attrset = 0;
6117 		return (puterrno4(error));
6118 	}
6119 
6120 	/* if parent dir is attrdir, set namedattr fh flag */
6121 	if (dvp->v_flag & V_XATTRDIR)
6122 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6123 
6124 	if (cs->vp)
6125 		VN_RELE(cs->vp);
6126 
6127 	cs->vp = vp;
6128 
6129 	/*
6130 	 * if we did not create the file, we will need to check
6131 	 * the access bits on the file
6132 	 */
6133 
6134 	if (!created) {
6135 		if (setsize)
6136 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6137 		status = check_open_access(args->share_access, cs, req);
6138 		if (status != NFS4_OK)
6139 			*attrset = 0;
6140 	}
6141 	return (status);
6142 }
6143 
6144 /*ARGSUSED*/
6145 static void
6146 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6147 		rfs4_openowner_t *oo, delegreq_t deleg,
6148 		uint32_t access, uint32_t deny,
6149 		OPEN4res *resp)
6150 {
6151 	/* XXX Currently not using req  */
6152 	rfs4_state_t *state;
6153 	rfs4_file_t *file;
6154 	bool_t screate = TRUE;
6155 	bool_t fcreate = TRUE;
6156 	uint32_t amodes;
6157 	uint32_t dmodes;
6158 	rfs4_deleg_state_t *dsp;
6159 	struct shrlock shr;
6160 	struct shr_locowner shr_loco;
6161 	sysid_t sysid;
6162 	nfsstat4 status;
6163 	int fflags = 0;
6164 	int recall = 0;
6165 	int err;
6166 
6167 	/* get the file struct and hold a lock on it during initial open */
6168 	file = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6169 	if (file == NULL) {
6170 		NFS4_DEBUG(rfs4_debug,
6171 			(CE_NOTE, "rfs4_do_open: can't find file"));
6172 		resp->status = NFS4ERR_SERVERFAULT;
6173 		return;
6174 	}
6175 
6176 	state = rfs4_findstate_by_owner_file(oo, file, &screate);
6177 	if (state == NULL) {
6178 		NFS4_DEBUG(rfs4_debug,
6179 			(CE_NOTE, "rfs4_do_open: can't find state"));
6180 		resp->status = NFS4ERR_RESOURCE;
6181 		/* No need to keep any reference */
6182 		rfs4_file_rele_withunlock(file);
6183 		return;
6184 	}
6185 
6186 	/* try to get the sysid before continuing */
6187 	if ((status = rfs4_client_sysid(oo->client, &sysid)) != NFS4_OK) {
6188 		resp->status = status;
6189 		rfs4_file_rele(file);
6190 		/* Not a fully formed open; "close" it */
6191 		if (screate == TRUE)
6192 			rfs4_state_close(state, FALSE, FALSE, cs->cr);
6193 		rfs4_state_rele(state);
6194 		return;
6195 	}
6196 
6197 	/*
6198 	 * Calculate the new deny and access mode that this open is adding to
6199 	 * the file for this open owner;
6200 	 */
6201 	dmodes = (deny & ~state->share_deny);
6202 	amodes = (access & ~state->share_access);
6203 
6204 	/*
6205 	 * Check to see the client has already sent an open for this
6206 	 * open owner on this file with the same share/deny modes.
6207 	 * If so, we don't need to check for a conflict and we don't
6208 	 * need to add another shrlock.  If not, then we need to
6209 	 * check for conflicts in deny and access before checking for
6210 	 * conflicts in delegation.  We don't want to recall a
6211 	 * delegation based on an open that will eventually fail based
6212 	 * on shares modes.
6213 	 */
6214 
6215 	if (dmodes || amodes) {
6216 		shr.s_access = (short)access;
6217 		shr.s_deny = (short)deny;
6218 		shr.s_pid = rfs4_dbe_getid(oo->dbe);
6219 		shr.s_sysid = sysid;
6220 		shr_loco.sl_pid = shr.s_pid;
6221 		shr_loco.sl_id = shr.s_sysid;
6222 		shr.s_owner = (caddr_t)&shr_loco;
6223 		shr.s_own_len = sizeof (shr_loco);
6224 
6225 		fflags = 0;
6226 		if (access & OPEN4_SHARE_ACCESS_READ)
6227 			fflags |= FREAD;
6228 		if (access & OPEN4_SHARE_ACCESS_WRITE)
6229 			fflags |= FWRITE;
6230 
6231 		if ((err = vop_shrlock(cs->vp, F_SHARE, &shr, fflags)) != 0) {
6232 
6233 			resp->status = err == EAGAIN ?
6234 				NFS4ERR_SHARE_DENIED : puterrno4(err);
6235 
6236 			rfs4_file_rele(file);
6237 			/* Not a fully formed open; "close" it */
6238 			if (screate == TRUE)
6239 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
6240 			rfs4_state_rele(state);
6241 			return;
6242 		}
6243 	}
6244 
6245 	rfs4_dbe_lock(state->dbe);
6246 	rfs4_dbe_lock(file->dbe);
6247 
6248 	/*
6249 	 * Check to see if this file is delegated and if so, if a
6250 	 * recall needs to be done.
6251 	 */
6252 	if (rfs4_check_recall(state, access)) {
6253 		rfs4_dbe_unlock(file->dbe);
6254 		rfs4_dbe_unlock(state->dbe);
6255 		rfs4_recall_deleg(file, FALSE, state->owner->client);
6256 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
6257 		rfs4_dbe_lock(state->dbe);
6258 		rfs4_dbe_lock(file->dbe);
6259 		/* Let's see if the delegation was returned */
6260 		if (rfs4_check_recall(state, access)) {
6261 			rfs4_dbe_unlock(file->dbe);
6262 			rfs4_dbe_unlock(state->dbe);
6263 			rfs4_file_rele(file);
6264 			rfs4_update_lease(state->owner->client);
6265 			/* recalculate flags to match what was added */
6266 			fflags = 0;
6267 			if (amodes & OPEN4_SHARE_ACCESS_READ)
6268 				fflags |= FREAD;
6269 			if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6270 				fflags |= FWRITE;
6271 			(void) vop_shrlock(cs->vp, F_UNSHARE, &shr, fflags);
6272 			/* Not a fully formed open; "close" it */
6273 			if (screate == TRUE)
6274 				rfs4_state_close(state, FALSE, FALSE, cs->cr);
6275 			rfs4_state_rele(state);
6276 			resp->status = NFS4ERR_DELAY;
6277 			return;
6278 		}
6279 	}
6280 
6281 	if (dmodes & OPEN4_SHARE_DENY_READ)
6282 		file->deny_read++;
6283 	if (dmodes & OPEN4_SHARE_DENY_WRITE)
6284 		file->deny_write++;
6285 	file->share_deny |= deny;
6286 	state->share_deny |= deny;
6287 
6288 	if (amodes & OPEN4_SHARE_ACCESS_READ)
6289 		file->access_read++;
6290 	if (amodes & OPEN4_SHARE_ACCESS_WRITE)
6291 		file->access_write++;
6292 	file->share_access |= access;
6293 	state->share_access |= access;
6294 
6295 	/*
6296 	 * Check for delegation here. if the deleg argument is not
6297 	 * DELEG_ANY, then this is a reclaim from a client and
6298 	 * we must honor the delegation requested. If necessary we can
6299 	 * set the recall flag.
6300 	 */
6301 
6302 	dsp = rfs4_grant_delegation(deleg, state, &recall);
6303 
6304 	cs->deleg = (file->dinfo->dtype == OPEN_DELEGATE_WRITE);
6305 
6306 	next_stateid(&state->stateid);
6307 
6308 	resp->stateid = state->stateid.stateid;
6309 
6310 	rfs4_dbe_unlock(file->dbe);
6311 	rfs4_dbe_unlock(state->dbe);
6312 
6313 	if (dsp) {
6314 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6315 		rfs4_deleg_state_rele(dsp);
6316 	}
6317 
6318 	rfs4_file_rele(file);
6319 	rfs4_state_rele(state);
6320 
6321 	resp->status = NFS4_OK;
6322 }
6323 
6324 /*ARGSUSED*/
6325 static void
6326 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6327 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6328 {
6329 	change_info4 *cinfo = &resp->cinfo;
6330 	bitmap4 *attrset = &resp->attrset;
6331 
6332 	if (args->opentype == OPEN4_NOCREATE)
6333 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6334 					req, cs, args->share_access, cinfo);
6335 	else {
6336 		/* inhibit delegation grants during exclusive create */
6337 
6338 		if (args->mode == EXCLUSIVE4)
6339 			rfs4_disable_delegation();
6340 
6341 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6342 					oo->client->clientid);
6343 	}
6344 
6345 	if (resp->status == NFS4_OK) {
6346 
6347 		/* cs->vp cs->fh now reference the desired file */
6348 
6349 		rfs4_do_open(cs, req, oo, DELEG_ANY, args->share_access,
6350 						args->share_deny, resp);
6351 
6352 		/*
6353 		 * If rfs4_createfile set attrset, we must
6354 		 * clear this attrset before the response is copied.
6355 		 */
6356 		if (resp->status != NFS4_OK && resp->attrset) {
6357 			resp->attrset = 0;
6358 		}
6359 	}
6360 	else
6361 		*cs->statusp = resp->status;
6362 
6363 	if (args->mode == EXCLUSIVE4)
6364 		rfs4_enable_delegation();
6365 }
6366 
6367 /*ARGSUSED*/
6368 static void
6369 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6370 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6371 {
6372 	change_info4 *cinfo = &resp->cinfo;
6373 	vattr_t va;
6374 	vtype_t v_type = cs->vp->v_type;
6375 	int error = 0;
6376 
6377 	/* Verify that we have a regular file */
6378 	if (v_type != VREG) {
6379 		if (v_type == VDIR)
6380 			resp->status = NFS4ERR_ISDIR;
6381 		else if (v_type == VLNK)
6382 			resp->status = NFS4ERR_SYMLINK;
6383 		else
6384 			resp->status = NFS4ERR_INVAL;
6385 		return;
6386 	}
6387 
6388 	va.va_mask = AT_MODE|AT_UID;
6389 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr);
6390 	if (error) {
6391 		resp->status = puterrno4(error);
6392 		return;
6393 	}
6394 
6395 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
6396 
6397 	/*
6398 	 * Check if we have access to the file, Note the the file
6399 	 * could have originally been open UNCHECKED or GUARDED
6400 	 * with mode bits that will now fail, but there is nothing
6401 	 * we can really do about that except in the case that the
6402 	 * owner of the file is the one requesting the open.
6403 	 */
6404 	if (crgetuid(cs->cr) != va.va_uid) {
6405 		resp->status = check_open_access(args->share_access, cs, req);
6406 		if (resp->status != NFS4_OK) {
6407 			return;
6408 		}
6409 	}
6410 
6411 	/*
6412 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
6413 	 */
6414 	cinfo->before = 0;
6415 	cinfo->after = 0;
6416 	cinfo->atomic = FALSE;
6417 
6418 	rfs4_do_open(cs, req, oo,
6419 		NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
6420 		args->share_access, args->share_deny, resp);
6421 }
6422 
6423 static void
6424 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
6425 		OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6426 {
6427 	int error;
6428 	nfsstat4 status;
6429 	stateid4 stateid =
6430 			args->open_claim4_u.delegate_cur_info.delegate_stateid;
6431 	rfs4_deleg_state_t *dsp;
6432 
6433 	/*
6434 	 * Find the state info from the stateid and confirm that the
6435 	 * file is delegated.  If the state openowner is the same as
6436 	 * the supplied openowner we're done. If not, get the file
6437 	 * info from the found state info. Use that file info to
6438 	 * create the state for this lock owner. Note solaris doen't
6439 	 * really need the pathname to find the file. We may want to
6440 	 * lookup the pathname and make sure that the vp exist and
6441 	 * matches the vp in the file structure. However it is
6442 	 * possible that the pathname nolonger exists (local process
6443 	 * unlinks the file), so this may not be that useful.
6444 	 */
6445 
6446 	status = rfs4_get_deleg_state(&stateid, &dsp);
6447 	if (status != NFS4_OK) {
6448 		resp->status = status;
6449 		return;
6450 	}
6451 
6452 	ASSERT(dsp->finfo->dinfo->dtype != OPEN_DELEGATE_NONE);
6453 
6454 	/*
6455 	 * New lock owner, create state. Since this was probably called
6456 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
6457 	 */
6458 
6459 	ASSERT(cs->vp != NULL);
6460 	VN_RELE(cs->vp);
6461 	VN_HOLD(dsp->finfo->vp);
6462 	cs->vp = dsp->finfo->vp;
6463 
6464 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
6465 		rfs4_deleg_state_rele(dsp);
6466 		*cs->statusp = resp->status = puterrno4(error);
6467 		return;
6468 	}
6469 
6470 	/* Mark progress for delegation returns */
6471 	dsp->finfo->dinfo->time_lastwrite = gethrestime_sec();
6472 	rfs4_deleg_state_rele(dsp);
6473 	rfs4_do_open(cs, req, oo, DELEG_NONE,
6474 				args->share_access, args->share_deny, resp);
6475 }
6476 
6477 /*ARGSUSED*/
6478 static void
6479 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
6480 			OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6481 {
6482 	/*
6483 	 * Lookup the pathname, it must already exist since this file
6484 	 * was delegated.
6485 	 *
6486 	 * Find the file and state info for this vp and open owner pair.
6487 	 *	check that they are in fact delegated.
6488 	 *	check that the state access and deny modes are the same.
6489 	 *
6490 	 * Return the delgation possibly seting the recall flag.
6491 	 */
6492 	rfs4_file_t *file;
6493 	rfs4_state_t *state;
6494 	bool_t create = FALSE;
6495 	bool_t dcreate = FALSE;
6496 	rfs4_deleg_state_t *dsp;
6497 	nfsace4 *ace;
6498 
6499 
6500 	/* Note we ignore oflags */
6501 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
6502 				req, cs, args->share_access, &resp->cinfo);
6503 
6504 	if (resp->status != NFS4_OK) {
6505 		return;
6506 	}
6507 
6508 	/* get the file struct and hold a lock on it during initial open */
6509 	file = rfs4_findfile_withlock(cs->vp, NULL, &create);
6510 	if (file == NULL) {
6511 		NFS4_DEBUG(rfs4_debug,
6512 			(CE_NOTE, "rfs4_do_opendelprev: can't find file"));
6513 		resp->status = NFS4ERR_SERVERFAULT;
6514 		return;
6515 	}
6516 
6517 	state = rfs4_findstate_by_owner_file(oo, file, &create);
6518 	if (state == NULL) {
6519 		NFS4_DEBUG(rfs4_debug,
6520 			(CE_NOTE, "rfs4_do_opendelprev: can't find state"));
6521 		resp->status = NFS4ERR_SERVERFAULT;
6522 		rfs4_file_rele_withunlock(file);
6523 		return;
6524 	}
6525 
6526 	rfs4_dbe_lock(state->dbe);
6527 	rfs4_dbe_lock(file->dbe);
6528 	if (args->share_access != state->share_access ||
6529 			args->share_deny != state->share_deny ||
6530 			state->finfo->dinfo->dtype == OPEN_DELEGATE_NONE) {
6531 		NFS4_DEBUG(rfs4_debug,
6532 			(CE_NOTE, "rfs4_do_opendelprev: state mixup"));
6533 		rfs4_dbe_unlock(file->dbe);
6534 		rfs4_dbe_unlock(state->dbe);
6535 		rfs4_file_rele(file);
6536 		rfs4_state_rele(state);
6537 		resp->status = NFS4ERR_SERVERFAULT;
6538 		return;
6539 	}
6540 	rfs4_dbe_unlock(file->dbe);
6541 	rfs4_dbe_unlock(state->dbe);
6542 
6543 	dsp = rfs4_finddeleg(state, &dcreate);
6544 	if (dsp == NULL) {
6545 		rfs4_state_rele(state);
6546 		rfs4_file_rele(file);
6547 		resp->status = NFS4ERR_SERVERFAULT;
6548 		return;
6549 	}
6550 
6551 	next_stateid(&state->stateid);
6552 
6553 	resp->stateid = state->stateid.stateid;
6554 
6555 	resp->delegation.delegation_type = dsp->dtype;
6556 
6557 	if (dsp->dtype == OPEN_DELEGATE_READ) {
6558 		open_read_delegation4 *rv =
6559 			&resp->delegation.open_delegation4_u.read;
6560 
6561 		rv->stateid = dsp->delegid.stateid;
6562 		rv->recall = FALSE; /* no policy in place to set to TRUE */
6563 		ace = &rv->permissions;
6564 	} else {
6565 		open_write_delegation4 *rv =
6566 			&resp->delegation.open_delegation4_u.write;
6567 
6568 		rv->stateid = dsp->delegid.stateid;
6569 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
6570 		ace = &rv->permissions;
6571 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
6572 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
6573 	}
6574 
6575 	/* XXX For now */
6576 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
6577 	ace->flag = 0;
6578 	ace->access_mask = 0;
6579 	ace->who.utf8string_len = 0;
6580 	ace->who.utf8string_val = 0;
6581 
6582 	rfs4_deleg_state_rele(dsp);
6583 	rfs4_state_rele(state);
6584 	rfs4_file_rele(file);
6585 }
6586 
/*
 * Verdict of an owner sequence id check (see rfs4_check_seqid()).
 * The numeric values are 0, 1 and 2 in declaration order.
 */
typedef enum {
	/* request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the cached reply */
	NFS4_CHKSEQ_REPLAY,
	/* any other sequence id, or a repeated id on a different op */
	NFS4_CHKSEQ_BAD
} rfs4_chkseq_t;
6592 
6593 /*
6594  * Generic function for sequence number checks.
6595  */
6596 static rfs4_chkseq_t
6597 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
6598 		seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
6599 {
6600 	/* Same sequence ids and matching operations? */
6601 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
6602 		if (copyres == TRUE) {
6603 			rfs4_free_reply(resop);
6604 			rfs4_copy_reply(resop, lastop);
6605 		}
6606 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6607 			"Replayed SEQID %d\n", seqid));
6608 		return (NFS4_CHKSEQ_REPLAY);
6609 	}
6610 
6611 	/* If the incoming sequence is not the next expected then it is bad */
6612 	if (rqst_seq != seqid + 1) {
6613 		if (rqst_seq == seqid) {
6614 			NFS4_DEBUG(rfs4_debug,
6615 				(CE_NOTE, "BAD SEQID: Replayed sequence id "
6616 				"but last op was %d current op is %d\n",
6617 				lastop->resop, resop->resop));
6618 			return (NFS4_CHKSEQ_BAD);
6619 		}
6620 		NFS4_DEBUG(rfs4_debug,
6621 			(CE_NOTE, "BAD SEQID: got %u expecting %u\n",
6622 				rqst_seq, seqid));
6623 		return (NFS4_CHKSEQ_BAD);
6624 	}
6625 
6626 	/* Everything okay -- next expected */
6627 	return (NFS4_CHKSEQ_OKAY);
6628 }
6629 
6630 
6631 static rfs4_chkseq_t
6632 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
6633 {
6634 	rfs4_chkseq_t rc;
6635 
6636 	rfs4_dbe_lock(op->dbe);
6637 	rc = rfs4_check_seqid(op->open_seqid, op->reply, seqid, resop, TRUE);
6638 	rfs4_dbe_unlock(op->dbe);
6639 
6640 	if (rc == NFS4_CHKSEQ_OKAY)
6641 		rfs4_update_lease(op->client);
6642 
6643 	return (rc);
6644 }
6645 
6646 static rfs4_chkseq_t
6647 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op,
6648 	nfs_resop4 *resop)
6649 {
6650 	rfs4_chkseq_t rc;
6651 
6652 	rfs4_dbe_lock(op->dbe);
6653 	rc = rfs4_check_seqid(op->open_seqid, op->reply,
6654 		olo_seqid, resop, FALSE);
6655 	rfs4_dbe_unlock(op->dbe);
6656 
6657 	return (rc);
6658 }
6659 
6660 static rfs4_chkseq_t
6661 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lp, nfs_resop4 *resop)
6662 {
6663 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
6664 
6665 	rfs4_dbe_lock(lp->dbe);
6666 	if (!lp->skip_seqid_check)
6667 		rc = rfs4_check_seqid(lp->seqid, lp->reply,
6668 			seqid, resop, TRUE);
6669 	rfs4_dbe_unlock(lp->dbe);
6670 
6671 	return (rc);
6672 }
6673 
6674 static void
6675 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
6676 	    struct svc_req *req, struct compound_state *cs)
6677 {
6678 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
6679 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
6680 	open_owner4 *owner = &args->owner;
6681 	open_claim_type4 claim = args->claim;
6682 	rfs4_client_t *cp;
6683 	rfs4_openowner_t *oo;
6684 	bool_t create;
6685 	bool_t replay = FALSE;
6686 	int can_reclaim;
6687 
6688 
6689 	if (cs->vp == NULL) {
6690 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6691 		return;
6692 	}
6693 
6694 	/*
6695 	 * Need to check clientid and lease expiration first based on
6696 	 * error ordering and incrementing sequence id.
6697 	 */
6698 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
6699 	if (cp == NULL) {
6700 		*cs->statusp = resp->status =
6701 			rfs4_check_clientid(&owner->clientid, 0);
6702 		return;
6703 	}
6704 
6705 	if (rfs4_lease_expired(cp)) {
6706 		rfs4_client_close(cp);
6707 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
6708 		return;
6709 	}
6710 	can_reclaim = cp->can_reclaim;
6711 
6712 	/*
6713 	 * Find the open_owner for use from this point forward.  Take
6714 	 * care in updating the sequence id based on the type of error
6715 	 * being returned.
6716 	 */
6717 retry:
6718 	create = TRUE;
6719 	oo = rfs4_findopenowner(owner, &create, args->seqid);
6720 	if (oo == NULL) {
6721 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
6722 		rfs4_client_rele(cp);
6723 		return;
6724 	}
6725 
6726 	/* Hold off access to the sequence space while the open is done */
6727 	rfs4_sw_enter(&oo->oo_sw);
6728 
6729 	/*
6730 	 * If the open_owner existed before at the server, then check
6731 	 * the sequence id.
6732 	 */
6733 	if (!create && !oo->postpone_confirm) {
6734 		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
6735 		case NFS4_CHKSEQ_BAD:
6736 			if ((args->seqid > oo->open_seqid) &&
6737 				oo->need_confirm) {
6738 				rfs4_free_opens(oo, TRUE, FALSE);
6739 				rfs4_sw_exit(&oo->oo_sw);
6740 				rfs4_openowner_rele(oo);
6741 				goto retry;
6742 			}
6743 			resp->status = NFS4ERR_BAD_SEQID;
6744 			goto out;
6745 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
6746 			replay = TRUE;
6747 			goto out;
6748 		default:
6749 			break;
6750 		}
6751 
6752 		/*
6753 		 * Sequence was ok and open owner exists
6754 		 * check to see if we have yet to see an
6755 		 * open_confirm.
6756 		 */
6757 		if (oo->need_confirm) {
6758 			rfs4_free_opens(oo, TRUE, FALSE);
6759 			rfs4_sw_exit(&oo->oo_sw);
6760 			rfs4_openowner_rele(oo);
6761 			goto retry;
6762 		}
6763 	}
6764 	/* Grace only applies to regular-type OPENs */
6765 	if (rfs4_clnt_in_grace(cp) &&
6766 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
6767 		*cs->statusp = resp->status = NFS4ERR_GRACE;
6768 		goto out;
6769 	}
6770 
6771 	/*
6772 	 * If previous state at the server existed then can_reclaim
6773 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
6774 	 * client.
6775 	 */
6776 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
6777 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6778 		goto out;
6779 	}
6780 
6781 
6782 	/*
6783 	 * Reject the open if the client has missed the grace period
6784 	 */
6785 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
6786 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
6787 		goto out;
6788 	}
6789 
6790 	/* Couple of up-front bookkeeping items */
6791 	if (oo->need_confirm) {
6792 		/*
6793 		 * If this is a reclaim OPEN then we should not ask
6794 		 * for a confirmation of the open_owner per the
6795 		 * protocol specification.
6796 		 */
6797 		if (claim == CLAIM_PREVIOUS)
6798 			oo->need_confirm = FALSE;
6799 		else
6800 			resp->rflags |= OPEN4_RESULT_CONFIRM;
6801 	}
6802 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
6803 
6804 	/*
6805 	 * If there is an unshared filesystem mounted on this vnode,
6806 	 * do not allow to open/create in this directory.
6807 	 */
6808 	if (vn_ismntpt(cs->vp)) {
6809 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
6810 		goto out;
6811 	}
6812 
6813 	/*
6814 	 * access must READ, WRITE, or BOTH.  No access is invalid.
6815 	 * deny can be READ, WRITE, BOTH, or NONE.
6816 	 * bits not defined for access/deny are invalid.
6817 	 */
6818 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
6819 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
6820 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
6821 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6822 		goto out;
6823 	}
6824 
6825 
6826 	/*
6827 	 * make sure attrset is zero before response is built.
6828 	 */
6829 	resp->attrset = 0;
6830 
6831 	switch (claim) {
6832 	case CLAIM_NULL:
6833 		rfs4_do_opennull(cs, req, args, oo, resp);
6834 	    break;
6835 	case CLAIM_PREVIOUS:
6836 		rfs4_do_openprev(cs, req, args, oo, resp);
6837 	    break;
6838 	case CLAIM_DELEGATE_CUR:
6839 		rfs4_do_opendelcur(cs, req, args, oo, resp);
6840 	    break;
6841 	case CLAIM_DELEGATE_PREV:
6842 		rfs4_do_opendelprev(cs, req, args, oo, resp);
6843 	    break;
6844 	default:
6845 		resp->status = NFS4ERR_INVAL;
6846 		break;
6847 	}
6848 
6849 out:
6850 	rfs4_client_rele(cp);
6851 
6852 	/* Catch sequence id handling here to make it a little easier */
6853 	switch (resp->status) {
6854 	case NFS4ERR_BADXDR:
6855 	case NFS4ERR_BAD_SEQID:
6856 	case NFS4ERR_BAD_STATEID:
6857 	case NFS4ERR_NOFILEHANDLE:
6858 	case NFS4ERR_RESOURCE:
6859 	case NFS4ERR_STALE_CLIENTID:
6860 	case NFS4ERR_STALE_STATEID:
6861 		/*
6862 		 * The protocol states that if any of these errors are
6863 		 * being returned, the sequence id should not be
6864 		 * incremented.  Any other return requires an
6865 		 * increment.
6866 		 */
6867 		break;
6868 	default:
6869 		/* Always update the lease in this case */
6870 		rfs4_update_lease(oo->client);
6871 
6872 		/* Regular response - copy the result */
6873 		if (!replay)
6874 			rfs4_update_open_resp(oo, resop, &cs->fh);
6875 
6876 		/*
6877 		 * REPLAY case: Only if the previous response was OK
6878 		 * do we copy the filehandle.  If not OK, no
6879 		 * filehandle to copy.
6880 		 */
6881 		if (replay == TRUE &&
6882 		    resp->status == NFS4_OK &&
6883 		    oo->reply_fh.nfs_fh4_val) {
6884 			/*
6885 			 * If this is a replay, we must restore the
6886 			 * current filehandle/vp to that of what was
6887 			 * returned originally.  Try our best to do
6888 			 * it.
6889 			 */
6890 			nfs_fh4_fmt_t *fh_fmtp =
6891 				(nfs_fh4_fmt_t *)oo->reply_fh.nfs_fh4_val;
6892 
6893 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
6894 				(fid_t *)&fh_fmtp->fh4_xlen, NULL);
6895 
6896 			if (cs->exi == NULL) {
6897 				resp->status = NFS4ERR_STALE;
6898 				goto finish;
6899 			}
6900 
6901 			VN_RELE(cs->vp);
6902 
6903 			cs->vp = nfs4_fhtovp(&oo->reply_fh, cs->exi,
6904 				&resp->status);
6905 
6906 			if (cs->vp == NULL)
6907 				goto finish;
6908 
6909 			nfs_fh4_copy(&oo->reply_fh, &cs->fh);
6910 		}
6911 
6912 		/*
6913 		 * If this was a replay, no need to update the
6914 		 * sequence id. If the open_owner was not created on
6915 		 * this pass, then update.  The first use of an
6916 		 * open_owner will not bump the sequence id.
6917 		 */
6918 		if (replay == FALSE && !create)
6919 			rfs4_update_open_sequence(oo);
6920 		/*
6921 		 * If the client is receiving an error and the
6922 		 * open_owner needs to be confirmed, there is no way
6923 		 * to notify the client of this fact ignoring the fact
6924 		 * that the server has no method of returning a
6925 		 * stateid to confirm.  Therefore, the server needs to
6926 		 * mark this open_owner in a way as to avoid the
6927 		 * sequence id checking the next time the client uses
6928 		 * this open_owner.
6929 		 */
6930 		if (resp->status != NFS4_OK && oo->need_confirm)
6931 			oo->postpone_confirm = TRUE;
6932 		/*
6933 		 * If OK response then clear the postpone flag and
6934 		 * reset the sequence id to keep in sync with the
6935 		 * client.
6936 		 */
6937 		if (resp->status == NFS4_OK && oo->postpone_confirm) {
6938 			oo->postpone_confirm = FALSE;
6939 			oo->open_seqid = args->seqid;
6940 		}
6941 		break;
6942 	}
6943 
6944 finish:
6945 	*cs->statusp = resp->status;
6946 
6947 	rfs4_sw_exit(&oo->oo_sw);
6948 	rfs4_openowner_rele(oo);
6949 }
6950 
6951 /*ARGSUSED*/
6952 void
6953 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
6954 		    struct svc_req *req, struct compound_state *cs)
6955 {
6956 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
6957 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
6958 	rfs4_state_t *sp;
6959 	nfsstat4 status;
6960 
6961 	if (cs->vp == NULL) {
6962 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
6963 		return;
6964 	}
6965 
6966 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
6967 	if (status != NFS4_OK) {
6968 		*cs->statusp = resp->status = status;
6969 		return;
6970 	}
6971 
6972 	/* Ensure specified filehandle matches */
6973 	if (cs->vp != sp->finfo->vp) {
6974 		rfs4_state_rele(sp);
6975 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6976 		return;
6977 	}
6978 
6979 	/* hold off other access to open_owner while we tinker */
6980 	rfs4_sw_enter(&sp->owner->oo_sw);
6981 
6982 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
6983 	case NFS4_CHECK_STATEID_OKAY:
6984 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
6985 			resop) != 0) {
6986 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
6987 			break;
6988 		}
6989 		/*
6990 		 * If it is the appropriate stateid and determined to
6991 		 * be "OKAY" then this means that the stateid does not
6992 		 * need to be confirmed and the client is in error for
6993 		 * sending an OPEN_CONFIRM.
6994 		 */
6995 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
6996 		break;
6997 	case NFS4_CHECK_STATEID_OLD:
6998 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
6999 		break;
7000 	case NFS4_CHECK_STATEID_BAD:
7001 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7002 		break;
7003 	case NFS4_CHECK_STATEID_EXPIRED:
7004 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7005 		break;
7006 	case NFS4_CHECK_STATEID_CLOSED:
7007 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7008 		break;
7009 	case NFS4_CHECK_STATEID_REPLAY:
7010 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7011 		case NFS4_CHKSEQ_OKAY:
7012 			/*
7013 			 * This is replayed stateid; if seqid matches
7014 			 * next expected, then client is using wrong seqid.
7015 			 */
7016 			/* fall through */
7017 		case NFS4_CHKSEQ_BAD:
7018 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7019 			break;
7020 		case NFS4_CHKSEQ_REPLAY:
7021 			/*
7022 			 * Note this case is the duplicate case so
7023 			 * resp->status is already set.
7024 			 */
7025 			*cs->statusp = resp->status;
7026 			rfs4_update_lease(sp->owner->client);
7027 			break;
7028 		}
7029 		break;
7030 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7031 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7032 			resop) != NFS4_CHKSEQ_OKAY) {
7033 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7034 			break;
7035 		}
7036 		*cs->statusp = resp->status = NFS4_OK;
7037 
7038 		next_stateid(&sp->stateid);
7039 		resp->open_stateid = sp->stateid.stateid;
7040 		sp->owner->need_confirm = FALSE;
7041 		rfs4_update_lease(sp->owner->client);
7042 		rfs4_update_open_sequence(sp->owner);
7043 		rfs4_update_open_resp(sp->owner, resop, NULL);
7044 		break;
7045 	default:
7046 		ASSERT(FALSE);
7047 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7048 		break;
7049 	}
7050 	rfs4_sw_exit(&sp->owner->oo_sw);
7051 	rfs4_state_rele(sp);
7052 }
7053 
7054 /*ARGSUSED*/
7055 void
7056 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7057 		    struct svc_req *req, struct compound_state *cs)
7058 {
7059 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7060 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7061 	uint32_t access = args->share_access;
7062 	uint32_t deny = args->share_deny;
7063 	nfsstat4 status;
7064 	rfs4_state_t *sp;
7065 	rfs4_file_t *fp;
7066 
7067 	if (cs->vp == NULL) {
7068 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7069 		return;
7070 	}
7071 
7072 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7073 	if (status != NFS4_OK) {
7074 		*cs->statusp = resp->status = status;
7075 		return;
7076 	}
7077 
7078 	/* Ensure specified filehandle matches */
7079 	if (cs->vp != sp->finfo->vp) {
7080 		rfs4_state_rele(sp);
7081 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7082 		return;
7083 	}
7084 
7085 	/* hold off other access to open_owner while we tinker */
7086 	rfs4_sw_enter(&sp->owner->oo_sw);
7087 
7088 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7089 	case NFS4_CHECK_STATEID_OKAY:
7090 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7091 			resop) != NFS4_CHKSEQ_OKAY) {
7092 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7093 			goto end;
7094 		}
7095 		break;
7096 	case NFS4_CHECK_STATEID_OLD:
7097 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7098 		goto end;
7099 	case NFS4_CHECK_STATEID_BAD:
7100 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7101 		goto end;
7102 	case NFS4_CHECK_STATEID_EXPIRED:
7103 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7104 		goto end;
7105 	case NFS4_CHECK_STATEID_CLOSED:
7106 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7107 		goto end;
7108 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7109 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7110 		goto end;
7111 	case NFS4_CHECK_STATEID_REPLAY:
7112 		/* Check the sequence id for the open owner */
7113 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7114 		case NFS4_CHKSEQ_OKAY:
7115 			/*
7116 			 * This is replayed stateid; if seqid matches
7117 			 * next expected, then client is using wrong seqid.
7118 			 */
7119 			/* fall through */
7120 		case NFS4_CHKSEQ_BAD:
7121 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7122 			goto end;
7123 		case NFS4_CHKSEQ_REPLAY:
7124 			/*
7125 			 * Note this case is the duplicate case so
7126 			 * resp->status is already set.
7127 			 */
7128 			*cs->statusp = resp->status;
7129 			rfs4_update_lease(sp->owner->client);
7130 			goto end;
7131 		}
7132 		break;
7133 	default:
7134 		ASSERT(FALSE);
7135 		break;
7136 	}
7137 
7138 	rfs4_dbe_lock(sp->dbe);
7139 	/*
7140 	 * Check that the new access modes and deny modes are valid.
7141 	 * Check that no invalid bits are set.
7142 	 */
7143 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7144 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_READ))) {
7145 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7146 		rfs4_update_open_sequence(sp->owner);
7147 		rfs4_dbe_unlock(sp->dbe);
7148 		goto end;
7149 	}
7150 
7151 	/*
7152 	 * The new modes must be a subset of the current modes and
7153 	 * the access must specify at least one mode. To test that
7154 	 * the new mode is a subset of the current modes we bitwise
7155 	 * AND them together and check that the result equals the new
7156 	 * mode. For example:
7157 	 * New mode, access == R and current mode, sp->share_access  == RW
7158 	 * access & sp->share_access == R == access, so the new access mode
7159 	 * is valid. Consider access == RW, sp->share_access = R
7160 	 * access & sp->share_access == R != access, so the new access mode
7161 	 * is invalid.
7162 	 */
7163 	if ((access & sp->share_access) != access ||
7164 	    (deny & sp->share_deny) != deny ||
7165 	    (access &
7166 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7167 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7168 		rfs4_update_open_sequence(sp->owner);
7169 		rfs4_dbe_unlock(sp->dbe);
7170 		goto end;
7171 	}
7172 
7173 	/*
7174 	 * Release any share locks associated with this stateID.
7175 	 * Strictly speaking, this violates the spec because the
7176 	 * spec effectively requires that open downgrade be atomic.
7177 	 * At present, fs_shrlock does not have this capability.
7178 	 */
7179 	rfs4_dbe_unlock(sp->dbe);
7180 	rfs4_unshare(sp);
7181 	rfs4_dbe_lock(sp->dbe);
7182 
7183 	fp = sp->finfo;
7184 	rfs4_dbe_lock(fp->dbe);
7185 
7186 	/*
7187 	 * If the current mode has deny read and the new mode
7188 	 * does not, decrement the number of deny read mode bits
7189 	 * and if it goes to zero turn off the deny read bit
7190 	 * on the file.
7191 	 */
7192 	if ((sp->share_deny & OPEN4_SHARE_DENY_READ) &&
7193 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
7194 		fp->deny_read--;
7195 		if (fp->deny_read == 0)
7196 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7197 	}
7198 
7199 	/*
7200 	 * If the current mode has deny write and the new mode
7201 	 * does not, decrement the number of deny write mode bits
7202 	 * and if it goes to zero turn off the deny write bit
7203 	 * on the file.
7204 	 */
7205 	if ((sp->share_deny & OPEN4_SHARE_DENY_WRITE) &&
7206 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7207 		fp->deny_write--;
7208 		if (fp->deny_write == 0)
7209 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7210 	}
7211 
7212 	/*
7213 	 * If the current mode has access read and the new mode
7214 	 * does not, decrement the number of access read mode bits
7215 	 * and if it goes to zero turn off the access read bit
7216 	 * on the file.
7217 	 */
7218 	if ((sp->share_access & OPEN4_SHARE_ACCESS_READ) &&
7219 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7220 		fp->access_read--;
7221 		if (fp->access_read == 0)
7222 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7223 	}
7224 
7225 	/*
7226 	 * If the current mode has access write and the new mode
7227 	 * does not, decrement the number of access write mode bits
7228 	 * and if it goes to zero turn off the access write bit
7229 	 * on the file.
7230 	 */
7231 	if ((sp->share_access & OPEN4_SHARE_ACCESS_WRITE) &&
7232 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7233 		fp->access_write--;
7234 		if (fp->access_write == 0)
7235 			fp->share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7236 	}
7237 
7238 	/* Set the new access and deny modes */
7239 	sp->share_access = access;
7240 	sp->share_deny = deny;
7241 	/* Check that the file is still accessible */
7242 	ASSERT(fp->share_access);
7243 
7244 	rfs4_dbe_unlock(fp->dbe);
7245 
7246 	rfs4_dbe_unlock(sp->dbe);
7247 	if ((status = rfs4_share(sp)) != NFS4_OK) {
7248 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7249 		rfs4_update_open_sequence(sp->owner);
7250 		goto end;
7251 	}
7252 
7253 	rfs4_dbe_lock(sp->dbe);
7254 
7255 	/* Update the stateid */
7256 	next_stateid(&sp->stateid);
7257 	resp->open_stateid = sp->stateid.stateid;
7258 
7259 	rfs4_dbe_unlock(sp->dbe);
7260 
7261 	*cs->statusp = resp->status = NFS4_OK;
7262 	/* Update the lease */
7263 	rfs4_update_lease(sp->owner->client);
7264 	/* And the sequence */
7265 	rfs4_update_open_sequence(sp->owner);
7266 	rfs4_update_open_resp(sp->owner, resop, NULL);
7267 
7268 end:
7269 	rfs4_sw_exit(&sp->owner->oo_sw);
7270 	rfs4_state_rele(sp);
7271 }
7272 
7273 /*
7274  * The logic behind this function is detailed in the NFSv4 RFC in the
7275  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7276  * that section for explicit guidance to server behavior for
7277  * SETCLIENTID.
7278  */
7279 void
7280 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7281 		    struct svc_req *req, struct compound_state *cs)
7282 {
7283 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7284 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7285 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7286 	bool_t create = TRUE;
7287 	char *addr, *netid;
7288 	int len;
7289 
7290 retry:
7291 	newcp = cp_confirmed = cp_unconfirmed = NULL;
7292 
7293 	/*
7294 	 * In search of an EXISTING client matching the incoming
7295 	 * request to establish a new client identifier at the server
7296 	 */
7297 	create = TRUE;
7298 	cp = rfs4_findclient(&args->client, &create, NULL);
7299 
7300 	/* Should never happen */
7301 	ASSERT(cp != NULL);
7302 
7303 	if (cp == NULL) {
7304 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7305 		return;
7306 	}
7307 
7308 	/*
7309 	 * Easiest case. Client identifier is newly created and is
7310 	 * unconfirmed.  Also note that for this case, no other
7311 	 * entries exist for the client identifier.  Nothing else to
7312 	 * check.  Just setup the response and respond.
7313 	 */
7314 	if (create) {
7315 		*cs->statusp = res->status = NFS4_OK;
7316 		res->SETCLIENTID4res_u.resok4.clientid = cp->clientid;
7317 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7318 							cp->confirm_verf;
7319 		/* Setup callback information; CB_NULL confirmation later */
7320 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7321 
7322 		rfs4_client_rele(cp);
7323 		return;
7324 	}
7325 
7326 	/*
7327 	 * An existing, confirmed client may exist but it may not have
7328 	 * been active for at least one lease period.  If so, then
7329 	 * "close" the client and create a new client identifier
7330 	 */
7331 	if (rfs4_lease_expired(cp)) {
7332 		rfs4_client_close(cp);
7333 		goto retry;
7334 	}
7335 
7336 	if (cp->need_confirm == TRUE)
7337 		cp_unconfirmed = cp;
7338 	else
7339 		cp_confirmed = cp;
7340 
7341 	cp = NULL;
7342 
7343 	/*
7344 	 * We have a confirmed client, now check for an
7345 	 * unconfimred entry
7346 	 */
7347 	if (cp_confirmed) {
7348 		/* If creds don't match then client identifier is inuse */
7349 		if (!creds_ok(cp_confirmed->cr_set, req, cs)) {
7350 			rfs4_cbinfo_t *cbp;
7351 			/*
7352 			 * Some one else has established this client
7353 			 * id. Try and say * who they are. We will use
7354 			 * the call back address supplied by * the
7355 			 * first client.
7356 			 */
7357 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7358 
7359 			addr = netid = NULL;
7360 
7361 			cbp = &cp_confirmed->cbinfo;
7362 			if (cbp->cb_callback.cb_location.r_addr &&
7363 			    cbp->cb_callback.cb_location.r_netid) {
7364 				cb_client4 *cbcp = &cbp->cb_callback;
7365 
7366 				len = strlen(cbcp->cb_location.r_addr)+1;
7367 				addr = kmem_alloc(len, KM_SLEEP);
7368 				bcopy(cbcp->cb_location.r_addr, addr, len);
7369 				len = strlen(cbcp->cb_location.r_netid)+1;
7370 				netid = kmem_alloc(len, KM_SLEEP);
7371 				bcopy(cbcp->cb_location.r_netid, netid, len);
7372 			}
7373 
7374 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
7375 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
7376 
7377 			rfs4_client_rele(cp_confirmed);
7378 		}
7379 
7380 		/*
7381 		 * Confirmed, creds match, and verifier matches; must
7382 		 * be an update of the callback info
7383 		 */
7384 		if (cp_confirmed->nfs_client.verifier ==
7385 						args->client.verifier) {
7386 			/* Setup callback information */
7387 			rfs4_client_setcb(cp_confirmed, &args->callback,
7388 						args->callback_ident);
7389 
7390 			/* everything okay -- move ahead */
7391 			*cs->statusp = res->status = NFS4_OK;
7392 			res->SETCLIENTID4res_u.resok4.clientid =
7393 				cp_confirmed->clientid;
7394 
7395 			/* update the confirm_verifier and return it */
7396 			rfs4_client_scv_next(cp_confirmed);
7397 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7398 						cp_confirmed->confirm_verf;
7399 
7400 			rfs4_client_rele(cp_confirmed);
7401 			return;
7402 		}
7403 
7404 		/*
7405 		 * Creds match but the verifier doesn't.  Must search
7406 		 * for an unconfirmed client that would be replaced by
7407 		 * this request.
7408 		 */
7409 		create = FALSE;
7410 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
7411 						cp_confirmed);
7412 	}
7413 
7414 	/*
7415 	 * At this point, we have taken care of the brand new client
7416 	 * struct, INUSE case, update of an existing, and confirmed
7417 	 * client struct.
7418 	 */
7419 
7420 	/*
7421 	 * check to see if things have changed while we originally
7422 	 * picked up the client struct.  If they have, then return and
7423 	 * retry the processing of this SETCLIENTID request.
7424 	 */
7425 	if (cp_unconfirmed) {
7426 		rfs4_dbe_lock(cp_unconfirmed->dbe);
7427 		if (!cp_unconfirmed->need_confirm) {
7428 			rfs4_dbe_unlock(cp_unconfirmed->dbe);
7429 			rfs4_client_rele(cp_unconfirmed);
7430 			if (cp_confirmed)
7431 				rfs4_client_rele(cp_confirmed);
7432 			goto retry;
7433 		}
7434 		/* do away with the old unconfirmed one */
7435 		rfs4_dbe_invalidate(cp_unconfirmed->dbe);
7436 		rfs4_dbe_unlock(cp_unconfirmed->dbe);
7437 		rfs4_client_rele(cp_unconfirmed);
7438 		cp_unconfirmed = NULL;
7439 	}
7440 
7441 	/*
7442 	 * This search will temporarily hide the confirmed client
7443 	 * struct while a new client struct is created as the
7444 	 * unconfirmed one.
7445 	 */
7446 	create = TRUE;
7447 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
7448 
7449 	ASSERT(newcp != NULL);
7450 
7451 	if (newcp == NULL) {
7452 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7453 		rfs4_client_rele(cp_confirmed);
7454 		return;
7455 	}
7456 
7457 	/*
7458 	 * If one was not created, then a similar request must be in
7459 	 * process so release and start over with this one
7460 	 */
7461 	if (create != TRUE) {
7462 		rfs4_client_rele(newcp);
7463 		if (cp_confirmed)
7464 			rfs4_client_rele(cp_confirmed);
7465 		goto retry;
7466 	}
7467 
7468 	*cs->statusp = res->status = NFS4_OK;
7469 	res->SETCLIENTID4res_u.resok4.clientid = newcp->clientid;
7470 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7471 							newcp->confirm_verf;
7472 	/* Setup callback information; CB_NULL confirmation later */
7473 	rfs4_client_setcb(newcp, &args->callback,
7474 				args->callback_ident);
7475 
7476 	newcp->cp_confirmed = cp_confirmed;
7477 
7478 	rfs4_client_rele(newcp);
7479 }
7480 
7481 /*ARGSUSED*/
7482 void
7483 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7484 			    struct svc_req *req, struct compound_state *cs)
7485 {
7486 	SETCLIENTID_CONFIRM4args *args =
7487 		&argop->nfs_argop4_u.opsetclientid_confirm;
7488 	SETCLIENTID_CONFIRM4res *res =
7489 		&resop->nfs_resop4_u.opsetclientid_confirm;
7490 	rfs4_client_t *cp, *cptoclose = NULL;
7491 
7492 	*cs->statusp = res->status = NFS4_OK;
7493 
7494 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
7495 
7496 	if (cp == NULL) {
7497 		*cs->statusp = res->status =
7498 			rfs4_check_clientid(&args->clientid, 1);
7499 		return;
7500 	}
7501 
7502 	if (!creds_ok(cp, req, cs)) {
7503 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
7504 		rfs4_client_rele(cp);
7505 		return;
7506 	}
7507 
7508 	/* If the verifier doesn't match, the record doesn't match */
7509 	if (cp->confirm_verf != args->setclientid_confirm) {
7510 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
7511 		rfs4_client_rele(cp);
7512 		return;
7513 	}
7514 
7515 	rfs4_dbe_lock(cp->dbe);
7516 	cp->need_confirm = FALSE;
7517 	if (cp->cp_confirmed) {
7518 		cptoclose = cp->cp_confirmed;
7519 		cptoclose->ss_remove = 1;
7520 		cp->cp_confirmed = NULL;
7521 	}
7522 
7523 	/*
7524 	 * Update the client's associated server instance, if it's changed
7525 	 * since the client was created.
7526 	 */
7527 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
7528 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
7529 
7530 	/*
7531 	 * Record clientid in stable storage.
7532 	 * Must be done after server instance has been assigned.
7533 	 */
7534 	rfs4_ss_clid(cp, req);
7535 
7536 	rfs4_dbe_unlock(cp->dbe);
7537 
7538 	if (cptoclose)
7539 		/* don't need to rele, client_close does it */
7540 		rfs4_client_close(cptoclose);
7541 
7542 	/* If needed, initiate CB_NULL call for callback path */
7543 	rfs4_deleg_cb_check(cp);
7544 	rfs4_update_lease(cp);
7545 
7546 	/*
7547 	 * Check to see if client can perform reclaims
7548 	 */
7549 	rfs4_ss_chkclid(cp);
7550 
7551 	rfs4_client_rele(cp);
7552 }
7553 
7554 
7555 /*ARGSUSED*/
7556 void
7557 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
7558 	    struct svc_req *req, struct compound_state *cs)
7559 {
7560 	/* XXX Currently not using req arg */
7561 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
7562 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
7563 	rfs4_state_t *sp;
7564 	nfsstat4 status;
7565 
7566 	if (cs->vp == NULL) {
7567 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7568 		return;
7569 	}
7570 
7571 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
7572 	if (status != NFS4_OK) {
7573 		*cs->statusp = resp->status = status;
7574 		return;
7575 	}
7576 
7577 	/* Ensure specified filehandle matches */
7578 	if (cs->vp != sp->finfo->vp) {
7579 		rfs4_state_rele(sp);
7580 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7581 		return;
7582 	}
7583 
7584 	/* hold off other access to open_owner while we tinker */
7585 	rfs4_sw_enter(&sp->owner->oo_sw);
7586 
7587 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7588 	case NFS4_CHECK_STATEID_OKAY:
7589 		if (rfs4_check_open_seqid(args->seqid, sp->owner,
7590 			resop) != NFS4_CHKSEQ_OKAY) {
7591 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7592 			goto end;
7593 		}
7594 		break;
7595 	case NFS4_CHECK_STATEID_OLD:
7596 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7597 		goto end;
7598 	case NFS4_CHECK_STATEID_BAD:
7599 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7600 		goto end;
7601 	case NFS4_CHECK_STATEID_EXPIRED:
7602 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7603 		goto end;
7604 	case NFS4_CHECK_STATEID_CLOSED:
7605 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7606 		goto end;
7607 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7608 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7609 		goto end;
7610 	case NFS4_CHECK_STATEID_REPLAY:
7611 		/* Check the sequence id for the open owner */
7612 		switch (rfs4_check_open_seqid(args->seqid, sp->owner, resop)) {
7613 		case NFS4_CHKSEQ_OKAY:
7614 			/*
7615 			 * This is replayed stateid; if seqid matches
7616 			 * next expected, then client is using wrong seqid.
7617 			 */
7618 			/* FALL THROUGH */
7619 		case NFS4_CHKSEQ_BAD:
7620 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7621 			goto end;
7622 		case NFS4_CHKSEQ_REPLAY:
7623 			/*
7624 			 * Note this case is the duplicate case so
7625 			 * resp->status is already set.
7626 			 */
7627 			*cs->statusp = resp->status;
7628 			rfs4_update_lease(sp->owner->client);
7629 			goto end;
7630 		}
7631 		break;
7632 	default:
7633 		ASSERT(FALSE);
7634 		break;
7635 	}
7636 
7637 	rfs4_dbe_lock(sp->dbe);
7638 
7639 	/* Update the stateid. */
7640 	next_stateid(&sp->stateid);
7641 	resp->open_stateid = sp->stateid.stateid;
7642 
7643 	rfs4_dbe_unlock(sp->dbe);
7644 
7645 	rfs4_update_lease(sp->owner->client);
7646 	rfs4_update_open_sequence(sp->owner);
7647 	rfs4_update_open_resp(sp->owner, resop, NULL);
7648 
7649 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7650 
7651 	*cs->statusp = resp->status = status;
7652 
7653 end:
7654 	rfs4_sw_exit(&sp->owner->oo_sw);
7655 	rfs4_state_rele(sp);
7656 }
7657 
7658 /*
7659  * Manage the counts on the file struct and close all file locks
7660  */
7661 /*ARGSUSED*/
7662 void
7663 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
7664 	bool_t close_of_client)
7665 {
7666 	rfs4_file_t *fp = sp->finfo;
7667 	rfs4_lo_state_t *lsp;
7668 	struct shrlock shr;
7669 	struct shr_locowner shr_loco;
7670 	int fflags, s_access, s_deny;
7671 
7672 	fflags = s_access = s_deny = 0;
7673 	/*
7674 	 * Decrement the count for each access and deny bit that this
7675 	 * state has contributed to the file. If the file counts go to zero
7676 	 * clear the appropriate bit in the appropriate mask.
7677 	 */
7678 
7679 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
7680 		fp->access_read--;
7681 		fflags |= FREAD;
7682 		s_access |= F_RDACC;
7683 		if (fp->access_read == 0)
7684 			fp->share_access &= ~OPEN4_SHARE_ACCESS_READ;
7685 	}
7686 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
7687 		fp->access_write--;
7688 		fflags |= FWRITE;
7689 		s_access |= F_WRACC;
7690 		if (fp->access_write == 0)
7691 			fp->share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
7692 	}
7693 	if (sp->share_deny & OPEN4_SHARE_DENY_READ) {
7694 		fp->deny_read--;
7695 		s_deny |= F_RDDNY;
7696 		if (fp->deny_read == 0)
7697 			fp->share_deny &= ~OPEN4_SHARE_DENY_READ;
7698 	}
7699 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE) {
7700 		fp->deny_write--;
7701 		s_deny |= F_WRDNY;
7702 		if (fp->deny_write == 0)
7703 			fp->share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7704 	}
7705 
7706 	/*
7707 	 * If this call is part of the larger closing down of client
7708 	 * state then it is just easier to release all locks
7709 	 * associated with this client instead of going through each
7710 	 * individual file and cleaning locks there.
7711 	 */
7712 	if (close_of_client) {
7713 		if (sp->owner->client->unlksys_completed == FALSE &&
7714 		    sp->lockownerlist.next->lsp != NULL &&
7715 			sp->owner->client->sysidt != LM_NOSYSID) {
7716 			/* Is the PxFS kernel module loaded? */
7717 			if (lm_remove_file_locks != NULL) {
7718 				int new_sysid;
7719 
7720 				/* Encode the cluster nodeid in new sysid */
7721 				new_sysid = sp->owner->client->sysidt;
7722 				lm_set_nlmid_flk(&new_sysid);
7723 
7724 				/*
7725 				 * This PxFS routine removes file locks for a
7726 				 * client over all nodes of a cluster.
7727 				 */
7728 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7729 				    "lm_remove_file_locks(sysid=0x%x)\n",
7730 				    new_sysid));
7731 				(*lm_remove_file_locks)(new_sysid);
7732 			} else {
7733 				struct flock64 flk;
7734 
7735 				/* Release all locks for this client */
7736 				flk.l_type = F_UNLKSYS;
7737 				flk.l_whence = 0;
7738 				flk.l_start = 0;
7739 				flk.l_len = 0;
7740 				flk.l_sysid = sp->owner->client->sysidt;
7741 				flk.l_pid = 0;
7742 				(void) VOP_FRLOCK(sp->finfo->vp, F_SETLK, &flk,
7743 				    F_REMOTELOCK | FREAD | FWRITE,
7744 				    (u_offset_t)0, NULL, CRED());
7745 			}
7746 
7747 			sp->owner->client->unlksys_completed = TRUE;
7748 		}
7749 	}
7750 
7751 	/*
7752 	 * Release all locks on this file by this lock owner or at
7753 	 * least mark the locks as having been released
7754 	 */
7755 	for (lsp = sp->lockownerlist.next->lsp; lsp != NULL;
7756 		lsp = lsp->lockownerlist.next->lsp) {
7757 
7758 		lsp->locks_cleaned = TRUE;
7759 
7760 		/* Was this already taken care of above? */
7761 		if (!close_of_client &&
7762 		    sp->owner->client->sysidt != LM_NOSYSID)
7763 			(void) cleanlocks(sp->finfo->vp, lsp->locker->pid,
7764 				lsp->locker->client->sysidt);
7765 	}
7766 
7767 	/*
7768 	 * Release any shrlocks associated with this open state ID.
7769 	 * This must be done before the rfs4_state gets marked closed.
7770 	 */
7771 	if (sp->owner->client->sysidt != LM_NOSYSID) {
7772 		shr.s_access = s_access;
7773 		shr.s_deny = s_deny;
7774 		shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
7775 		shr.s_sysid = sp->owner->client->sysidt;
7776 		shr_loco.sl_pid = shr.s_pid;
7777 		shr_loco.sl_id = shr.s_sysid;
7778 		shr.s_owner = (caddr_t)&shr_loco;
7779 		shr.s_own_len = sizeof (shr_loco);
7780 		(void) vop_shrlock(sp->finfo->vp, F_UNSHARE, &shr, fflags);
7781 	}
7782 }
7783 
7784 /*
7785  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
7786  */
7787 static nfsstat4
7788 lock_denied(LOCK4denied *dp, struct flock64 *flk)
7789 {
7790 	rfs4_lockowner_t *lo;
7791 	rfs4_client_t *cp;
7792 	uint32_t len;
7793 
7794 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
7795 	if (lo != NULL) {
7796 		cp = lo->client;
7797 		if (rfs4_lease_expired(cp)) {
7798 			rfs4_lockowner_rele(lo);
7799 			rfs4_dbe_hold(cp->dbe);
7800 			rfs4_client_close(cp);
7801 			return (NFS4ERR_EXPIRED);
7802 		}
7803 		dp->owner.clientid = lo->owner.clientid;
7804 		len = lo->owner.owner_len;
7805 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7806 		bcopy(lo->owner.owner_val, dp->owner.owner_val, len);
7807 		dp->owner.owner_len = len;
7808 		rfs4_lockowner_rele(lo);
7809 		goto finish;
7810 	}
7811 
7812 	/*
7813 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
7814 	 * of the client id contain the boot time for a NFS4 lock. So we
7815 	 * fabricate and identity by setting clientid to the sysid, and
7816 	 * the lock owner to the pid.
7817 	 */
7818 	dp->owner.clientid = flk->l_sysid;
7819 	len = sizeof (pid_t);
7820 	dp->owner.owner_len = len;
7821 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
7822 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
7823 finish:
7824 	dp->offset = flk->l_start;
7825 	dp->length = flk->l_len;
7826 
7827 	if (flk->l_type == F_RDLCK)
7828 		dp->locktype = READ_LT;
7829 	else if (flk->l_type == F_WRLCK)
7830 		dp->locktype = WRITE_LT;
7831 	else
7832 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
7833 
7834 	return (NFS4_OK);
7835 }
7836 
7837 static int
7838 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
7839 {
7840 	int error;
7841 	struct flock64 flk;
7842 	int i;
7843 	clock_t delaytime;
7844 
7845 retry:
7846 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
7847 
7848 	for (i = 0; i < rfs4_maxlock_tries; i++) {
7849 		LOCK_PRINT(rfs4_debug, "setlock", F_SETLK, flock);
7850 		error = VOP_FRLOCK(vp, F_SETLK,
7851 				flock, flag, (u_offset_t)0, NULL, cred);
7852 
7853 		if (error != EAGAIN && error != EACCES)
7854 			break;
7855 
7856 		if (i < rfs4_maxlock_tries - 1) {
7857 			delay(delaytime);
7858 			delaytime *= 2;
7859 		}
7860 	}
7861 
7862 	if (error == EAGAIN || error == EACCES) {
7863 		/* Get the owner of the lock */
7864 		flk = *flock;
7865 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
7866 		if (VOP_FRLOCK(vp, F_GETLK,
7867 			    &flk,  flag, (u_offset_t)0, NULL, cred) == 0) {
7868 			if (flk.l_type == F_UNLCK) {
7869 				/* No longer locked, retry */
7870 				goto retry;
7871 			}
7872 			*flock = flk;
7873 			LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
7874 				F_GETLK, &flk);
7875 		}
7876 	}
7877 
7878 	return (error);
7879 }
7880 
7881 /*ARGSUSED*/
7882 static nfsstat4
7883 rfs4_do_lock(rfs4_lo_state_t *lp, nfs_lock_type4 locktype,
7884 	    seqid4 seqid, offset4 offset,
7885 	    length4 length, cred_t *cred, nfs_resop4 *resop)
7886 {
7887 	nfsstat4 status;
7888 	rfs4_lockowner_t *lo = lp->locker;
7889 	rfs4_state_t *sp = lp->state;
7890 	struct flock64 flock;
7891 	int16_t ltype;
7892 	int flag;
7893 	int error;
7894 	sysid_t sysid;
7895 	LOCK4res *lres;
7896 
7897 	if (rfs4_lease_expired(lo->client)) {
7898 		return (NFS4ERR_EXPIRED);
7899 	}
7900 
7901 	if ((status = rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
7902 		return (status);
7903 
7904 	/* Check for zero length. To lock to end of file use all ones for V4 */
7905 	if (length == 0)
7906 		return (NFS4ERR_INVAL);
7907 	else if (length == (length4)(~0))
7908 		length = 0;		/* Posix to end of file  */
7909 
7910 retry:
7911 	rfs4_dbe_lock(sp->dbe);
7912 
7913 
7914 	if (resop->resop != OP_LOCKU) {
7915 		switch (locktype) {
7916 		case READ_LT:
7917 		case READW_LT:
7918 			if ((sp->share_access
7919 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
7920 				rfs4_dbe_unlock(sp->dbe);
7921 
7922 				return (NFS4ERR_OPENMODE);
7923 			}
7924 			ltype = F_RDLCK;
7925 			break;
7926 		case WRITE_LT:
7927 		case WRITEW_LT:
7928 			if ((sp->share_access
7929 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7930 				rfs4_dbe_unlock(sp->dbe);
7931 
7932 				return (NFS4ERR_OPENMODE);
7933 			}
7934 			ltype = F_WRLCK;
7935 			break;
7936 		}
7937 	} else
7938 		ltype = F_UNLCK;
7939 
7940 	flock.l_type = ltype;
7941 	flock.l_whence = 0;		/* SEEK_SET */
7942 	flock.l_start = offset;
7943 	flock.l_len = length;
7944 	flock.l_sysid = sysid;
7945 	flock.l_pid = lp->locker->pid;
7946 
7947 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
7948 	if (flock.l_len < 0 || flock.l_start < 0) {
7949 		rfs4_dbe_unlock(sp->dbe);
7950 		return (NFS4ERR_INVAL);
7951 	}
7952 
7953 	/*
7954 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
7955 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
7956 	 */
7957 	flag = (int)sp->share_access | F_REMOTELOCK;
7958 
7959 	error = setlock(sp->finfo->vp, &flock, flag, cred);
7960 	if (error == 0) {
7961 		rfs4_dbe_lock(lp->dbe);
7962 		next_stateid(&lp->lockid);
7963 		rfs4_dbe_unlock(lp->dbe);
7964 	}
7965 
7966 	rfs4_dbe_unlock(sp->dbe);
7967 
7968 	/*
7969 	 * N.B. We map error values to nfsv4 errors. This is differrent
7970 	 * than puterrno4 routine.
7971 	 */
7972 	switch (error) {
7973 	case 0:
7974 		status = NFS4_OK;
7975 		break;
7976 	case EAGAIN:
7977 	case EACCES:		/* Old value */
7978 		/* Can only get here if op is OP_LOCK */
7979 		ASSERT(resop->resop == OP_LOCK);
7980 		lres = &resop->nfs_resop4_u.oplock;
7981 		status = NFS4ERR_DENIED;
7982 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
7983 			== NFS4ERR_EXPIRED)
7984 			goto retry;
7985 		break;
7986 	case ENOLCK:
7987 		status = NFS4ERR_DELAY;
7988 		break;
7989 	case EOVERFLOW:
7990 		status = NFS4ERR_INVAL;
7991 		break;
7992 	case EINVAL:
7993 		status = NFS4ERR_NOTSUPP;
7994 		break;
7995 	default:
7996 		cmn_err(CE_WARN, "rfs4_do_lock: unexpected errno (%d)",
7997 			error);
7998 		status = NFS4ERR_SERVERFAULT;
7999 		break;
8000 	}
8001 
8002 	return (status);
8003 }
8004 
8005 /*ARGSUSED*/
8006 void
8007 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8008 	    struct svc_req *req, struct compound_state *cs)
8009 {
8010 	/* XXX Currently not using req arg */
8011 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
8012 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8013 	nfsstat4 status;
8014 	stateid4 *stateid;
8015 	rfs4_lockowner_t *lo;
8016 	rfs4_client_t *cp;
8017 	rfs4_state_t *sp = NULL;
8018 	rfs4_lo_state_t *lsp = NULL;
8019 	bool_t ls_sw_held = FALSE;
8020 	bool_t create = TRUE;
8021 	bool_t lcreate = TRUE;
8022 	bool_t dup_lock = FALSE;
8023 	int rc;
8024 
8025 	if (cs->vp == NULL) {
8026 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8027 		return;
8028 	}
8029 
8030 	if (args->locker.new_lock_owner) {
8031 		/* Create a new lockowner for this instance */
8032 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8033 
8034 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8035 
8036 		stateid = &olo->open_stateid;
8037 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8038 		if (status != NFS4_OK) {
8039 			NFS4_DEBUG(rfs4_debug,
8040 				(CE_NOTE, "Get state failed in lock %d",
8041 				status));
8042 			*cs->statusp = resp->status = status;
8043 			return;
8044 		}
8045 
8046 		/* Ensure specified filehandle matches */
8047 		if (cs->vp != sp->finfo->vp) {
8048 			rfs4_state_rele(sp);
8049 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8050 			return;
8051 		}
8052 
8053 		/* hold off other access to open_owner while we tinker */
8054 		rfs4_sw_enter(&sp->owner->oo_sw);
8055 
8056 		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8057 		case NFS4_CHECK_STATEID_OLD:
8058 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8059 			goto end;
8060 		case NFS4_CHECK_STATEID_BAD:
8061 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8062 			goto end;
8063 		case NFS4_CHECK_STATEID_EXPIRED:
8064 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8065 			goto end;
8066 		case NFS4_CHECK_STATEID_UNCONFIRMED:
8067 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8068 			goto end;
8069 		case NFS4_CHECK_STATEID_CLOSED:
8070 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8071 			goto end;
8072 		case NFS4_CHECK_STATEID_OKAY:
8073 		case NFS4_CHECK_STATEID_REPLAY:
8074 			switch (rfs4_check_olo_seqid(olo->open_seqid,
8075 				sp->owner, resop)) {
8076 			case NFS4_CHKSEQ_OKAY:
8077 				if (rc == NFS4_CHECK_STATEID_OKAY)
8078 					break;
8079 				/*
8080 				 * This is replayed stateid; if seqid
8081 				 * matches next expected, then client
8082 				 * is using wrong seqid.
8083 				 */
8084 				/* FALLTHROUGH */
8085 			case NFS4_CHKSEQ_BAD:
8086 				*cs->statusp = resp->status =
8087 					NFS4ERR_BAD_SEQID;
8088 				goto end;
8089 			case NFS4_CHKSEQ_REPLAY:
8090 				/* This is a duplicate LOCK request */
8091 				dup_lock = TRUE;
8092 
8093 				/*
8094 				 * For a duplicate we do not want to
8095 				 * create a new lockowner as it should
8096 				 * already exist.
8097 				 * Turn off the lockowner create flag.
8098 				 */
8099 				lcreate = FALSE;
8100 			}
8101 			break;
8102 		}
8103 
8104 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8105 		if (lo == NULL) {
8106 			NFS4_DEBUG(rfs4_debug,
8107 				(CE_NOTE, "rfs4_op_lock: no lock owner"));
8108 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
8109 			goto end;
8110 		}
8111 
8112 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8113 		if (lsp == NULL) {
8114 			rfs4_update_lease(sp->owner->client);
8115 			/*
8116 			 * Only update theh open_seqid if this is not
8117 			 * a duplicate request
8118 			 */
8119 			if (dup_lock == FALSE) {
8120 				rfs4_update_open_sequence(sp->owner);
8121 			}
8122 
8123 			NFS4_DEBUG(rfs4_debug,
8124 				(CE_NOTE, "rfs4_op_lock: no state"));
8125 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8126 			rfs4_update_open_resp(sp->owner, resop, NULL);
8127 			rfs4_lockowner_rele(lo);
8128 			goto end;
8129 		}
8130 
8131 		/*
8132 		 * This is the new_lock_owner branch and the client is
8133 		 * supposed to be associating a new lock_owner with
8134 		 * the open file at this point.  If we find that a
8135 		 * lock_owner/state association already exists and a
8136 		 * successful LOCK request was returned to the client,
8137 		 * an error is returned to the client since this is
8138 		 * not appropriate.  The client should be using the
8139 		 * existing lock_owner branch.
8140 		 */
8141 		if (dup_lock == FALSE && create == FALSE) {
8142 			if (lsp->lock_completed == TRUE) {
8143 				*cs->statusp =
8144 					resp->status = NFS4ERR_BAD_SEQID;
8145 				rfs4_lockowner_rele(lo);
8146 				goto end;
8147 			}
8148 		}
8149 
8150 		rfs4_update_lease(sp->owner->client);
8151 
8152 		/*
8153 		 * Only update theh open_seqid if this is not
8154 		 * a duplicate request
8155 		 */
8156 		if (dup_lock == FALSE) {
8157 			rfs4_update_open_sequence(sp->owner);
8158 		}
8159 
8160 		/*
8161 		 * If this is a duplicate lock request, just copy the
8162 		 * previously saved reply and return.
8163 		 */
8164 		if (dup_lock == TRUE) {
8165 			/* verify that lock_seqid's match */
8166 			if (lsp->seqid != olo->lock_seqid) {
8167 				NFS4_DEBUG(rfs4_debug,
8168 				(CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8169 				"lsp->seqid=%d old->seqid=%d",
8170 				lsp->seqid, olo->lock_seqid));
8171 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8172 			} else {
8173 				rfs4_copy_reply(resop, lsp->reply);
8174 				/*
8175 				 * Make sure to copy the just
8176 				 * retrieved reply status into the
8177 				 * overall compound status
8178 				 */
8179 				*cs->statusp = resp->status;
8180 			}
8181 			rfs4_lockowner_rele(lo);
8182 			goto end;
8183 		}
8184 
8185 		rfs4_dbe_lock(lsp->dbe);
8186 
8187 		/* Make sure to update the lock sequence id */
8188 		lsp->seqid = olo->lock_seqid;
8189 
8190 		NFS4_DEBUG(rfs4_debug,
8191 			(CE_NOTE, "Lock seqid established as %d", lsp->seqid));
8192 
8193 		/*
8194 		 * This is used to signify the newly created lockowner
8195 		 * stateid and its sequence number.  The checks for
8196 		 * sequence number and increment don't occur on the
8197 		 * very first lock request for a lockowner.
8198 		 */
8199 		lsp->skip_seqid_check = TRUE;
8200 
8201 		/* hold off other access to lsp while we tinker */
8202 		rfs4_sw_enter(&lsp->ls_sw);
8203 		ls_sw_held = TRUE;
8204 
8205 		rfs4_dbe_unlock(lsp->dbe);
8206 
8207 		rfs4_lockowner_rele(lo);
8208 	} else {
8209 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8210 		/* get lsp and hold the lock on the underlying file struct */
8211 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8212 		    != NFS4_OK) {
8213 			*cs->statusp = resp->status = status;
8214 			return;
8215 		}
8216 		create = FALSE;	/* We didn't create lsp */
8217 
8218 		/* Ensure specified filehandle matches */
8219 		if (cs->vp != lsp->state->finfo->vp) {
8220 			rfs4_lo_state_rele(lsp, TRUE);
8221 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8222 			return;
8223 		}
8224 
8225 		/* hold off other access to lsp while we tinker */
8226 		rfs4_sw_enter(&lsp->ls_sw);
8227 		ls_sw_held = TRUE;
8228 
8229 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8230 		/*
8231 		 * The stateid looks like it was okay (expected to be
8232 		 * the next one)
8233 		 */
8234 		case NFS4_CHECK_STATEID_OKAY:
8235 			/*
8236 			 * The sequence id is now checked.  Determine
8237 			 * if this is a replay or if it is in the
8238 			 * expected (next) sequence.  In the case of a
8239 			 * replay, there are two replay conditions
8240 			 * that may occur.  The first is the normal
8241 			 * condition where a LOCK is done with a
8242 			 * NFS4_OK response and the stateid is
8243 			 * updated.  That case is handled below when
8244 			 * the stateid is identified as a REPLAY.  The
8245 			 * second is the case where an error is
8246 			 * returned, like NFS4ERR_DENIED, and the
8247 			 * sequence number is updated but the stateid
8248 			 * is not updated.  This second case is dealt
8249 			 * with here.  So it may seem odd that the
8250 			 * stateid is okay but the sequence id is a
8251 			 * replay but it is okay.
8252 			 */
8253 			switch (rfs4_check_lock_seqid(
8254 				args->locker.locker4_u.lock_owner.lock_seqid,
8255 				lsp, resop)) {
8256 			case NFS4_CHKSEQ_REPLAY:
8257 				if (resp->status != NFS4_OK) {
8258 					/*
8259 					 * Here is our replay and need
8260 					 * to verify that the last
8261 					 * response was an error.
8262 					 */
8263 					*cs->statusp = resp->status;
8264 					goto end;
8265 				}
8266 				/*
8267 				 * This is done since the sequence id
8268 				 * looked like a replay but it didn't
8269 				 * pass our check so a BAD_SEQID is
8270 				 * returned as a result.
8271 				 */
8272 				/*FALLTHROUGH*/
8273 			case NFS4_CHKSEQ_BAD:
8274 				*cs->statusp = resp->status =
8275 					NFS4ERR_BAD_SEQID;
8276 				goto end;
8277 			case NFS4_CHKSEQ_OKAY:
8278 				/* Everything looks okay move ahead */
8279 				break;
8280 			}
8281 			break;
8282 		case NFS4_CHECK_STATEID_OLD:
8283 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8284 			goto end;
8285 		case NFS4_CHECK_STATEID_BAD:
8286 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8287 			goto end;
8288 		case NFS4_CHECK_STATEID_EXPIRED:
8289 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8290 			goto end;
8291 		case NFS4_CHECK_STATEID_CLOSED:
8292 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8293 			goto end;
8294 		case NFS4_CHECK_STATEID_REPLAY:
8295 			switch (rfs4_check_lock_seqid(
8296 				args->locker.locker4_u.lock_owner.lock_seqid,
8297 				lsp, resop)) {
8298 			case NFS4_CHKSEQ_OKAY:
8299 				/*
8300 				 * This is a replayed stateid; if
8301 				 * seqid matches the next expected,
8302 				 * then client is using wrong seqid.
8303 				 */
8304 			case NFS4_CHKSEQ_BAD:
8305 				*cs->statusp = resp->status =
8306 					NFS4ERR_BAD_SEQID;
8307 				goto end;
8308 			case NFS4_CHKSEQ_REPLAY:
8309 				rfs4_update_lease(lsp->locker->client);
8310 				*cs->statusp = status = resp->status;
8311 				goto end;
8312 			}
8313 			break;
8314 		default:
8315 			ASSERT(FALSE);
8316 			break;
8317 		}
8318 
8319 		rfs4_update_lock_sequence(lsp);
8320 		rfs4_update_lease(lsp->locker->client);
8321 	}
8322 
8323 	/*
8324 	 * NFS4 only allows locking on regular files, so
8325 	 * verify type of object.
8326 	 */
8327 	if (cs->vp->v_type != VREG) {
8328 		if (cs->vp->v_type == VDIR)
8329 			status = NFS4ERR_ISDIR;
8330 		else
8331 			status = NFS4ERR_INVAL;
8332 		goto out;
8333 	}
8334 
8335 	cp = lsp->state->owner->client;
8336 
8337 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
8338 		status = NFS4ERR_GRACE;
8339 		goto out;
8340 	}
8341 
8342 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->can_reclaim) {
8343 		status = NFS4ERR_NO_GRACE;
8344 		goto out;
8345 	}
8346 
8347 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
8348 		status = NFS4ERR_NO_GRACE;
8349 		goto out;
8350 	}
8351 
8352 	if (lsp->state->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)
8353 		cs->deleg = TRUE;
8354 
8355 	status = rfs4_do_lock(lsp, args->locktype,
8356 				args->locker.locker4_u.lock_owner.lock_seqid,
8357 				args->offset,
8358 				args->length, cs->cr, resop);
8359 
8360 out:
8361 	lsp->skip_seqid_check = FALSE;
8362 
8363 	*cs->statusp = resp->status = status;
8364 
8365 	if (status == NFS4_OK) {
8366 		resp->LOCK4res_u.lock_stateid = lsp->lockid.stateid;
8367 		lsp->lock_completed = TRUE;
8368 	}
8369 	/*
8370 	 * Only update the "OPEN" response here if this was a new
8371 	 * lock_owner
8372 	 */
8373 	if (sp)
8374 		rfs4_update_open_resp(sp->owner, resop, NULL);
8375 
8376 	rfs4_update_lock_resp(lsp, resop);
8377 
8378 end:
8379 	if (lsp) {
8380 		if (ls_sw_held)
8381 			rfs4_sw_exit(&lsp->ls_sw);
8382 		/*
8383 		 * If an sp obtained, then the lsp does not represent
8384 		 * a lock on the file struct.
8385 		 */
8386 		if (sp != NULL)
8387 			rfs4_lo_state_rele(lsp, FALSE);
8388 		else
8389 			rfs4_lo_state_rele(lsp, TRUE);
8390 	}
8391 	if (sp) {
8392 		rfs4_sw_exit(&sp->owner->oo_sw);
8393 		rfs4_state_rele(sp);
8394 	}
8395 }
8396 
8397 /* free function for LOCK/LOCKT */
8398 static void
8399 lock_denied_free(nfs_resop4 *resop)
8400 {
8401 	LOCK4denied *dp = NULL;
8402 
8403 	switch (resop->resop) {
8404 	case OP_LOCK:
8405 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
8406 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
8407 		break;
8408 	case OP_LOCKT:
8409 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
8410 			dp = &resop->nfs_resop4_u.oplockt.denied;
8411 		break;
8412 	default:
8413 		break;
8414 	}
8415 
8416 	if (dp)
8417 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
8418 }
8419 
8420 /*ARGSUSED*/
8421 void
8422 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
8423 	    struct svc_req *req, struct compound_state *cs)
8424 {
8425 	/* XXX Currently not using req arg */
8426 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
8427 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
8428 	nfsstat4 status;
8429 	stateid4 *stateid = &args->lock_stateid;
8430 	rfs4_lo_state_t *lsp;
8431 
8432 	if (cs->vp == NULL) {
8433 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8434 		return;
8435 	}
8436 
8437 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
8438 		*cs->statusp = resp->status = status;
8439 		return;
8440 	}
8441 
8442 	/* Ensure specified filehandle matches */
8443 	if (cs->vp != lsp->state->finfo->vp) {
8444 		rfs4_lo_state_rele(lsp, TRUE);
8445 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8446 		return;
8447 	}
8448 
8449 	/* hold off other access to lsp while we tinker */
8450 	rfs4_sw_enter(&lsp->ls_sw);
8451 
8452 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8453 	case NFS4_CHECK_STATEID_OKAY:
8454 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
8455 		    != NFS4_CHKSEQ_OKAY) {
8456 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8457 			goto end;
8458 		}
8459 		break;
8460 	case NFS4_CHECK_STATEID_OLD:
8461 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8462 		goto end;
8463 	case NFS4_CHECK_STATEID_BAD:
8464 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8465 		goto end;
8466 	case NFS4_CHECK_STATEID_EXPIRED:
8467 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8468 		goto end;
8469 	case NFS4_CHECK_STATEID_CLOSED:
8470 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8471 		goto end;
8472 	case NFS4_CHECK_STATEID_REPLAY:
8473 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
8474 		case NFS4_CHKSEQ_OKAY:
8475 				/*
8476 				 * This is a replayed stateid; if
8477 				 * seqid matches the next expected,
8478 				 * then client is using wrong seqid.
8479 				 */
8480 		case NFS4_CHKSEQ_BAD:
8481 			*cs->statusp = resp->status =
8482 				NFS4ERR_BAD_SEQID;
8483 			goto end;
8484 		case NFS4_CHKSEQ_REPLAY:
8485 			rfs4_update_lease(lsp->locker->client);
8486 			*cs->statusp = status = resp->status;
8487 			goto end;
8488 		}
8489 		break;
8490 	default:
8491 		ASSERT(FALSE);
8492 		break;
8493 	}
8494 
8495 	rfs4_update_lock_sequence(lsp);
8496 	rfs4_update_lease(lsp->locker->client);
8497 
8498 	/*
8499 	 * NFS4 only allows locking on regular files, so
8500 	 * verify type of object.
8501 	 */
8502 	if (cs->vp->v_type != VREG) {
8503 		if (cs->vp->v_type == VDIR)
8504 			status = NFS4ERR_ISDIR;
8505 		else
8506 			status = NFS4ERR_INVAL;
8507 		goto out;
8508 	}
8509 
8510 	if (rfs4_clnt_in_grace(lsp->state->owner->client)) {
8511 		status = NFS4ERR_GRACE;
8512 		goto out;
8513 	}
8514 
8515 	status = rfs4_do_lock(lsp, args->locktype,
8516 			    args->seqid, args->offset,
8517 			    args->length, cs->cr, resop);
8518 
8519 out:
8520 	*cs->statusp = resp->status = status;
8521 
8522 	if (status == NFS4_OK)
8523 		resp->lock_stateid = lsp->lockid.stateid;
8524 
8525 	rfs4_update_lock_resp(lsp, resop);
8526 
8527 end:
8528 	rfs4_sw_exit(&lsp->ls_sw);
8529 	rfs4_lo_state_rele(lsp, TRUE);
8530 }
8531 
/*
 * LOCKT is a best-effort routine: the client cannot be guaranteed that
 * the status returned is still in effect by the time the reply is received.
 * There are numerous race conditions in this routine, but we are not
 * required to be, and cannot be, accurate.
 */
8538 /*ARGSUSED*/
8539 void
8540 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
8541 	    struct svc_req *req, struct compound_state *cs)
8542 {
8543 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
8544 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
8545 	rfs4_lockowner_t *lo;
8546 	rfs4_client_t *cp;
8547 	bool_t create = FALSE;
8548 	struct flock64 flk;
8549 	int error;
8550 	int flag = FREAD | FWRITE;
8551 	int ltype;
8552 	length4 posix_length;
8553 	sysid_t sysid;
8554 	pid_t pid;
8555 
8556 	if (cs->vp == NULL) {
8557 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8558 		return;
8559 	}
8560 
8561 	/*
8562 	 * NFS4 only allows locking on regular files, so
8563 	 * verify type of object.
8564 	 */
8565 	if (cs->vp->v_type != VREG) {
8566 		if (cs->vp->v_type == VDIR)
8567 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
8568 		else
8569 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
8570 		return;
8571 	}
8572 
8573 	/*
8574 	 * Check out the clientid to ensure the server knows about it
8575 	 * so that we correctly inform the client of a server reboot.
8576 	 */
8577 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
8578 	    == NULL) {
8579 		*cs->statusp = resp->status =
8580 			rfs4_check_clientid(&args->owner.clientid, 0);
8581 		return;
8582 	}
8583 	if (rfs4_lease_expired(cp)) {
8584 		rfs4_client_close(cp);
8585 		/*
8586 		 * Protocol doesn't allow returning NFS4ERR_STALE as
8587 		 * other operations do on this check so STALE_CLIENTID
8588 		 * is returned instead
8589 		 */
8590 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
8591 		return;
8592 	}
8593 
8594 	if (rfs4_clnt_in_grace(cp)) {
8595 		*cs->statusp = resp->status = NFS4ERR_GRACE;
8596 		return;
8597 	}
8598 	rfs4_client_rele(cp);
8599 
8600 	resp->status = NFS4_OK;
8601 
8602 	switch (args->locktype) {
8603 	case READ_LT:
8604 	case READW_LT:
8605 		ltype = F_RDLCK;
8606 		break;
8607 	case WRITE_LT:
8608 	case WRITEW_LT:
8609 		ltype = F_WRLCK;
8610 		break;
8611 	}
8612 
8613 	posix_length = args->length;
8614 	/* Check for zero length. To lock to end of file use all ones for V4 */
8615 	if (posix_length == 0) {
8616 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8617 		return;
8618 	} else if (posix_length == (length4)(~0)) {
8619 		posix_length = 0;	/* Posix to end of file  */
8620 	}
8621 
8622 	/* Find or create a lockowner */
8623 	lo = rfs4_findlockowner(&args->owner, &create);
8624 
8625 	if (lo) {
8626 		pid = lo->pid;
8627 		if ((resp->status =
8628 			rfs4_client_sysid(lo->client, &sysid)) != NFS4_OK)
8629 		goto out;
8630 	} else {
8631 		pid = 0;
8632 		sysid = lockt_sysid;
8633 	}
8634 retry:
8635 	flk.l_type = ltype;
8636 	flk.l_whence = 0;		/* SEEK_SET */
8637 	flk.l_start = args->offset;
8638 	flk.l_len = posix_length;
8639 	flk.l_sysid = sysid;
8640 	flk.l_pid = pid;
8641 	flag |= F_REMOTELOCK;
8642 
8643 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
8644 
8645 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8646 	if (flk.l_len < 0 || flk.l_start < 0) {
8647 		resp->status = NFS4ERR_INVAL;
8648 		goto out;
8649 	}
8650 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
8651 	    NULL, cs->cr);
8652 
8653 	/*
8654 	 * N.B. We map error values to nfsv4 errors. This is differrent
8655 	 * than puterrno4 routine.
8656 	 */
8657 	switch (error) {
8658 	case 0:
8659 		if (flk.l_type == F_UNLCK)
8660 			resp->status = NFS4_OK;
8661 		else {
8662 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
8663 				goto retry;
8664 			resp->status = NFS4ERR_DENIED;
8665 		}
8666 		break;
8667 	case EOVERFLOW:
8668 		resp->status = NFS4ERR_INVAL;
8669 		break;
8670 	case EINVAL:
8671 		resp->status = NFS4ERR_NOTSUPP;
8672 		break;
8673 	default:
8674 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
8675 			error);
8676 		resp->status = NFS4ERR_SERVERFAULT;
8677 		break;
8678 	}
8679 
8680 out:
8681 	if (lo)
8682 		rfs4_lockowner_rele(lo);
8683 	*cs->statusp = resp->status;
8684 }
8685 
8686 static int
8687 vop_shrlock(vnode_t *vp, int cmd, struct shrlock *sp, int fflags)
8688 {
8689 	int err;
8690 
8691 	if (cmd == F_UNSHARE && sp->s_deny == 0 && sp->s_access == 0)
8692 		return (0);
8693 
8694 	err = VOP_SHRLOCK(vp, cmd, sp, fflags, CRED());
8695 
8696 	NFS4_DEBUG(rfs4_shrlock_debug,
8697 		(CE_NOTE, "rfs4_shrlock %s vp=%p acc=%d dny=%d sysid=%d "
8698 		"pid=%d err=%d\n", cmd == F_SHARE ? "SHARE" : "UNSHR",
8699 		(void *) vp, sp->s_access, sp->s_deny, sp->s_sysid, sp->s_pid,
8700 		err));
8701 
8702 	return (err);
8703 }
8704 
8705 static int
8706 rfs4_shrlock(rfs4_state_t *sp, int cmd)
8707 {
8708 	struct shrlock shr;
8709 	struct shr_locowner shr_loco;
8710 	int fflags;
8711 
8712 	fflags = shr.s_access = shr.s_deny = 0;
8713 
8714 	if (sp->share_access & OPEN4_SHARE_ACCESS_READ) {
8715 		fflags |= FREAD;
8716 		shr.s_access |= F_RDACC;
8717 	}
8718 	if (sp->share_access & OPEN4_SHARE_ACCESS_WRITE) {
8719 		fflags |= FWRITE;
8720 		shr.s_access |= F_WRACC;
8721 	}
8722 	if (sp->share_deny & OPEN4_SHARE_DENY_READ)
8723 		shr.s_deny |= F_RDDNY;
8724 	if (sp->share_deny & OPEN4_SHARE_DENY_WRITE)
8725 		shr.s_deny |= F_WRDNY;
8726 
8727 	shr.s_pid = rfs4_dbe_getid(sp->owner->dbe);
8728 	shr.s_sysid = sp->owner->client->sysidt;
8729 	shr_loco.sl_pid = shr.s_pid;
8730 	shr_loco.sl_id = shr.s_sysid;
8731 	shr.s_owner = (caddr_t)&shr_loco;
8732 	shr.s_own_len = sizeof (shr_loco);
8733 	return (vop_shrlock(sp->finfo->vp, cmd, &shr, fflags));
8734 }
8735 
8736 static int
8737 rfs4_share(rfs4_state_t *sp)
8738 {
8739 	return (rfs4_shrlock(sp, F_SHARE));
8740 }
8741 
8742 void
8743 rfs4_unshare(rfs4_state_t *sp)
8744 {
8745 	(void) rfs4_shrlock(sp, F_UNSHARE);
8746 }
8747