xref: /titanic_52/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 4b7f25f92fce04a513df62afed73561f9216a4fd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
24  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Copyright (c) 2012 by Delphix. All rights reserved.
26  */
27 
28 /*
29  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
30  *	All Rights Reserved
31  */
32 
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 #include <sys/cred.h>
37 #include <sys/buf.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/errno.h>
43 #include <sys/sysmacros.h>
44 #include <sys/statvfs.h>
45 #include <sys/kmem.h>
46 #include <sys/dirent.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/systeminfo.h>
50 #include <sys/flock.h>
51 #include <sys/pathname.h>
52 #include <sys/nbmlock.h>
53 #include <sys/share.h>
54 #include <sys/atomic.h>
55 #include <sys/policy.h>
56 #include <sys/fem.h>
57 #include <sys/sdt.h>
58 #include <sys/ddi.h>
59 #include <sys/zone.h>
60 #include <sys/kstat.h>
61 
62 #include <fs/fs_reparse.h>
63 
64 #include <rpc/types.h>
65 #include <rpc/auth.h>
66 #include <rpc/rpcsec_gss.h>
67 #include <rpc/svc.h>
68 
69 #include <nfs/nfs.h>
70 #include <nfs/export.h>
71 #include <nfs/nfs_cmd.h>
72 #include <nfs/lm.h>
73 #include <nfs/nfs4.h>
74 
75 #include <sys/strsubr.h>
76 #include <sys/strsun.h>
77 
78 #include <inet/common.h>
79 #include <inet/ip.h>
80 #include <inet/ip6.h>
81 
82 #include <sys/tsol/label.h>
83 #include <sys/tsol/tndb.h>
84 
/*
 * Tunables.  The lock retry count and the delay between attempts each
 * have a compile-time default and a file-scope variable initialized
 * from that default.
 */
#define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
#define	RFS4_LOCK_DELAY 10	/* Milliseconds */
static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
/* Defined outside this file. */
extern struct svc_ops rdma_svc_ops;
extern int nfs_loaned_buffers;
/* End of Tunables */

/* Forward declaration; the definition is not in this part of the file. */
static int rdma_setup_read_data4(READ4args *, READ4res *);
94 
/*
 * Used to bump the stateid4.seqid value and show changes in the stateid.
 *
 * The macro pre-increments (sp)->bits.chgseq in place and evaluates to
 * the incremented value, so it has a side effect on *sp every time it
 * is evaluated.
 */
#define	next_stateid(sp) (++(sp)->bits.chgseq)
99 
100 /*
101  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
102  *	This is used to return NFS4ERR_TOOSMALL when clients specify
103  *	maxcount that isn't large enough to hold the smallest possible
104  *	XDR encoded dirent.
105  *
106  *	    sizeof cookie (8 bytes) +
107  *	    sizeof name_len (4 bytes) +
108  *	    sizeof smallest (padded) name (4 bytes) +
109  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
110  *	    sizeof attrlist4_len (4 bytes) +
111  *	    sizeof next boolean (4 bytes)
112  *
113  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
114  * the smallest possible entry4 (assumes no attrs requested).
115  *	sizeof nfsstat4 (4 bytes) +
116  *	sizeof verifier4 (8 bytes) +
117  *	sizeof entry4list bool (4 bytes) +
118  *	sizeof entry4 	(36 bytes) +
119  *	sizeof eof bool  (4 bytes)
120  *
121  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
122  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
123  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
124  *	required for a given name length.  MAXNAMELEN is the maximum
125  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
126  *	macros are to allow for . and .. entries -- just a minor tweak to try
127  *	and guarantee that buffer we give to VOP_READDIR will be large enough
128  *	to hold ., .., and the largest possible solaris dirent64.
129  */
/* 8 + 4 + 4 + 12 + 4 + 4 bytes: the smallest XDR-encoded entry4 */
#define	RFS4_MINLEN_ENTRY4 36
/* status + verifier + entry-list bool + one minimal entry4 + eof bool */
#define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
/* room for ".", ".." and one dirent64 with a MAXNAMELEN-sized name */
#define	RFS4_MINLEN_RDDIR_BUF \
	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
134 
135 /*
136  * It would be better to pad to 4 bytes since that's what XDR would do,
137  * but the dirents UFS gives us are already padded to 8, so just take
138  * what we're given.  Dircount is only a hint anyway.  Currently the
139  * solaris kernel is ASCII only, so there's no point in calling the
140  * UTF8 functions.
141  *
142  * dirent64: named padded to provide 8 byte struct alignment
143  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
144  *
145  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
146  *
147  */
/*
 * Dircount contribution of one dirent64: three XDR units (the 8-byte
 * cookie plus the 4-byte name length) plus the name size derived from
 * the entry's d_reclen via DIRENT64_NAMELEN().
 */
#define	DIRENT64_TO_DIRCOUNT(dp) \
	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
150 
/*
 * NOTE(review): the comment below names rfs4_srvrinit, but the
 * rfs4_srvrinit in this file does not assign rfs4_start_time -- confirm
 * where (or whether) it is set.
 */
time_t rfs4_start_time;			/* Initialized in rfs4_srvrinit */

/* Allocated with lm_alloc_sysidt() in rfs4_srvrinit, freed in rfs4_srvrfini */
static sysid_t lockt_sysid;		/* dummy sysid for all LOCKT calls */

/* Set from fs_new_caller_id() in rfs4_srvrinit */
u_longlong_t	nfs4_srv_caller_id;
/* Key handed to vsd_create() in rfs4_srvrinit */
uint_t		nfs4_srv_vkey = 0;

/* Write4verf is computed in rfs4_srvrinit from the hostid and/or time */
verifier4	Write4verf;
verifier4	Readdir4verf;
160 
/*
 * Forward declarations.
 *
 * Each rfs4_op_*() handler takes the decoded argument op, the result op
 * to fill in, the RPC request and the per-compound state.  The *_free()
 * routines take only the result op; they are the routines installed in
 * the dis_resfree slot of the dispatch table below.
 */
void	rfs4_init_compound_state(struct compound_state *);

static void	nullfree(caddr_t);
static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_create_free(nfs_resop4 *resop);
static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_getattr_free(nfs_resop4 *);
static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_getfh_free(nfs_resop4 *);
static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	lock_denied_free(nfs_resop4 *);
static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
				struct svc_req *req, struct compound_state *cs);
static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_read_free(nfs_resop4 *);
static void	rfs4_op_readdir_free(nfs_resop4 *resop);
static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_readlink_free(nfs_resop4 *);
static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *, struct compound_state *);
static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
			struct svc_req *req, struct compound_state *);
static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
			struct compound_state *);
static void	rfs4_op_secinfo_free(nfs_resop4 *);

/* Helpers that are not installed in the dispatch table */
static nfsstat4 check_open_access(uint32_t,
				struct compound_state *, struct svc_req *);
nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
void rfs4_ss_clid(rfs4_client_t *);
253 
/*
 * translation table for attrs
 *
 * Set up by nfs4_ntov_table_init() and released by
 * nfs4_ntov_table_free(), both declared below.
 */
struct nfs4_ntov_table {
	union nfs4_attr_u *na;		/* presumably per-attr values - confirm */
	uint8_t amap[NFS4_MAXNUM_ATTRS];	/* one byte per possible attr */
	int attrcnt;			/* presumably entries in use - confirm */
	bool_t vfsstat;			/* NOTE(review): meaning not shown here */
};
263 
/* Attribute translation table helpers; definitions are not shown here */
static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
				    struct nfs4_svgetit_arg *sargp);

static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);

/*
 * FEM operation vectors created in rfs4_srvrinit from the
 * nfs4_rd_deleg_tmpl / nfs4_wr_deleg_tmpl templates and released in
 * rfs4_srvrfini.
 */
fem_t		*deleg_rdops;
fem_t		*deleg_wrops;

/*
 * Server instances are chained through their prev/next pointers;
 * rfs4_cur_servinst is the most recently created one and the list is
 * walked from it via ->prev.
 */
rfs4_servinst_t *rfs4_cur_servinst = NULL;	/* current server instance */
kmutex_t	rfs4_servinst_lock;	/* protects linked list */
int		rfs4_seen_first_compound;	/* set first time we see one */
278 
/*
 * NFS4 op dispatch table
 *
 * One rfsv4disp entry describes a single operation: the handler, the
 * routine that frees whatever the handler allocated in the result, the
 * RPC_* flag bits for the operation, and one of the NFS4_OP_* operation
 * types defined below.
 */

struct rfsv4disp {
	void	(*dis_proc)();		/* proc to call */
	void	(*dis_resfree)();	/* frees space allocated by proc */
	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
	int	op_type;		/* operation type, see below */
};
289 
/*
 * operation types; used primarily for the per-exportinfo kstat implementation
 *
 * These are the values stored in the op_type member of struct rfsv4disp.
 */
#define	NFS4_OP_NOFH	0	/* The operation does not operate with any */
				/* particular filehandle; we cannot associate */
				/* it with any exportinfo. */

#define	NFS4_OP_CFH	1	/* The operation works with the current */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the current */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_SFH	2	/* The operation works with the saved */
				/* filehandle; we associate the operation */
				/* with the exportinfo related to the saved */
				/* filehandle (as set before the operation is */
				/* executed). */

#define	NFS4_OP_POSTCFH	3	/* The operation ignores the current */
				/* filehandle, but sets the new current */
				/* filehandle instead; we associate the */
				/* operation with the exportinfo related to */
				/* the current filehandle as set after the */
				/* operation is successfully executed.  Since */
				/* we do not know the particular exportinfo */
				/* (and thus the kstat) before the operation */
				/* is done, there is no simple way to */
				/* update some I/O kstat statistics related */
				/* to kstat_queue(9F). */
320 
/*
 * Dispatch table.  The position of an entry is the operation number
 * given in the comment above it; the unused low slots route to
 * rfs4_op_illegal.  Each entry lists, in order: handler, result-free
 * routine, RPC_* flags, NFS4_OP_* operation type.
 */
static struct rfsv4disp rfsv4disptab[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS_NULL = 0 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 1 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* UNUSED = 2 */
	{rfs4_op_illegal, nullfree, 0, NFS4_OP_NOFH},

	/* OP_ACCESS = 3 */
	{rfs4_op_access, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CLOSE = 4 */
	{rfs4_op_close, nullfree, 0, NFS4_OP_CFH},

	/* OP_COMMIT = 5 */
	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_CREATE = 6 */
	{rfs4_op_create, nullfree, 0, NFS4_OP_CFH},

	/* OP_DELEGPURGE = 7 */
	{rfs4_op_delegpurge, nullfree, 0, NFS4_OP_NOFH},

	/* OP_DELEGRETURN = 8 */
	{rfs4_op_delegreturn, nullfree, 0, NFS4_OP_CFH},

	/* OP_GETATTR = 9 */
	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_GETFH = 10 */
	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL, NFS4_OP_CFH},

	/* OP_LINK = 11 */
	{rfs4_op_link, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOCK = 12 */
	{rfs4_op_lock, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKT = 13 */
	{rfs4_op_lockt, lock_denied_free, 0, NFS4_OP_CFH},

	/* OP_LOCKU = 14 */
	{rfs4_op_locku, nullfree, 0, NFS4_OP_CFH},

	/* OP_LOOKUP = 15 */
	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_LOOKUPP = 16 */
	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    NFS4_OP_CFH},

	/* OP_NVERIFY = 17 */
	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_OPEN = 18 */
	{rfs4_op_open, rfs4_free_reply, 0, NFS4_OP_CFH},

	/* OP_OPENATTR = 19 */
	{rfs4_op_openattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_CONFIRM = 20 */
	{rfs4_op_open_confirm, nullfree, 0, NFS4_OP_CFH},

	/* OP_OPEN_DOWNGRADE = 21 */
	{rfs4_op_open_downgrade, nullfree, 0, NFS4_OP_CFH},

	/* OP_PUTFH = 22 */
	{rfs4_op_putfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTPUBFH = 23 */
	{rfs4_op_putpubfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_PUTROOTFH = 24 */
	{rfs4_op_putrootfh, nullfree, RPC_ALL, NFS4_OP_POSTCFH},

	/* OP_READ = 25 */
	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READDIR = 26 */
	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_READLINK = 27 */
	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_REMOVE = 28 */
	{rfs4_op_remove, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENAME = 29 */
	{rfs4_op_rename, nullfree, 0, NFS4_OP_CFH},

	/* OP_RENEW = 30 */
	{rfs4_op_renew, nullfree, 0, NFS4_OP_NOFH},

	/* OP_RESTOREFH = 31 */
	{rfs4_op_restorefh, nullfree, RPC_ALL, NFS4_OP_SFH},

	/* OP_SAVEFH = 32 */
	{rfs4_op_savefh, nullfree, RPC_ALL, NFS4_OP_CFH},

	/* OP_SECINFO = 33 */
	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0, NFS4_OP_CFH},

	/* OP_SETATTR = 34 */
	{rfs4_op_setattr, nullfree, 0, NFS4_OP_CFH},

	/* OP_SETCLIENTID = 35 */
	{rfs4_op_setclientid, nullfree, 0, NFS4_OP_NOFH},

	/* OP_SETCLIENTID_CONFIRM = 36 */
	{rfs4_op_setclientid_confirm, nullfree, 0, NFS4_OP_NOFH},

	/* OP_VERIFY = 37 */
	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT, NFS4_OP_CFH},

	/* OP_WRITE = 38 */
	{rfs4_op_write, nullfree, 0, NFS4_OP_CFH},

	/* OP_RELEASE_LOCKOWNER = 39 */
	{rfs4_op_release_lockowner, nullfree, 0, NFS4_OP_NOFH},
};
448 
/* Number of entries in rfsv4disptab */
static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);

/*
 * Index one past the last dispatch table entry.  The DEBUG-only
 * rfs4_op_string[] array carries an extra trailing "rfs4_op_illegal"
 * name at this position.
 */
#define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
452 
#ifdef DEBUG

/* DEBUG-only knobs */
int		rfs4_fillone_debug = 0;
int		rfs4_no_stub_access = 1;
int		rfs4_rddir_debug = 0;

/*
 * Printable handler names, in the same order as the rfsv4disptab
 * entries, followed by one extra entry for the illegal-op slot
 * (OP_ILLEGAL_IDX).  Each name matches the handler installed in the
 * corresponding dispatch table slot.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; the handler is ..._setclientid_... */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
503 
void	rfs4_ss_chkclid(rfs4_client_t *);

/* Local declaration of strlcpy(); used by rfs4_dss_newpath() below */
extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);

extern void	rfs4_free_fs_locations4(fs_locations4 *);

/*
 * Step to the next dirent64 in a buffer by advancing d_reclen bytes
 * from the start of the current one.  Any earlier definition of nextdp
 * is discarded first so that this version is the one used here.
 */
#ifdef	nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
514 
/*
 * FEM template handed to fem_create() in rfs4_srvrinit to build
 * deleg_rdops.  It maps each listed vnode operation to the matching
 * deleg_rd_*() routine and is terminated by a NULL, NULL pair.
 */
static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
	NULL,			NULL
};
/*
 * FEM template handed to fem_create() in rfs4_srvrinit to build
 * deleg_wrops.  Compared with the read template it also maps
 * VOPNAME_READ; every entry points at the matching deleg_wr_*()
 * routine and the list is terminated by a NULL, NULL pair.
 */
static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
	NULL,			NULL
};

536 
537 int
538 rfs4_srvrinit(void)
539 {
540 	timespec32_t verf;
541 	int error;
542 	extern void rfs4_attr_init();
543 	extern krwlock_t rfs4_deleg_policy_lock;
544 
545 	/*
546 	 * The following algorithm attempts to find a unique verifier
547 	 * to be used as the write verifier returned from the server
548 	 * to the client.  It is important that this verifier change
549 	 * whenever the server reboots.  Of secondary importance, it
550 	 * is important for the verifier to be unique between two
551 	 * different servers.
552 	 *
553 	 * Thus, an attempt is made to use the system hostid and the
554 	 * current time in seconds when the nfssrv kernel module is
555 	 * loaded.  It is assumed that an NFS server will not be able
556 	 * to boot and then to reboot in less than a second.  If the
557 	 * hostid has not been set, then the current high resolution
558 	 * time is used.  This will ensure different verifiers each
559 	 * time the server reboots and minimize the chances that two
560 	 * different servers will have the same verifier.
561 	 * XXX - this is broken on LP64 kernels.
562 	 */
563 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
564 	if (verf.tv_sec != 0) {
565 		verf.tv_nsec = gethrestime_sec();
566 	} else {
567 		timespec_t tverf;
568 
569 		gethrestime(&tverf);
570 		verf.tv_sec = (time_t)tverf.tv_sec;
571 		verf.tv_nsec = tverf.tv_nsec;
572 	}
573 
574 	Write4verf = *(uint64_t *)&verf;
575 
576 	rfs4_attr_init();
577 	mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
578 
579 	/* Used to manage create/destroy of server state */
580 	mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
581 
582 	/* Used to manage access to server instance linked list */
583 	mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
584 
585 	/* Used to manage access to rfs4_deleg_policy */
586 	rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
587 
588 	error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
589 	if (error != 0) {
590 		rfs4_disable_delegation();
591 	} else {
592 		error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
593 		    &deleg_wrops);
594 		if (error != 0) {
595 			rfs4_disable_delegation();
596 			fem_free(deleg_rdops);
597 		}
598 	}
599 
600 	nfs4_srv_caller_id = fs_new_caller_id();
601 
602 	lockt_sysid = lm_alloc_sysidt();
603 
604 	vsd_create(&nfs4_srv_vkey, NULL);
605 
606 	return (0);
607 }
608 
609 void
610 rfs4_srvrfini(void)
611 {
612 	extern krwlock_t rfs4_deleg_policy_lock;
613 
614 	if (lockt_sysid != LM_NOSYSID) {
615 		lm_free_sysidt(lockt_sysid);
616 		lockt_sysid = LM_NOSYSID;
617 	}
618 
619 	mutex_destroy(&rfs4_deleg_lock);
620 	mutex_destroy(&rfs4_state_lock);
621 	rw_destroy(&rfs4_deleg_policy_lock);
622 
623 	fem_free(deleg_rdops);
624 	fem_free(deleg_wrops);
625 }
626 
627 void
628 rfs4_init_compound_state(struct compound_state *cs)
629 {
630 	bzero(cs, sizeof (*cs));
631 	cs->cont = TRUE;
632 	cs->access = CS_ACCESS_DENIED;
633 	cs->deleg = FALSE;
634 	cs->mandlock = FALSE;
635 	cs->fh.nfs_fh4_val = cs->fhbuf;
636 }
637 
638 void
639 rfs4_grace_start(rfs4_servinst_t *sip)
640 {
641 	rw_enter(&sip->rwlock, RW_WRITER);
642 	sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
643 	sip->grace_period = rfs4_grace_period;
644 	rw_exit(&sip->rwlock);
645 }
646 
647 /*
648  * returns true if the instance's grace period has never been started
649  */
650 int
651 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
652 {
653 	time_t start_time;
654 
655 	rw_enter(&sip->rwlock, RW_READER);
656 	start_time = sip->start_time;
657 	rw_exit(&sip->rwlock);
658 
659 	return (start_time == 0);
660 }
661 
662 /*
663  * Indicates if server instance is within the
664  * grace period.
665  */
666 int
667 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
668 {
669 	time_t grace_expiry;
670 
671 	rw_enter(&sip->rwlock, RW_READER);
672 	grace_expiry = sip->start_time + sip->grace_period;
673 	rw_exit(&sip->rwlock);
674 
675 	return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
676 }
677 
678 int
679 rfs4_clnt_in_grace(rfs4_client_t *cp)
680 {
681 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
682 
683 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
684 }
685 
686 /*
687  * reset all currently active grace periods
688  */
689 void
690 rfs4_grace_reset_all(void)
691 {
692 	rfs4_servinst_t *sip;
693 
694 	mutex_enter(&rfs4_servinst_lock);
695 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
696 		if (rfs4_servinst_in_grace(sip))
697 			rfs4_grace_start(sip);
698 	mutex_exit(&rfs4_servinst_lock);
699 }
700 
701 /*
702  * start any new instances' grace periods
703  */
704 void
705 rfs4_grace_start_new(void)
706 {
707 	rfs4_servinst_t *sip;
708 
709 	mutex_enter(&rfs4_servinst_lock);
710 	for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
711 		if (rfs4_servinst_grace_new(sip))
712 			rfs4_grace_start(sip);
713 	mutex_exit(&rfs4_servinst_lock);
714 }
715 
716 static rfs4_dss_path_t *
717 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
718 {
719 	size_t len;
720 	rfs4_dss_path_t *dss_path;
721 
722 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
723 
724 	/*
725 	 * Take a copy of the string, since the original may be overwritten.
726 	 * Sadly, no strdup() in the kernel.
727 	 */
728 	/* allow for NUL */
729 	len = strlen(path) + 1;
730 	dss_path->path = kmem_alloc(len, KM_SLEEP);
731 	(void) strlcpy(dss_path->path, path, len);
732 
733 	/* associate with servinst */
734 	dss_path->sip = sip;
735 	dss_path->index = index;
736 
737 	/*
738 	 * Add to list of served paths.
739 	 * No locking required, as we're only ever called at startup.
740 	 */
741 	if (rfs4_dss_pathlist == NULL) {
742 		/* this is the first dss_path_t */
743 
744 		/* needed for insque/remque */
745 		dss_path->next = dss_path->prev = dss_path;
746 
747 		rfs4_dss_pathlist = dss_path;
748 	} else {
749 		insque(dss_path, rfs4_dss_pathlist);
750 	}
751 
752 	return (dss_path);
753 }
754 
755 /*
756  * Create a new server instance, and make it the currently active instance.
757  * Note that starting the grace period too early will reduce the clients'
758  * recovery window.
759  */
760 void
761 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
762 {
763 	unsigned i;
764 	rfs4_servinst_t *sip;
765 	rfs4_oldstate_t *oldstate;
766 
767 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
768 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
769 
770 	sip->start_time = (time_t)0;
771 	sip->grace_period = (time_t)0;
772 	sip->next = NULL;
773 	sip->prev = NULL;
774 
775 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
776 	/*
777 	 * This initial dummy entry is required to setup for insque/remque.
778 	 * It must be skipped over whenever the list is traversed.
779 	 */
780 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
781 	/* insque/remque require initial list entry to be self-terminated */
782 	oldstate->next = oldstate;
783 	oldstate->prev = oldstate;
784 	sip->oldstate = oldstate;
785 
786 
787 	sip->dss_npaths = dss_npaths;
788 	sip->dss_paths = kmem_alloc(dss_npaths *
789 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
790 
791 	for (i = 0; i < dss_npaths; i++) {
792 		sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
793 	}
794 
795 	mutex_enter(&rfs4_servinst_lock);
796 	if (rfs4_cur_servinst != NULL) {
797 		/* add to linked list */
798 		sip->prev = rfs4_cur_servinst;
799 		rfs4_cur_servinst->next = sip;
800 	}
801 	if (start_grace)
802 		rfs4_grace_start(sip);
803 	/* make the new instance "current" */
804 	rfs4_cur_servinst = sip;
805 
806 	mutex_exit(&rfs4_servinst_lock);
807 }
808 
809 /*
810  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
811  * all instances directly.
812  */
813 void
814 rfs4_servinst_destroy_all(void)
815 {
816 	rfs4_servinst_t *sip, *prev, *current;
817 #ifdef DEBUG
818 	int n = 0;
819 #endif
820 
821 	mutex_enter(&rfs4_servinst_lock);
822 	ASSERT(rfs4_cur_servinst != NULL);
823 	current = rfs4_cur_servinst;
824 	rfs4_cur_servinst = NULL;
825 	for (sip = current; sip != NULL; sip = prev) {
826 		prev = sip->prev;
827 		rw_destroy(&sip->rwlock);
828 		if (sip->oldstate)
829 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
830 		if (sip->dss_paths)
831 			kmem_free(sip->dss_paths,
832 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
833 		kmem_free(sip, sizeof (rfs4_servinst_t));
834 #ifdef DEBUG
835 		n++;
836 #endif
837 	}
838 	mutex_exit(&rfs4_servinst_lock);
839 }
840 
841 /*
842  * Assign the current server instance to a client_t.
843  * Should be called with cp->rc_dbe held.
844  */
845 void
846 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
847 {
848 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849 
850 	/*
851 	 * The lock ensures that if the current instance is in the process
852 	 * of changing, we will see the new one.
853 	 */
854 	mutex_enter(&rfs4_servinst_lock);
855 	cp->rc_server_instance = sip;
856 	mutex_exit(&rfs4_servinst_lock);
857 }
858 
859 rfs4_servinst_t *
860 rfs4_servinst(rfs4_client_t *cp)
861 {
862 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
863 
864 	return (cp->rc_server_instance);
865 }
866 
867 /* ARGSUSED */
868 static void
869 nullfree(caddr_t resop)
870 {
871 }
872 
873 /*
874  * This is a fall-through for invalid or not implemented (yet) ops
875  */
876 /* ARGSUSED */
877 static void
878 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
879     struct compound_state *cs)
880 {
881 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
882 }
883 
884 /*
885  * Check if the security flavor, nfsnum, is in the flavor_list.
886  */
887 bool_t
888 in_flavor_list(int nfsnum, int *flavor_list, int count)
889 {
890 	int i;
891 
892 	for (i = 0; i < count; i++) {
893 		if (nfsnum == flavor_list[i])
894 			return (TRUE);
895 	}
896 	return (FALSE);
897 }
898 
899 /*
900  * Used by rfs4_op_secinfo to get the security information from the
901  * export structure associated with the component.
902  */
903 /* ARGSUSED */
904 static nfsstat4
905 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
906 {
907 	int error, different_export = 0;
908 	vnode_t *dvp, *vp;
909 	struct exportinfo *exi = NULL;
910 	struct exportinfo *oexi = NULL;
911 	fid_t fid;
912 	uint_t count, i;
913 	secinfo4 *resok_val;
914 	struct secinfo *secp;
915 	seconfig_t *si;
916 	bool_t did_traverse = FALSE;
917 	int dotdot, walk;
918 
919 	dvp = cs->vp;
920 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
921 
922 	/*
923 	 * If dotdotting, then need to check whether it's above the
924 	 * root of a filesystem, or above an export point.
925 	 */
926 	if (dotdot) {
927 
928 		/*
929 		 * If dotdotting at the root of a filesystem, then
930 		 * need to traverse back to the mounted-on filesystem
931 		 * and do the dotdot lookup there.
932 		 */
933 		if (cs->vp->v_flag & VROOT) {
934 
935 			/*
936 			 * If at the system root, then can
937 			 * go up no further.
938 			 */
939 			if (VN_CMP(dvp, rootdir))
940 				return (puterrno4(ENOENT));
941 
942 			/*
943 			 * Traverse back to the mounted-on filesystem
944 			 */
945 			dvp = untraverse(cs->vp);
946 
947 			/*
948 			 * Set the different_export flag so we remember
949 			 * to pick up a new exportinfo entry for
950 			 * this new filesystem.
951 			 */
952 			different_export = 1;
953 		} else {
954 
955 			/*
956 			 * If dotdotting above an export point then set
957 			 * the different_export to get new export info.
958 			 */
959 			different_export = nfs_exported(cs->exi, cs->vp);
960 		}
961 	}
962 
963 	/*
964 	 * Get the vnode for the component "nm".
965 	 */
966 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
967 	    NULL, NULL, NULL);
968 	if (error)
969 		return (puterrno4(error));
970 
971 	/*
972 	 * If the vnode is in a pseudo filesystem, or if the security flavor
973 	 * used in the request is valid but not an explicitly shared flavor,
974 	 * or the access bit indicates that this is a limited access,
975 	 * check whether this vnode is visible.
976 	 */
977 	if (!different_export &&
978 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
979 	    cs->access & CS_ACCESS_LIMITED)) {
980 		if (! nfs_visible(cs->exi, vp, &different_export)) {
981 			VN_RELE(vp);
982 			return (puterrno4(ENOENT));
983 		}
984 	}
985 
986 	/*
987 	 * If it's a mountpoint, then traverse it.
988 	 */
989 	if (vn_ismntpt(vp)) {
990 		if ((error = traverse(&vp)) != 0) {
991 			VN_RELE(vp);
992 			return (puterrno4(error));
993 		}
994 		/* remember that we had to traverse mountpoint */
995 		did_traverse = TRUE;
996 		different_export = 1;
997 	} else if (vp->v_vfsp != dvp->v_vfsp) {
998 		/*
999 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1000 		 * then vp is probably an LOFS object.  We don't need the
1001 		 * realvp, we just need to know that we might have crossed
1002 		 * a server fs boundary and need to call checkexport.
1003 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1004 		 * traverse)
1005 		 */
1006 		different_export = 1;
1007 	}
1008 
1009 	/*
1010 	 * Get the export information for it.
1011 	 */
1012 	if (different_export) {
1013 
1014 		bzero(&fid, sizeof (fid));
1015 		fid.fid_len = MAXFIDSZ;
1016 		error = vop_fid_pseudo(vp, &fid);
1017 		if (error) {
1018 			VN_RELE(vp);
1019 			return (puterrno4(error));
1020 		}
1021 
1022 		if (dotdot)
1023 			oexi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1024 		else
1025 			oexi = checkexport(&vp->v_vfsp->vfs_fsid, &fid, vp);
1026 
1027 		if (oexi == NULL) {
1028 			if (did_traverse == TRUE) {
1029 				/*
1030 				 * If this vnode is a mounted-on vnode,
1031 				 * but the mounted-on file system is not
1032 				 * exported, send back the secinfo for
1033 				 * the exported node that the mounted-on
1034 				 * vnode lives in.
1035 				 */
1036 				exi = cs->exi;
1037 			} else {
1038 				VN_RELE(vp);
1039 				return (puterrno4(EACCES));
1040 			}
1041 		} else {
1042 			exi = oexi;
1043 		}
1044 	} else {
1045 		exi = cs->exi;
1046 	}
1047 	ASSERT(exi != NULL);
1048 
1049 
1050 	/*
1051 	 * Create the secinfo result based on the security information
1052 	 * from the exportinfo structure (exi).
1053 	 *
1054 	 * Return all flavors for a pseudo node.
1055 	 * For a real export node, return the flavor that the client
1056 	 * has access with.
1057 	 */
1058 	rw_enter(&exported_lock, RW_READER);
1059 	if (PSEUDO(exi)) {
1060 		count = exi->exi_export.ex_seccnt; /* total sec count */
1061 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1062 		secp = exi->exi_export.ex_secinfo;
1063 
1064 		for (i = 0; i < count; i++) {
1065 			si = &secp[i].s_secinfo;
1066 			resok_val[i].flavor = si->sc_rpcnum;
1067 			if (resok_val[i].flavor == RPCSEC_GSS) {
1068 				rpcsec_gss_info *info;
1069 
1070 				info = &resok_val[i].flavor_info;
1071 				info->qop = si->sc_qop;
1072 				info->service = (rpc_gss_svc_t)si->sc_service;
1073 
1074 				/* get oid opaque data */
1075 				info->oid.sec_oid4_len =
1076 				    si->sc_gss_mech_type->length;
1077 				info->oid.sec_oid4_val = kmem_alloc(
1078 				    si->sc_gss_mech_type->length, KM_SLEEP);
1079 				bcopy(
1080 				    si->sc_gss_mech_type->elements,
1081 				    info->oid.sec_oid4_val,
1082 				    info->oid.sec_oid4_len);
1083 			}
1084 		}
1085 		resp->SECINFO4resok_len = count;
1086 		resp->SECINFO4resok_val = resok_val;
1087 	} else {
1088 		int ret_cnt = 0, k = 0;
1089 		int *flavor_list;
1090 
1091 		count = exi->exi_export.ex_seccnt; /* total sec count */
1092 		secp = exi->exi_export.ex_secinfo;
1093 
1094 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1095 		/* find out which flavors to return */
1096 		for (i = 0; i < count; i ++) {
1097 			int access, flavor, perm;
1098 
1099 			flavor = secp[i].s_secinfo.sc_nfsnum;
1100 			perm = secp[i].s_flags;
1101 
1102 			access = nfsauth4_secinfo_access(exi, cs->req,
1103 			    flavor, perm, cs->basecr);
1104 
1105 			if (! (access & NFSAUTH_DENIED) &&
1106 			    ! (access & NFSAUTH_WRONGSEC)) {
1107 				flavor_list[ret_cnt] = flavor;
1108 				ret_cnt++;
1109 			}
1110 		}
1111 
1112 		/* Create the returning SECINFO value */
1113 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1114 
1115 		for (i = 0; i < count; i++) {
1116 			/*
1117 			 * If the flavor is in the flavor list,
1118 			 * fill in resok_val.
1119 			 */
1120 			si = &secp[i].s_secinfo;
1121 			if (in_flavor_list(si->sc_nfsnum,
1122 			    flavor_list, ret_cnt)) {
1123 				resok_val[k].flavor = si->sc_rpcnum;
1124 				if (resok_val[k].flavor == RPCSEC_GSS) {
1125 					rpcsec_gss_info *info;
1126 
1127 					info = &resok_val[k].flavor_info;
1128 					info->qop = si->sc_qop;
1129 					info->service = (rpc_gss_svc_t)
1130 					    si->sc_service;
1131 
1132 					/* get oid opaque data */
1133 					info->oid.sec_oid4_len =
1134 					    si->sc_gss_mech_type->length;
1135 					info->oid.sec_oid4_val = kmem_alloc(
1136 					    si->sc_gss_mech_type->length,
1137 					    KM_SLEEP);
1138 					bcopy(si->sc_gss_mech_type->elements,
1139 					    info->oid.sec_oid4_val,
1140 					    info->oid.sec_oid4_len);
1141 				}
1142 				k++;
1143 			}
1144 			if (k >= ret_cnt)
1145 				break;
1146 		}
1147 		resp->SECINFO4resok_len = ret_cnt;
1148 		resp->SECINFO4resok_val = resok_val;
1149 		kmem_free(flavor_list, count * sizeof (int));
1150 	}
1151 	rw_exit(&exported_lock);
1152 	if (oexi)
1153 		exi_rele(oexi);
1154 	VN_RELE(vp);
1155 	return (NFS4_OK);
1156 }
1157 
1158 /*
1159  * SECINFO (Operation 33): Obtain required security information on
1160  * the component name in the format of (security-mechanism-oid, qop, service)
1161  * triplets.
1162  */
1163 /* ARGSUSED */
1164 static void
1165 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1166     struct compound_state *cs)
1167 {
1168 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1169 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1170 	utf8string *utfnm = &args->name;
1171 	uint_t len;
1172 	char *nm;
1173 	struct sockaddr *ca;
1174 	char *name = NULL;
1175 	nfsstat4 status = NFS4_OK;
1176 
1177 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1178 	    SECINFO4args *, args);
1179 
1180 	/*
1181 	 * Current file handle (cfh) should have been set before getting
1182 	 * into this function. If not, return error.
1183 	 */
1184 	if (cs->vp == NULL) {
1185 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1186 		goto out;
1187 	}
1188 
1189 	if (cs->vp->v_type != VDIR) {
1190 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1191 		goto out;
1192 	}
1193 
1194 	/*
1195 	 * Verify the component name. If failed, error out, but
1196 	 * do not error out if the component name is a "..".
1197 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1198 	 */
1199 	status = utf8_dir_verify(utfnm);
1200 	if (status != NFS4_OK) {
1201 		if (utfnm->utf8string_len != 2 ||
1202 		    utfnm->utf8string_val[0] != '.' ||
1203 		    utfnm->utf8string_val[1] != '.') {
1204 			*cs->statusp = resp->status = status;
1205 			goto out;
1206 		}
1207 	}
1208 
1209 	nm = utf8_to_str(utfnm, &len, NULL);
1210 	if (nm == NULL) {
1211 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1212 		goto out;
1213 	}
1214 
1215 	if (len > MAXNAMELEN) {
1216 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1217 		kmem_free(nm, len);
1218 		goto out;
1219 	}
1220 
1221 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1222 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1223 	    MAXPATHLEN  + 1);
1224 
1225 	if (name == NULL) {
1226 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1227 		kmem_free(nm, len);
1228 		goto out;
1229 	}
1230 
1231 
1232 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1233 
1234 	if (name != nm)
1235 		kmem_free(name, MAXPATHLEN + 1);
1236 	kmem_free(nm, len);
1237 
1238 out:
1239 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1240 	    SECINFO4res *, resp);
1241 }
1242 
1243 /*
1244  * Free SECINFO result.
1245  */
1246 /* ARGSUSED */
1247 static void
1248 rfs4_op_secinfo_free(nfs_resop4 *resop)
1249 {
1250 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1251 	int count, i;
1252 	secinfo4 *resok_val;
1253 
1254 	/* If this is not an Ok result, nothing to free. */
1255 	if (resp->status != NFS4_OK) {
1256 		return;
1257 	}
1258 
1259 	count = resp->SECINFO4resok_len;
1260 	resok_val = resp->SECINFO4resok_val;
1261 
1262 	for (i = 0; i < count; i++) {
1263 		if (resok_val[i].flavor == RPCSEC_GSS) {
1264 			rpcsec_gss_info *info;
1265 
1266 			info = &resok_val[i].flavor_info;
1267 			kmem_free(info->oid.sec_oid4_val,
1268 			    info->oid.sec_oid4_len);
1269 		}
1270 	}
1271 	kmem_free(resok_val, count * sizeof (secinfo4));
1272 	resp->SECINFO4resok_len = 0;
1273 	resp->SECINFO4resok_val = NULL;
1274 }
1275 
1276 /* ARGSUSED */
1277 static void
1278 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1279     struct compound_state *cs)
1280 {
1281 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1282 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1283 	int error;
1284 	vnode_t *vp;
1285 	struct vattr va;
1286 	int checkwriteperm;
1287 	cred_t *cr = cs->cr;
1288 	bslabel_t *clabel, *slabel;
1289 	ts_label_t *tslabel;
1290 	boolean_t admin_low_client;
1291 
1292 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1293 	    ACCESS4args *, args);
1294 
1295 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1296 	if (cs->access == CS_ACCESS_DENIED) {
1297 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1298 		goto out;
1299 	}
1300 #endif
1301 	if (cs->vp == NULL) {
1302 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1303 		goto out;
1304 	}
1305 
1306 	ASSERT(cr != NULL);
1307 
1308 	vp = cs->vp;
1309 
1310 	/*
1311 	 * If the file system is exported read only, it is not appropriate
1312 	 * to check write permissions for regular files and directories.
1313 	 * Special files are interpreted by the client, so the underlying
1314 	 * permissions are sent back to the client for interpretation.
1315 	 */
1316 	if (rdonly4(req, cs) &&
1317 	    (vp->v_type == VREG || vp->v_type == VDIR))
1318 		checkwriteperm = 0;
1319 	else
1320 		checkwriteperm = 1;
1321 
1322 	/*
1323 	 * XXX
1324 	 * We need the mode so that we can correctly determine access
1325 	 * permissions relative to a mandatory lock file.  Access to
1326 	 * mandatory lock files is denied on the server, so it might
1327 	 * as well be reflected to the server during the open.
1328 	 */
1329 	va.va_mask = AT_MODE;
1330 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1331 	if (error) {
1332 		*cs->statusp = resp->status = puterrno4(error);
1333 		goto out;
1334 	}
1335 	resp->access = 0;
1336 	resp->supported = 0;
1337 
1338 	if (is_system_labeled()) {
1339 		ASSERT(req->rq_label != NULL);
1340 		clabel = req->rq_label;
1341 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1342 		    "got client label from request(1)",
1343 		    struct svc_req *, req);
1344 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1345 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1346 				*cs->statusp = resp->status = puterrno4(EACCES);
1347 				goto out;
1348 			}
1349 			slabel = label2bslabel(tslabel);
1350 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1351 			    char *, "got server label(1) for vp(2)",
1352 			    bslabel_t *, slabel, vnode_t *, vp);
1353 
1354 			admin_low_client = B_FALSE;
1355 		} else
1356 			admin_low_client = B_TRUE;
1357 	}
1358 
1359 	if (args->access & ACCESS4_READ) {
1360 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1361 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1362 		    (!is_system_labeled() || admin_low_client ||
1363 		    bldominates(clabel, slabel)))
1364 			resp->access |= ACCESS4_READ;
1365 		resp->supported |= ACCESS4_READ;
1366 	}
1367 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1368 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1369 		if (!error && (!is_system_labeled() || admin_low_client ||
1370 		    bldominates(clabel, slabel)))
1371 			resp->access |= ACCESS4_LOOKUP;
1372 		resp->supported |= ACCESS4_LOOKUP;
1373 	}
1374 	if (checkwriteperm &&
1375 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1376 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1377 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1378 		    (!is_system_labeled() || admin_low_client ||
1379 		    blequal(clabel, slabel)))
1380 			resp->access |=
1381 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1382 		resp->supported |=
1383 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1384 	}
1385 
1386 	if (checkwriteperm &&
1387 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1388 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1389 		if (!error && (!is_system_labeled() || admin_low_client ||
1390 		    blequal(clabel, slabel)))
1391 			resp->access |= ACCESS4_DELETE;
1392 		resp->supported |= ACCESS4_DELETE;
1393 	}
1394 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1395 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1396 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1397 		    (!is_system_labeled() || admin_low_client ||
1398 		    bldominates(clabel, slabel)))
1399 			resp->access |= ACCESS4_EXECUTE;
1400 		resp->supported |= ACCESS4_EXECUTE;
1401 	}
1402 
1403 	if (is_system_labeled() && !admin_low_client)
1404 		label_rele(tslabel);
1405 
1406 	*cs->statusp = resp->status = NFS4_OK;
1407 out:
1408 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1409 	    ACCESS4res *, resp);
1410 }
1411 
1412 /* ARGSUSED */
1413 static void
1414 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1415     struct compound_state *cs)
1416 {
1417 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1418 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1419 	int error;
1420 	vnode_t *vp = cs->vp;
1421 	cred_t *cr = cs->cr;
1422 	vattr_t va;
1423 
1424 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1425 	    COMMIT4args *, args);
1426 
1427 	if (vp == NULL) {
1428 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1429 		goto out;
1430 	}
1431 	if (cs->access == CS_ACCESS_DENIED) {
1432 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1433 		goto out;
1434 	}
1435 
1436 	if (args->offset + args->count < args->offset) {
1437 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1438 		goto out;
1439 	}
1440 
1441 	va.va_mask = AT_UID;
1442 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1443 
1444 	/*
1445 	 * If we can't get the attributes, then we can't do the
1446 	 * right access checking.  So, we'll fail the request.
1447 	 */
1448 	if (error) {
1449 		*cs->statusp = resp->status = puterrno4(error);
1450 		goto out;
1451 	}
1452 	if (rdonly4(req, cs)) {
1453 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1454 		goto out;
1455 	}
1456 
1457 	if (vp->v_type != VREG) {
1458 		if (vp->v_type == VDIR)
1459 			resp->status = NFS4ERR_ISDIR;
1460 		else
1461 			resp->status = NFS4ERR_INVAL;
1462 		*cs->statusp = resp->status;
1463 		goto out;
1464 	}
1465 
1466 	if (crgetuid(cr) != va.va_uid &&
1467 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1468 		*cs->statusp = resp->status = puterrno4(error);
1469 		goto out;
1470 	}
1471 
1472 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1473 
1474 	if (error) {
1475 		*cs->statusp = resp->status = puterrno4(error);
1476 		goto out;
1477 	}
1478 
1479 	*cs->statusp = resp->status = NFS4_OK;
1480 	resp->writeverf = Write4verf;
1481 out:
1482 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1483 	    COMMIT4res *, resp);
1484 }
1485 
1486 /*
1487  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1488  * was completed. It does the nfsv4 create for special files.
1489  */
1490 /* ARGSUSED */
1491 static vnode_t *
1492 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1493     struct compound_state *cs, vattr_t *vap, char *nm)
1494 {
1495 	int error;
1496 	cred_t *cr = cs->cr;
1497 	vnode_t *dvp = cs->vp;
1498 	vnode_t *vp = NULL;
1499 	int mode;
1500 	enum vcexcl excl;
1501 
1502 	switch (args->type) {
1503 	case NF4CHR:
1504 	case NF4BLK:
1505 		if (secpolicy_sys_devices(cr) != 0) {
1506 			*cs->statusp = resp->status = NFS4ERR_PERM;
1507 			return (NULL);
1508 		}
1509 		if (args->type == NF4CHR)
1510 			vap->va_type = VCHR;
1511 		else
1512 			vap->va_type = VBLK;
1513 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1514 		    args->ftype4_u.devdata.specdata2);
1515 		vap->va_mask |= AT_RDEV;
1516 		break;
1517 	case NF4SOCK:
1518 		vap->va_type = VSOCK;
1519 		break;
1520 	case NF4FIFO:
1521 		vap->va_type = VFIFO;
1522 		break;
1523 	default:
1524 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1525 		return (NULL);
1526 	}
1527 
1528 	/*
1529 	 * Must specify the mode.
1530 	 */
1531 	if (!(vap->va_mask & AT_MODE)) {
1532 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1533 		return (NULL);
1534 	}
1535 
1536 	excl = EXCL;
1537 
1538 	mode = 0;
1539 
1540 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1541 	if (error) {
1542 		*cs->statusp = resp->status = puterrno4(error);
1543 		return (NULL);
1544 	}
1545 	return (vp);
1546 }
1547 
1548 /*
1549  * nfsv4 create is used to create non-regular files. For regular files,
1550  * use nfsv4 open.
1551  */
1552 /* ARGSUSED */
1553 static void
1554 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1555     struct compound_state *cs)
1556 {
1557 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1558 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1559 	int error;
1560 	struct vattr bva, iva, iva2, ava, *vap;
1561 	cred_t *cr = cs->cr;
1562 	vnode_t *dvp = cs->vp;
1563 	vnode_t *vp = NULL;
1564 	vnode_t *realvp;
1565 	char *nm, *lnm;
1566 	uint_t len, llen;
1567 	int syncval = 0;
1568 	struct nfs4_svgetit_arg sarg;
1569 	struct nfs4_ntov_table ntov;
1570 	struct statvfs64 sb;
1571 	nfsstat4 status;
1572 	struct sockaddr *ca;
1573 	char *name = NULL;
1574 	char *lname = NULL;
1575 
1576 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1577 	    CREATE4args *, args);
1578 
1579 	resp->attrset = 0;
1580 
1581 	if (dvp == NULL) {
1582 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1583 		goto out;
1584 	}
1585 
1586 	/*
1587 	 * If there is an unshared filesystem mounted on this vnode,
1588 	 * do not allow to create an object in this directory.
1589 	 */
1590 	if (vn_ismntpt(dvp)) {
1591 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1592 		goto out;
1593 	}
1594 
1595 	/* Verify that type is correct */
1596 	switch (args->type) {
1597 	case NF4LNK:
1598 	case NF4BLK:
1599 	case NF4CHR:
1600 	case NF4SOCK:
1601 	case NF4FIFO:
1602 	case NF4DIR:
1603 		break;
1604 	default:
1605 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1606 		goto out;
1607 	};
1608 
1609 	if (cs->access == CS_ACCESS_DENIED) {
1610 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1611 		goto out;
1612 	}
1613 	if (dvp->v_type != VDIR) {
1614 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1615 		goto out;
1616 	}
1617 	status = utf8_dir_verify(&args->objname);
1618 	if (status != NFS4_OK) {
1619 		*cs->statusp = resp->status = status;
1620 		goto out;
1621 	}
1622 
1623 	if (rdonly4(req, cs)) {
1624 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1625 		goto out;
1626 	}
1627 
1628 	/*
1629 	 * Name of newly created object
1630 	 */
1631 	nm = utf8_to_fn(&args->objname, &len, NULL);
1632 	if (nm == NULL) {
1633 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1634 		goto out;
1635 	}
1636 
1637 	if (len > MAXNAMELEN) {
1638 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1639 		kmem_free(nm, len);
1640 		goto out;
1641 	}
1642 
1643 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1644 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1645 	    MAXPATHLEN  + 1);
1646 
1647 	if (name == NULL) {
1648 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1649 		kmem_free(nm, len);
1650 		goto out;
1651 	}
1652 
1653 	resp->attrset = 0;
1654 
1655 	sarg.sbp = &sb;
1656 	sarg.is_referral = B_FALSE;
1657 	nfs4_ntov_table_init(&ntov);
1658 
1659 	status = do_rfs4_set_attrs(&resp->attrset,
1660 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1661 
1662 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1663 		status = NFS4ERR_INVAL;
1664 
1665 	if (status != NFS4_OK) {
1666 		*cs->statusp = resp->status = status;
1667 		if (name != nm)
1668 			kmem_free(name, MAXPATHLEN + 1);
1669 		kmem_free(nm, len);
1670 		nfs4_ntov_table_free(&ntov, &sarg);
1671 		resp->attrset = 0;
1672 		goto out;
1673 	}
1674 
1675 	/* Get "before" change value */
1676 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1677 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1678 	if (error) {
1679 		*cs->statusp = resp->status = puterrno4(error);
1680 		if (name != nm)
1681 			kmem_free(name, MAXPATHLEN + 1);
1682 		kmem_free(nm, len);
1683 		nfs4_ntov_table_free(&ntov, &sarg);
1684 		resp->attrset = 0;
1685 		goto out;
1686 	}
1687 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1688 
1689 	vap = sarg.vap;
1690 
1691 	/*
1692 	 * Set the default initial values for attributes when the parent
1693 	 * directory does not have the VSUID/VSGID bit set and they have
1694 	 * not been specified in createattrs.
1695 	 */
1696 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1697 		vap->va_uid = crgetuid(cr);
1698 		vap->va_mask |= AT_UID;
1699 	}
1700 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1701 		vap->va_gid = crgetgid(cr);
1702 		vap->va_mask |= AT_GID;
1703 	}
1704 
1705 	vap->va_mask |= AT_TYPE;
1706 	switch (args->type) {
1707 	case NF4DIR:
1708 		vap->va_type = VDIR;
1709 		if ((vap->va_mask & AT_MODE) == 0) {
1710 			vap->va_mode = 0700;	/* default: owner rwx only */
1711 			vap->va_mask |= AT_MODE;
1712 		}
1713 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1714 		if (error)
1715 			break;
1716 
1717 		/*
1718 		 * Get the initial "after" sequence number, if it fails,
1719 		 * set to zero
1720 		 */
1721 		iva.va_mask = AT_SEQ;
1722 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1723 			iva.va_seq = 0;
1724 		break;
1725 	case NF4LNK:
1726 		vap->va_type = VLNK;
1727 		if ((vap->va_mask & AT_MODE) == 0) {
1728 			vap->va_mode = 0700;	/* default: owner rwx only */
1729 			vap->va_mask |= AT_MODE;
1730 		}
1731 
1732 		/*
1733 		 * symlink names must be treated as data
1734 		 */
1735 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1736 		    &llen, NULL);
1737 
1738 		if (lnm == NULL) {
1739 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1740 			if (name != nm)
1741 				kmem_free(name, MAXPATHLEN + 1);
1742 			kmem_free(nm, len);
1743 			nfs4_ntov_table_free(&ntov, &sarg);
1744 			resp->attrset = 0;
1745 			goto out;
1746 		}
1747 
1748 		if (llen > MAXPATHLEN) {
1749 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1750 			if (name != nm)
1751 				kmem_free(name, MAXPATHLEN + 1);
1752 			kmem_free(nm, len);
1753 			kmem_free(lnm, llen);
1754 			nfs4_ntov_table_free(&ntov, &sarg);
1755 			resp->attrset = 0;
1756 			goto out;
1757 		}
1758 
1759 		lname = nfscmd_convname(ca, cs->exi, lnm,
1760 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1761 
1762 		if (lname == NULL) {
1763 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1764 			if (name != nm)
1765 				kmem_free(name, MAXPATHLEN + 1);
1766 			kmem_free(nm, len);
1767 			kmem_free(lnm, llen);
1768 			nfs4_ntov_table_free(&ntov, &sarg);
1769 			resp->attrset = 0;
1770 			goto out;
1771 		}
1772 
1773 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1774 		if (lname != lnm)
1775 			kmem_free(lname, MAXPATHLEN + 1);
1776 		kmem_free(lnm, llen);
1777 		if (error)
1778 			break;
1779 
1780 		/*
1781 		 * Get the initial "after" sequence number, if it fails,
1782 		 * set to zero
1783 		 */
1784 		iva.va_mask = AT_SEQ;
1785 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1786 			iva.va_seq = 0;
1787 
1788 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1789 		    NULL, NULL, NULL);
1790 		if (error)
1791 			break;
1792 
1793 		/*
1794 		 * va_seq is not safe over VOP calls, check it again
1795 		 * if it has changed zero out iva to force atomic = FALSE.
1796 		 */
1797 		iva2.va_mask = AT_SEQ;
1798 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1799 		    iva2.va_seq != iva.va_seq)
1800 			iva.va_seq = 0;
1801 		break;
1802 	default:
1803 		/*
1804 		 * probably a special file.
1805 		 */
1806 		if ((vap->va_mask & AT_MODE) == 0) {
1807 			vap->va_mode = 0600;	/* default: owner rw only */
1808 			vap->va_mask |= AT_MODE;
1809 		}
1810 		syncval = FNODSYNC;
1811 		/*
1812 		 * We know this will only generate one VOP call
1813 		 */
1814 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1815 
1816 		if (vp == NULL) {
1817 			if (name != nm)
1818 				kmem_free(name, MAXPATHLEN + 1);
1819 			kmem_free(nm, len);
1820 			nfs4_ntov_table_free(&ntov, &sarg);
1821 			resp->attrset = 0;
1822 			goto out;
1823 		}
1824 
1825 		/*
1826 		 * Get the initial "after" sequence number, if it fails,
1827 		 * set to zero
1828 		 */
1829 		iva.va_mask = AT_SEQ;
1830 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1831 			iva.va_seq = 0;
1832 
1833 		break;
1834 	}
1835 	if (name != nm)
1836 		kmem_free(name, MAXPATHLEN + 1);
1837 	kmem_free(nm, len);
1838 
1839 	if (error) {
1840 		*cs->statusp = resp->status = puterrno4(error);
1841 	}
1842 
1843 	/*
1844 	 * Force modified data and metadata out to stable storage.
1845 	 */
1846 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
1847 
1848 	if (resp->status != NFS4_OK) {
1849 		if (vp != NULL)
1850 			VN_RELE(vp);
1851 		nfs4_ntov_table_free(&ntov, &sarg);
1852 		resp->attrset = 0;
1853 		goto out;
1854 	}
1855 
1856 	/*
1857 	 * Finish setup of cinfo response, "before" value already set.
1858 	 * Get "after" change value, if it fails, simply return the
1859 	 * before value.
1860 	 */
1861 	ava.va_mask = AT_CTIME|AT_SEQ;
1862 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1863 		ava.va_ctime = bva.va_ctime;
1864 		ava.va_seq = 0;
1865 	}
1866 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1867 
1868 	/*
1869 	 * True verification that object was created with correct
1870 	 * attrs is impossible.  The attrs could have been changed
1871 	 * immediately after object creation.  If attributes did
1872 	 * not verify, the only recourse for the server is to
1873 	 * destroy the object.  Maybe if some attrs (like gid)
1874 	 * are set incorrectly, the object should be destroyed;
1875 	 * however, seems bad as a default policy.  Do we really
1876 	 * want to destroy an object over one of the times not
1877 	 * verifying correctly?  For these reasons, the server
1878 	 * currently sets bits in attrset for createattrs
1879 	 * that were set; however, no verification is done.
1880 	 *
1881 	 * vmask_to_nmask accounts for vattr bits set on create
1882 	 *	[do_rfs4_set_attrs() only sets resp bits for
1883 	 *	 non-vattr/vfs bits.]
1884 	 * Mask off any bits set by default so as not to return
1885 	 * more attrset bits than were requested in createattrs
1886 	 */
1887 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1888 	resp->attrset &= args->createattrs.attrmask;
1889 	nfs4_ntov_table_free(&ntov, &sarg);
1890 
1891 	error = makefh4(&cs->fh, vp, cs->exi);
1892 	if (error) {
1893 		*cs->statusp = resp->status = puterrno4(error);
1894 	}
1895 
1896 	/*
1897 	 * The cinfo.atomic = TRUE only if we got no errors, we have
1898 	 * non-zero va_seq's, and it has incremented by exactly one
1899 	 * during the creation and it didn't change during the VOP_LOOKUP
1900 	 * or VOP_FSYNC.
1901 	 */
1902 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1903 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1904 		resp->cinfo.atomic = TRUE;
1905 	else
1906 		resp->cinfo.atomic = FALSE;
1907 
1908 	/*
1909 	 * Force modified metadata out to stable storage.
1910 	 *
1911 	 * if a underlying vp exists, pass it to VOP_FSYNC
1912 	 */
1913 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
1914 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
1915 	else
1916 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
1917 
1918 	if (resp->status != NFS4_OK) {
1919 		VN_RELE(vp);
1920 		goto out;
1921 	}
1922 	if (cs->vp)
1923 		VN_RELE(cs->vp);
1924 
1925 	cs->vp = vp;
1926 	*cs->statusp = resp->status = NFS4_OK;
1927 out:
1928 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1929 	    CREATE4res *, resp);
1930 }
1931 
1932 /*ARGSUSED*/
1933 static void
1934 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1935     struct compound_state *cs)
1936 {
1937 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1938 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1939 
1940 	rfs4_op_inval(argop, resop, req, cs);
1941 
1942 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1943 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1944 }
1945 
1946 /*ARGSUSED*/
1947 static void
1948 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1949     struct compound_state *cs)
1950 {
1951 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1952 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1953 	rfs4_deleg_state_t *dsp;
1954 	nfsstat4 status;
1955 
1956 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1957 	    DELEGRETURN4args *, args);
1958 
1959 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1960 	resp->status = *cs->statusp = status;
1961 	if (status != NFS4_OK)
1962 		goto out;
1963 
1964 	/* Ensure specified filehandle matches */
1965 	if (cs->vp != dsp->rds_finfo->rf_vp) {
1966 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1967 	} else
1968 		rfs4_return_deleg(dsp, FALSE);
1969 
1970 	rfs4_update_lease(dsp->rds_client);
1971 
1972 	rfs4_deleg_state_rele(dsp);
1973 out:
1974 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1975 	    DELEGRETURN4res *, resp);
1976 }
1977 
1978 /*
1979  * Check to see if a given "flavor" is an explicitly shared flavor.
1980  * The assumption of this routine is the "flavor" is already a valid
1981  * flavor in the secinfo list of "exi".
1982  *
1983  *	e.g.
1984  *		# share -o sec=flavor1 /export
1985  *		# share -o sec=flavor2 /export/home
1986  *
1987  *		flavor2 is not an explicitly shared flavor for /export,
1988  *		however it is in the secinfo list for /export thru the
1989  *		server namespace setup.
1990  */
1991 int
1992 is_exported_sec(int flavor, struct exportinfo *exi)
1993 {
1994 	int	i;
1995 	struct secinfo *sp;
1996 
1997 	sp = exi->exi_export.ex_secinfo;
1998 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1999 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2000 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2001 			return (SEC_REF_EXPORTED(&sp[i]));
2002 		}
2003 	}
2004 
2005 	/* Should not reach this point based on the assumption */
2006 	return (0);
2007 }
2008 
2009 /*
2010  * Check if the security flavor used in the request matches what is
2011  * required at the export point or at the root pseudo node (exi_root).
2012  *
2013  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2014  *
2015  */
2016 static int
2017 secinfo_match_or_authnone(struct compound_state *cs)
2018 {
2019 	int	i;
2020 	struct secinfo *sp;
2021 
2022 	/*
2023 	 * Check cs->nfsflavor (from the request) against
2024 	 * the current export data in cs->exi.
2025 	 */
2026 	sp = cs->exi->exi_export.ex_secinfo;
2027 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2028 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2029 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2030 			return (1);
2031 	}
2032 
2033 	return (0);
2034 }
2035 
2036 /*
2037  * Check the access authority for the client and return the correct error.
2038  */
2039 nfsstat4
2040 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2041 {
2042 	int	authres;
2043 
2044 	/*
2045 	 * First, check if the security flavor used in the request
2046 	 * are among the flavors set in the server namespace.
2047 	 */
2048 	if (!secinfo_match_or_authnone(cs)) {
2049 		*cs->statusp = NFS4ERR_WRONGSEC;
2050 		return (*cs->statusp);
2051 	}
2052 
2053 	authres = checkauth4(cs, req);
2054 
2055 	if (authres > 0) {
2056 		*cs->statusp = NFS4_OK;
2057 		if (! (cs->access & CS_ACCESS_LIMITED))
2058 			cs->access = CS_ACCESS_OK;
2059 	} else if (authres == 0) {
2060 		*cs->statusp = NFS4ERR_ACCESS;
2061 	} else if (authres == -2) {
2062 		*cs->statusp = NFS4ERR_WRONGSEC;
2063 	} else {
2064 		*cs->statusp = NFS4ERR_DELAY;
2065 	}
2066 	return (*cs->statusp);
2067 }
2068 
2069 /*
2070  * bitmap4_to_attrmask is called by getattr and readdir.
2071  * It sets up the vattr mask and determines whether vfsstat call is needed
2072  * based on the input bitmap.
2073  * Returns nfsv4 status.
2074  */
2075 static nfsstat4
2076 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2077 {
2078 	int i;
2079 	uint_t	va_mask;
2080 	struct statvfs64 *sbp = sargp->sbp;
2081 
2082 	sargp->sbp = NULL;
2083 	sargp->flag = 0;
2084 	sargp->rdattr_error = NFS4_OK;
2085 	sargp->mntdfid_set = FALSE;
2086 	if (sargp->cs->vp)
2087 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2088 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2089 	else
2090 		sargp->xattr = 0;
2091 
2092 	/*
2093 	 * Set rdattr_error_req to true if return error per
2094 	 * failed entry rather than fail the readdir.
2095 	 */
2096 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2097 		sargp->rdattr_error_req = 1;
2098 	else
2099 		sargp->rdattr_error_req = 0;
2100 
2101 	/*
2102 	 * generate the va_mask
2103 	 * Handle the easy cases first
2104 	 */
2105 	switch (breq) {
2106 	case NFS4_NTOV_ATTR_MASK:
2107 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2108 		return (NFS4_OK);
2109 
2110 	case NFS4_FS_ATTR_MASK:
2111 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2112 		sargp->sbp = sbp;
2113 		return (NFS4_OK);
2114 
2115 	case NFS4_NTOV_ATTR_CACHE_MASK:
2116 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2117 		return (NFS4_OK);
2118 
2119 	case FATTR4_LEASE_TIME_MASK:
2120 		sargp->vap->va_mask = 0;
2121 		return (NFS4_OK);
2122 
2123 	default:
2124 		va_mask = 0;
2125 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2126 			if ((breq & nfs4_ntov_map[i].fbit) &&
2127 			    nfs4_ntov_map[i].vbit)
2128 				va_mask |= nfs4_ntov_map[i].vbit;
2129 		}
2130 
2131 		/*
2132 		 * Check is vfsstat is needed
2133 		 */
2134 		if (breq & NFS4_FS_ATTR_MASK)
2135 			sargp->sbp = sbp;
2136 
2137 		sargp->vap->va_mask = va_mask;
2138 		return (NFS4_OK);
2139 	}
2140 	/* NOTREACHED */
2141 }
2142 
2143 /*
2144  * bitmap4_get_sysattrs is called by getattr and readdir.
2145  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2146  * Returns nfsv4 status.
2147  */
2148 static nfsstat4
2149 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2150 {
2151 	int error;
2152 	struct compound_state *cs = sargp->cs;
2153 	vnode_t *vp = cs->vp;
2154 
2155 	if (sargp->sbp != NULL) {
2156 		if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2157 			sargp->sbp = NULL;	/* to identify error */
2158 			return (puterrno4(error));
2159 		}
2160 	}
2161 
2162 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2163 }
2164 
2165 static void
2166 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2167 {
2168 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2169 	    KM_SLEEP);
2170 	ntovp->attrcnt = 0;
2171 	ntovp->vfsstat = FALSE;
2172 }
2173 
2174 static void
2175 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2176     struct nfs4_svgetit_arg *sargp)
2177 {
2178 	int i;
2179 	union nfs4_attr_u *na;
2180 	uint8_t *amap;
2181 
2182 	/*
2183 	 * XXX Should do the same checks for whether the bit is set
2184 	 */
2185 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2186 	    i < ntovp->attrcnt; i++, na++, amap++) {
2187 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2188 		    NFS4ATTR_FREEIT, sargp, na);
2189 	}
2190 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2191 		/*
2192 		 * xdr_free for getattr will be done later
2193 		 */
2194 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2195 		    i < ntovp->attrcnt; i++, na++, amap++) {
2196 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2197 		}
2198 	}
2199 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2200 }
2201 
/*
 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
 *
 * breq   - bitmap of attributes requested by the client
 * fattrp - result; attrmask receives the bits of the attributes actually
 *	    returned, attrlist4/attrlist4_len the XDR-encoded values
 * sargp  - per-request state shared with the sv_getit routines
 *
 * Returns NFS4_OK, NFS4ERR_INVAL if a write-only attribute was requested,
 * NFS4ERR_SERVERFAULT if encoding an attribute fails, or the translation
 * of the errno reported by a sv_getit routine (only when per-entry error
 * reporting, rdattr_error_req, is not in effect).
 */
static nfsstat4
do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
    struct nfs4_svgetit_arg *sargp)
{
	int error = 0;
	int i, k;
	struct nfs4_ntov_table ntov;
	XDR xdr;
	ulong_t xdr_size;
	char *xdr_attrs;
	nfsstat4 status = NFS4_OK;
	/* remembered so a change made during the first pass can be detected */
	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
	union nfs4_attr_u *na;
	uint8_t *amap;

	sargp->op = NFS4ATTR_GETIT;
	sargp->flag = 0;

	fattrp->attrmask = 0;
	/* if no bits requested, then return empty fattr4 */
	if (breq == 0) {
		fattrp->attrlist4_len = 0;
		fattrp->attrlist4 = NULL;
		return (NFS4_OK);
	}

	/*
	 * return NFS4ERR_INVAL when client requests write-only attrs
	 */
	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
		return (NFS4ERR_INVAL);

	/* from here on every exit must go through nfs4_ntov_table_free() */
	nfs4_ntov_table_init(&ntov);
	na = ntov.na;
	amap = ntov.amap;

	/*
	 * Now loop to get or verify the attrs
	 *
	 * na/amap advance only for attributes that were fetched
	 * successfully, so the first ntov.attrcnt slots are always valid.
	 */
	for (i = 0; i < nfs4_ntov_map_size; i++) {
		if (breq & nfs4_ntov_map[i].fbit) {
			/* unsupported attributes are silently left out */
			if ((*nfs4_ntov_map[i].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {

				error = (*nfs4_ntov_map[i].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);

				/*
				 * Possible error values:
				 * >0 if sv_getit failed to
				 * get the attr; 0 if succeeded;
				 * <0 if rdattr_error and the
				 * attribute cannot be returned.
				 */
				if (error && !(sargp->rdattr_error_req))
					goto done;
				/*
				 * If error then just for entry
				 */
				if (error == 0) {
					fattrp->attrmask |=
					    nfs4_ntov_map[i].fbit;
					*amap++ =
					    (uint8_t)nfs4_ntov_map[i].nval;
					na++;
					(ntov.attrcnt)++;
				} else if ((error > 0) &&
				    (sargp->rdattr_error == NFS4_OK)) {
					/* only the first failure is recorded */
					sargp->rdattr_error = puterrno4(error);
				}
				/* the error was absorbed into rdattr_error */
				error = 0;
			}
		}
	}

	/*
	 * If rdattr_error was set after the return value for it was assigned,
	 * update it.
	 *
	 * The scan skips entries numbered below FATTR4_RDATTR_ERROR and
	 * stops at the first entry that is not, refreshing it only if it
	 * is the rdattr_error attribute itself.
	 */
	if (prev_rdattr_error != sargp->rdattr_error) {
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			k = *amap;
			if (k < FATTR4_RDATTR_ERROR) {
				continue;
			}
			if ((k == FATTR4_RDATTR_ERROR) &&
			    ((*nfs4_ntov_map[k].sv_getit)(
			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {

				(void) (*nfs4_ntov_map[k].sv_getit)(
				    NFS4ATTR_GETIT, sargp, na);
			}
			break;
		}
	}

	/* first pass over the collected attrs: total encoded size */
	xdr_size = 0;
	na = ntov.na;
	amap = ntov.amap;
	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
	}

	fattrp->attrlist4_len = xdr_size;
	if (xdr_size) {
		/* freed by rfs4_op_getattr_free() */
		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);

		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);

		/* second pass: encode each attribute into the buffer */
		na = ntov.na;
		amap = ntov.amap;
		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
				DTRACE_PROBE1(nfss__e__getattr4_encfail,
				    int, *amap);
				status = NFS4ERR_SERVERFAULT;
				break;
			}
		}
		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
	} else {
		fattrp->attrlist4 = NULL;
	}
done:

	nfs4_ntov_table_free(&ntov, sargp);

	/* non-zero only when the fetch loop bailed out via "goto done" */
	if (error != 0)
		status = puterrno4(error);

	return (status);
}
2340 
2341 /* ARGSUSED */
2342 static void
2343 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2344     struct compound_state *cs)
2345 {
2346 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2347 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2348 	struct nfs4_svgetit_arg sarg;
2349 	struct statvfs64 sb;
2350 	nfsstat4 status;
2351 
2352 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2353 	    GETATTR4args *, args);
2354 
2355 	if (cs->vp == NULL) {
2356 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2357 		goto out;
2358 	}
2359 
2360 	if (cs->access == CS_ACCESS_DENIED) {
2361 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2362 		goto out;
2363 	}
2364 
2365 	sarg.sbp = &sb;
2366 	sarg.cs = cs;
2367 	sarg.is_referral = B_FALSE;
2368 
2369 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2370 	if (status == NFS4_OK) {
2371 
2372 		status = bitmap4_get_sysattrs(&sarg);
2373 		if (status == NFS4_OK) {
2374 
2375 			/* Is this a referral? */
2376 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2377 				/* Older V4 Solaris client sees a link */
2378 				if (client_is_downrev(req))
2379 					sarg.vap->va_type = VLNK;
2380 				else
2381 					sarg.is_referral = B_TRUE;
2382 			}
2383 
2384 			status = do_rfs4_op_getattr(args->attr_request,
2385 			    &resp->obj_attributes, &sarg);
2386 		}
2387 	}
2388 	*cs->statusp = resp->status = status;
2389 out:
2390 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2391 	    GETATTR4res *, resp);
2392 }
2393 
2394 static void
2395 rfs4_op_getattr_free(nfs_resop4 *resop)
2396 {
2397 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2398 
2399 	nfs4_fattr4_free(&resp->obj_attributes);
2400 }
2401 
2402 /* ARGSUSED */
2403 static void
2404 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2405     struct compound_state *cs)
2406 {
2407 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2408 
2409 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2410 
2411 	if (cs->vp == NULL) {
2412 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2413 		goto out;
2414 	}
2415 	if (cs->access == CS_ACCESS_DENIED) {
2416 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2417 		goto out;
2418 	}
2419 
2420 	/* check for reparse point at the share point */
2421 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2422 		/* it's all bad */
2423 		cs->exi->exi_moved = 1;
2424 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2425 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2426 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2427 		return;
2428 	}
2429 
2430 	/* check for reparse point at vp */
2431 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2432 		/* it's not all bad */
2433 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2434 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2435 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2436 		return;
2437 	}
2438 
2439 	resp->object.nfs_fh4_val =
2440 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2441 	nfs_fh4_copy(&cs->fh, &resp->object);
2442 	*cs->statusp = resp->status = NFS4_OK;
2443 out:
2444 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2445 	    GETFH4res *, resp);
2446 }
2447 
2448 static void
2449 rfs4_op_getfh_free(nfs_resop4 *resop)
2450 {
2451 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2452 
2453 	if (resp->status == NFS4_OK &&
2454 	    resp->object.nfs_fh4_val != NULL) {
2455 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2456 		resp->object.nfs_fh4_val = NULL;
2457 		resp->object.nfs_fh4_len = 0;
2458 	}
2459 }
2460 
2461 /*
2462  * illegal: args: void
2463  *	    res : status (NFS4ERR_OP_ILLEGAL)
2464  */
2465 /* ARGSUSED */
2466 static void
2467 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2468     struct svc_req *req, struct compound_state *cs)
2469 {
2470 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2471 
2472 	resop->resop = OP_ILLEGAL;
2473 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2474 }
2475 
/*
 * link: args: SAVED_FH: file, CURRENT_FH: target directory
 *	 res: status. If success - CURRENT_FH unchanged, return change_info
 *
 * The new link name comes from args->newname.  Validation failures are
 * reported before any file system modification is attempted:
 *	NFS4ERR_NOFILEHANDLE	saved or current filehandle missing
 *	NFS4ERR_ACCESS		target directory is a mount point, or
 *				access has been denied
 *	NFS4ERR_ISDIR		source object is a directory
 *	NFS4ERR_NOTDIR		target is not a directory
 *	NFS4ERR_XDEV		source and target are in different exports
 *	NFS4ERR_INVAL		name cannot be converted
 *	NFS4ERR_NAMETOOLONG	name longer than MAXNAMELEN
 *	NFS4ERR_ROFS		rdonly4() reports a read-only export
 * Errors from VOP_GETATTR/VOP_LINK are translated with puterrno4().
 */
/* ARGSUSED */
static void
rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	LINK4args *args = &argop->nfs_argop4_u.oplink;
	LINK4res *resp = &resop->nfs_resop4_u.oplink;
	int error;
	vnode_t *vp;
	vnode_t *dvp;
	/* directory attrs: before the link, right after it, after the sync */
	struct vattr bdva, idva, adva;
	char *nm;
	uint_t  len;
	struct sockaddr *ca;
	char *name = NULL;
	nfsstat4 status;

	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
	    LINK4args *, args);

	/* SAVED_FH: source object */
	vp = cs->saved_vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/* CURRENT_FH: target directory */
	dvp = cs->vp;
	if (dvp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}

	/*
	 * If there is a non-shared filesystem mounted on this vnode,
	 * do not allow to link any file in this directory.
	 */
	if (vn_ismntpt(dvp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* Check source object's type validity */
	if (vp->v_type == VDIR) {
		*cs->statusp = resp->status = NFS4ERR_ISDIR;
		goto out;
	}

	/* Check target directory's type */
	if (dvp->v_type != VDIR) {
		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
		goto out;
	}

	/* source and target must belong to the same export */
	if (cs->saved_exi != cs->exi) {
		*cs->statusp = resp->status = NFS4ERR_XDEV;
		goto out;
	}

	status = utf8_dir_verify(&args->newname);
	if (status != NFS4_OK) {
		*cs->statusp = resp->status = status;
		goto out;
	}

	/*
	 * nm is an allocated copy of the name (len bytes); it must be
	 * freed with kmem_free(nm, len) on every path from here on.
	 */
	nm = utf8_to_fn(&args->newname, &len, NULL);
	if (nm == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}

	if (len > MAXNAMELEN) {
		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
		kmem_free(nm, len);
		goto out;
	}

	if (rdonly4(req, cs)) {
		*cs->statusp = resp->status = NFS4ERR_ROFS;
		kmem_free(nm, len);
		goto out;
	}

	/* Get "before" change value */
	bdva.va_mask = AT_CTIME|AT_SEQ;
	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		kmem_free(nm, len);
		goto out;
	}

	/*
	 * Convert the inbound name for this client/export.  The result is
	 * either nm itself or a separate MAXPATHLEN + 1 byte buffer (see
	 * the conditional free below).
	 */
	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
	    MAXPATHLEN  + 1);

	if (name == NULL) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		kmem_free(nm, len);
		goto out;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)

	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);

	/* the name buffers are no longer needed, success or not */
	if (nm != name)
		kmem_free(name, MAXPATHLEN + 1);
	kmem_free(nm, len);

	/*
	 * Get the initial "after" sequence number, if it fails, set to zero
	 */
	idva.va_mask = AT_SEQ;
	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
		idva.va_seq = 0;

	/*
	 * Force modified data and metadata out to stable storage.
	 * This is done even if VOP_LINK failed; its error is only
	 * examined afterwards.
	 */
	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);

	if (error) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/*
	 * Get "after" change value, if it fails, simply return the
	 * before value.
	 */
	adva.va_mask = AT_CTIME|AT_SEQ;
	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
		adva.va_ctime = bdva.va_ctime;
		adva.va_seq = 0;
	}

	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)

	/*
	 * The cinfo.atomic = TRUE only if we have
	 * non-zero va_seq's, and it has incremented by exactly one
	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
	 */
	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
		resp->cinfo.atomic = TRUE;
	else
		resp->cinfo.atomic = FALSE;

	*cs->statusp = resp->status = NFS4_OK;
out:
	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
	    LINK4res *, resp);
}
2642 
2643 /*
2644  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2645  */
2646 
2647 /* ARGSUSED */
2648 static nfsstat4
2649 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2650 {
2651 	int error;
2652 	int different_export = 0;
2653 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2654 	struct exportinfo *exi = NULL, *pre_exi = NULL, *oexi = NULL;
2655 	nfsstat4 stat;
2656 	fid_t fid;
2657 	int attrdir, dotdot, walk;
2658 	bool_t is_newvp = FALSE;
2659 
2660 	if (cs->vp->v_flag & V_XATTRDIR) {
2661 		attrdir = 1;
2662 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2663 	} else {
2664 		attrdir = 0;
2665 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2666 	}
2667 
2668 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2669 
2670 	/*
2671 	 * If dotdotting, then need to check whether it's
2672 	 * above the root of a filesystem, or above an
2673 	 * export point.
2674 	 */
2675 	if (dotdot) {
2676 
2677 		/*
2678 		 * If dotdotting at the root of a filesystem, then
2679 		 * need to traverse back to the mounted-on filesystem
2680 		 * and do the dotdot lookup there.
2681 		 */
2682 		if (cs->vp->v_flag & VROOT) {
2683 
2684 			/*
2685 			 * If at the system root, then can
2686 			 * go up no further.
2687 			 */
2688 			if (VN_CMP(cs->vp, rootdir))
2689 				return (puterrno4(ENOENT));
2690 
2691 			/*
2692 			 * Traverse back to the mounted-on filesystem
2693 			 */
2694 			cs->vp = untraverse(cs->vp);
2695 
2696 			/*
2697 			 * Set the different_export flag so we remember
2698 			 * to pick up a new exportinfo entry for
2699 			 * this new filesystem.
2700 			 */
2701 			different_export = 1;
2702 		} else {
2703 
2704 			/*
2705 			 * If dotdotting above an export point then set
2706 			 * the different_export to get new export info.
2707 			 */
2708 			different_export = nfs_exported(cs->exi, cs->vp);
2709 		}
2710 	}
2711 
2712 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2713 	    NULL, NULL, NULL);
2714 	if (error)
2715 		return (puterrno4(error));
2716 
2717 	/*
2718 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2719 	 *
2720 	 * XXX if the vnode is a symlink and it is not visible in
2721 	 * a pseudo filesystem, return ENOENT (not following symlink).
2722 	 * V4 client can not mount such symlink. This is a regression
2723 	 * from V2/V3.
2724 	 *
2725 	 * In the same exported filesystem, if the security flavor used
2726 	 * is not an explicitly shared flavor, limit the view to the visible
2727 	 * list entries only. This is not a WRONGSEC case because it's already
2728 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2729 	 */
2730 	if (!different_export &&
2731 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2732 	    cs->access & CS_ACCESS_LIMITED)) {
2733 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2734 			VN_RELE(vp);
2735 			return (puterrno4(ENOENT));
2736 		}
2737 	}
2738 
2739 	/*
2740 	 * If it's a mountpoint, then traverse it.
2741 	 */
2742 	if (vn_ismntpt(vp)) {
2743 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2744 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2745 
2746 		/*
2747 		 * hold pre_tvp to counteract rele by traverse.  We will
2748 		 * need pre_tvp below if checkexport fails
2749 		 */
2750 		VN_HOLD(pre_tvp);
2751 		if ((error = traverse(&vp)) != 0) {
2752 			VN_RELE(vp);
2753 			VN_RELE(pre_tvp);
2754 			return (puterrno4(error));
2755 		}
2756 		different_export = 1;
2757 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2758 		/*
2759 		 * The vfsp comparison is to handle the case where
2760 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2761 		 * and NFS is unaware of local fs transistions because
2762 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2763 		 * the dir and the obj returned by lookup will have different
2764 		 * vfs ptrs.
2765 		 */
2766 		different_export = 1;
2767 	}
2768 
2769 	if (different_export) {
2770 
2771 		bzero(&fid, sizeof (fid));
2772 		fid.fid_len = MAXFIDSZ;
2773 		error = vop_fid_pseudo(vp, &fid);
2774 		if (error) {
2775 			VN_RELE(vp);
2776 			if (pre_tvp)
2777 				VN_RELE(pre_tvp);
2778 			return (puterrno4(error));
2779 		}
2780 
2781 		if (dotdot)
2782 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2783 		else
2784 			exi = checkexport(&vp->v_vfsp->vfs_fsid, &fid, vp);
2785 
2786 		if (exi == NULL) {
2787 			if (pre_tvp) {
2788 				/*
2789 				 * If this vnode is a mounted-on vnode,
2790 				 * but the mounted-on file system is not
2791 				 * exported, send back the filehandle for
2792 				 * the mounted-on vnode, not the root of
2793 				 * the mounted-on file system.
2794 				 */
2795 				VN_RELE(vp);
2796 				vp = pre_tvp;
2797 				exi = pre_exi;
2798 				if (exi)
2799 					exi_hold(exi);
2800 			} else {
2801 				VN_RELE(vp);
2802 				return (puterrno4(EACCES));
2803 			}
2804 		} else if (pre_tvp) {
2805 			/* we're done with pre_tvp now. release extra hold */
2806 			VN_RELE(pre_tvp);
2807 		}
2808 
2809 		if (cs->exi)
2810 			exi_rele(cs->exi);
2811 		cs->exi = exi;
2812 
2813 		/*
2814 		 * Now we do a checkauth4. The reason is that
2815 		 * this client/user may not have access to the new
2816 		 * exported file system, and if he does,
2817 		 * the client/user may be mapped to a different uid.
2818 		 *
2819 		 * We start with a new cr, because the checkauth4 done
2820 		 * in the PUT*FH operation over wrote the cred's uid,
2821 		 * gid, etc, and we want the real thing before calling
2822 		 * checkauth4()
2823 		 */
2824 		crfree(cs->cr);
2825 		cs->cr = crdup(cs->basecr);
2826 
2827 		oldvp = cs->vp;
2828 		cs->vp = vp;
2829 		is_newvp = TRUE;
2830 
2831 		stat = call_checkauth4(cs, req);
2832 		if (stat != NFS4_OK) {
2833 			VN_RELE(cs->vp);
2834 			cs->vp = oldvp;
2835 			return (stat);
2836 		}
2837 	}
2838 
2839 	/*
2840 	 * After various NFS checks, do a label check on the path
2841 	 * component. The label on this path should either be the
2842 	 * global zone's label or a zone's label. We are only
2843 	 * interested in the zone's label because exported files
2844 	 * in global zone is accessible (though read-only) to
2845 	 * clients. The exportability/visibility check is already
2846 	 * done before reaching this code.
2847 	 */
2848 	if (is_system_labeled()) {
2849 		bslabel_t *clabel;
2850 
2851 		ASSERT(req->rq_label != NULL);
2852 		clabel = req->rq_label;
2853 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2854 		    "got client label from request(1)", struct svc_req *, req);
2855 
2856 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
2857 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2858 			    cs->exi)) {
2859 				error = EACCES;
2860 				goto err_out;
2861 			}
2862 		} else {
2863 			/*
2864 			 * We grant access to admin_low label clients
2865 			 * only if the client is trusted, i.e. also
2866 			 * running Solaris Trusted Extension.
2867 			 */
2868 			struct sockaddr	*ca;
2869 			int		addr_type;
2870 			void		*ipaddr;
2871 			tsol_tpc_t	*tp;
2872 
2873 			ca = (struct sockaddr *)svc_getrpccaller(
2874 			    req->rq_xprt)->buf;
2875 			if (ca->sa_family == AF_INET) {
2876 				addr_type = IPV4_VERSION;
2877 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2878 			} else if (ca->sa_family == AF_INET6) {
2879 				addr_type = IPV6_VERSION;
2880 				ipaddr = &((struct sockaddr_in6 *)
2881 				    ca)->sin6_addr;
2882 			}
2883 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
2884 			if (tp == NULL || tp->tpc_tp.tp_doi !=
2885 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2886 			    SUN_CIPSO) {
2887 				if (tp != NULL)
2888 					TPC_RELE(tp);
2889 				error = EACCES;
2890 				goto err_out;
2891 			}
2892 			TPC_RELE(tp);
2893 		}
2894 	}
2895 
2896 	error = makefh4(&cs->fh, vp, cs->exi);
2897 
2898 err_out:
2899 	if (error) {
2900 		if (is_newvp) {
2901 			VN_RELE(cs->vp);
2902 			cs->vp = oldvp;
2903 		} else
2904 			VN_RELE(vp);
2905 		return (puterrno4(error));
2906 	}
2907 
2908 	if (!is_newvp) {
2909 		if (cs->vp)
2910 			VN_RELE(cs->vp);
2911 		cs->vp = vp;
2912 	} else if (oldvp)
2913 		VN_RELE(oldvp);
2914 
2915 	/*
2916 	 * if did lookup on attrdir and didn't lookup .., set named
2917 	 * attr fh flag
2918 	 */
2919 	if (attrdir && ! dotdot)
2920 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2921 
2922 	/* Assume false for now, open proc will set this */
2923 	cs->mandlock = FALSE;
2924 
2925 	return (NFS4_OK);
2926 }
2927 
2928 /* ARGSUSED */
2929 static void
2930 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2931     struct compound_state *cs)
2932 {
2933 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2934 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2935 	char *nm;
2936 	uint_t len;
2937 	struct sockaddr *ca;
2938 	char *name = NULL;
2939 	nfsstat4 status;
2940 
2941 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2942 	    LOOKUP4args *, args);
2943 
2944 	if (cs->vp == NULL) {
2945 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2946 		goto out;
2947 	}
2948 
2949 	if (cs->vp->v_type == VLNK) {
2950 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
2951 		goto out;
2952 	}
2953 
2954 	if (cs->vp->v_type != VDIR) {
2955 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2956 		goto out;
2957 	}
2958 
2959 	status = utf8_dir_verify(&args->objname);
2960 	if (status != NFS4_OK) {
2961 		*cs->statusp = resp->status = status;
2962 		goto out;
2963 	}
2964 
2965 	nm = utf8_to_str(&args->objname, &len, NULL);
2966 	if (nm == NULL) {
2967 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2968 		goto out;
2969 	}
2970 
2971 	if (len > MAXNAMELEN) {
2972 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2973 		kmem_free(nm, len);
2974 		goto out;
2975 	}
2976 
2977 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2978 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2979 	    MAXPATHLEN  + 1);
2980 
2981 	if (name == NULL) {
2982 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2983 		kmem_free(nm, len);
2984 		goto out;
2985 	}
2986 
2987 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2988 
2989 	if (name != nm)
2990 		kmem_free(name, MAXPATHLEN + 1);
2991 	kmem_free(nm, len);
2992 
2993 out:
2994 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2995 	    LOOKUP4res *, resp);
2996 }
2997 
2998 /* ARGSUSED */
2999 static void
3000 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3001     struct compound_state *cs)
3002 {
3003 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3004 
3005 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3006 
3007 	if (cs->vp == NULL) {
3008 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3009 		goto out;
3010 	}
3011 
3012 	if (cs->vp->v_type != VDIR) {
3013 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3014 		goto out;
3015 	}
3016 
3017 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3018 
3019 	/*
3020 	 * From NFSV4 Specification, LOOKUPP should not check for
3021 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3022 	 */
3023 	if (resp->status == NFS4ERR_WRONGSEC) {
3024 		*cs->statusp = resp->status = NFS4_OK;
3025 	}
3026 
3027 out:
3028 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3029 	    LOOKUPP4res *, resp);
3030 }
3031 
3032 
3033 /*ARGSUSED2*/
3034 static void
3035 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3036     struct compound_state *cs)
3037 {
3038 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3039 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3040 	vnode_t		*avp = NULL;
3041 	int		lookup_flags = LOOKUP_XATTR, error;
3042 	int		exp_ro = 0;
3043 
3044 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3045 	    OPENATTR4args *, args);
3046 
3047 	if (cs->vp == NULL) {
3048 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3049 		goto out;
3050 	}
3051 
3052 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3053 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3054 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3055 		goto out;
3056 	}
3057 
3058 	/*
3059 	 * If file system supports passing ACE mask to VOP_ACCESS then
3060 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3061 	 */
3062 
3063 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3064 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3065 		    V_ACE_MASK, cs->cr, NULL);
3066 	else
3067 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3068 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3069 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3070 
3071 	if (error) {
3072 		*cs->statusp = resp->status = puterrno4(EACCES);
3073 		goto out;
3074 	}
3075 
3076 	/*
3077 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3078 	 * the file system is exported read-only -- regardless of
3079 	 * createdir flag.  Otherwise the attrdir would be created
3080 	 * (assuming server fs isn't mounted readonly locally).  If
3081 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3082 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3083 	 * because specfs has no VOP_LOOKUP op, so the macro would
3084 	 * return ENOSYS.  EINVAL is returned by all (current)
3085 	 * Solaris file system implementations when any of their
3086 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3087 	 * Returning NOTSUPP is more appropriate in this case
3088 	 * because the object will never be able to have an attrdir.
3089 	 */
3090 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3091 		lookup_flags |= CREATE_XATTR_DIR;
3092 
3093 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3094 	    NULL, NULL, NULL);
3095 
3096 	if (error) {
3097 		if (error == ENOENT && args->createdir && exp_ro)
3098 			*cs->statusp = resp->status = puterrno4(EROFS);
3099 		else if (error == EINVAL || error == ENOSYS)
3100 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3101 		else
3102 			*cs->statusp = resp->status = puterrno4(error);
3103 		goto out;
3104 	}
3105 
3106 	ASSERT(avp->v_flag & V_XATTRDIR);
3107 
3108 	error = makefh4(&cs->fh, avp, cs->exi);
3109 
3110 	if (error) {
3111 		VN_RELE(avp);
3112 		*cs->statusp = resp->status = puterrno4(error);
3113 		goto out;
3114 	}
3115 
3116 	VN_RELE(cs->vp);
3117 	cs->vp = avp;
3118 
3119 	/*
3120 	 * There is no requirement for an attrdir fh flag
3121 	 * because the attrdir has a vnode flag to distinguish
3122 	 * it from regular (non-xattr) directories.  The
3123 	 * FH4_ATTRDIR flag is set for future sanity checks.
3124 	 */
3125 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3126 	*cs->statusp = resp->status = NFS4_OK;
3127 
3128 out:
3129 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3130 	    OPENATTR4res *, resp);
3131 }
3132 
3133 static int
3134 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3135     caller_context_t *ct)
3136 {
3137 	int error;
3138 	int i;
3139 	clock_t delaytime;
3140 
3141 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3142 
3143 	/*
3144 	 * Don't block on mandatory locks. If this routine returns
3145 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3146 	 */
3147 	uio->uio_fmode = FNONBLOCK;
3148 
3149 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3150 
3151 
3152 		if (direction == FREAD) {
3153 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3154 			error = VOP_READ(vp, uio, ioflag, cred, ct);
3155 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3156 		} else {
3157 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3158 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3159 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3160 		}
3161 
3162 		if (error != EAGAIN)
3163 			break;
3164 
3165 		if (i < rfs4_maxlock_tries - 1) {
3166 			delay(delaytime);
3167 			delaytime *= 2;
3168 		}
3169 	}
3170 
3171 	return (error);
3172 }
3173 
3174 /* ARGSUSED */
3175 static void
3176 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3177     struct compound_state *cs)
3178 {
3179 	READ4args *args = &argop->nfs_argop4_u.opread;
3180 	READ4res *resp = &resop->nfs_resop4_u.opread;
3181 	int error;
3182 	int verror;
3183 	vnode_t *vp;
3184 	struct vattr va;
3185 	struct iovec iov, *iovp = NULL;
3186 	int iovcnt;
3187 	struct uio uio;
3188 	u_offset_t offset;
3189 	bool_t *deleg = &cs->deleg;
3190 	nfsstat4 stat;
3191 	int in_crit = 0;
3192 	mblk_t *mp = NULL;
3193 	int alloc_err = 0;
3194 	int rdma_used = 0;
3195 	int loaned_buffers;
3196 	caller_context_t ct;
3197 	struct uio *uiop;
3198 
3199 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3200 	    READ4args, args);
3201 
3202 	vp = cs->vp;
3203 	if (vp == NULL) {
3204 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3205 		goto out;
3206 	}
3207 	if (cs->access == CS_ACCESS_DENIED) {
3208 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3209 		goto out;
3210 	}
3211 
3212 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3213 	    deleg, TRUE, &ct)) != NFS4_OK) {
3214 		*cs->statusp = resp->status = stat;
3215 		goto out;
3216 	}
3217 
3218 	/*
3219 	 * Enter the critical region before calling VOP_RWLOCK
3220 	 * to avoid a deadlock with write requests.
3221 	 */
3222 	if (nbl_need_check(vp)) {
3223 		nbl_start_crit(vp, RW_READER);
3224 		in_crit = 1;
3225 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3226 		    &ct)) {
3227 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
3228 			goto out;
3229 		}
3230 	}
3231 
3232 	if (args->wlist) {
3233 		if (args->count > clist_len(args->wlist)) {
3234 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3235 			goto out;
3236 		}
3237 		rdma_used = 1;
3238 	}
3239 
3240 	/* use loaned buffers for TCP */
3241 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3242 
3243 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3244 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3245 
3246 	/*
3247 	 * If we can't get the attributes, then we can't do the
3248 	 * right access checking.  So, we'll fail the request.
3249 	 */
3250 	if (verror) {
3251 		*cs->statusp = resp->status = puterrno4(verror);
3252 		goto out;
3253 	}
3254 
3255 	if (vp->v_type != VREG) {
3256 		*cs->statusp = resp->status =
3257 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3258 		goto out;
3259 	}
3260 
3261 	if (crgetuid(cs->cr) != va.va_uid &&
3262 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3263 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3264 		*cs->statusp = resp->status = puterrno4(error);
3265 		goto out;
3266 	}
3267 
3268 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3269 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3270 		goto out;
3271 	}
3272 
3273 	offset = args->offset;
3274 	if (offset >= va.va_size) {
3275 		*cs->statusp = resp->status = NFS4_OK;
3276 		resp->eof = TRUE;
3277 		resp->data_len = 0;
3278 		resp->data_val = NULL;
3279 		resp->mblk = NULL;
3280 		/* RDMA */
3281 		resp->wlist = args->wlist;
3282 		resp->wlist_len = resp->data_len;
3283 		*cs->statusp = resp->status = NFS4_OK;
3284 		if (resp->wlist)
3285 			clist_zero_len(resp->wlist);
3286 		goto out;
3287 	}
3288 
3289 	if (args->count == 0) {
3290 		*cs->statusp = resp->status = NFS4_OK;
3291 		resp->eof = FALSE;
3292 		resp->data_len = 0;
3293 		resp->data_val = NULL;
3294 		resp->mblk = NULL;
3295 		/* RDMA */
3296 		resp->wlist = args->wlist;
3297 		resp->wlist_len = resp->data_len;
3298 		if (resp->wlist)
3299 			clist_zero_len(resp->wlist);
3300 		goto out;
3301 	}
3302 
3303 	/*
3304 	 * Do not allocate memory more than maximum allowed
3305 	 * transfer size
3306 	 */
3307 	if (args->count > rfs4_tsize(req))
3308 		args->count = rfs4_tsize(req);
3309 
3310 	if (loaned_buffers) {
3311 		uiop = (uio_t *)rfs_setup_xuio(vp);
3312 		ASSERT(uiop != NULL);
3313 		uiop->uio_segflg = UIO_SYSSPACE;
3314 		uiop->uio_loffset = args->offset;
3315 		uiop->uio_resid = args->count;
3316 
3317 		/* Jump to do the read if successful */
3318 		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3319 			/*
3320 			 * Need to hold the vnode until after VOP_RETZCBUF()
3321 			 * is called.
3322 			 */
3323 			VN_HOLD(vp);
3324 			goto doio_read;
3325 		}
3326 
3327 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3328 		    uiop->uio_loffset, int, uiop->uio_resid);
3329 
3330 		uiop->uio_extflg = 0;
3331 
3332 		/* failure to setup for zero copy */
3333 		rfs_free_xuio((void *)uiop);
3334 		loaned_buffers = 0;
3335 	}
3336 
3337 	/*
3338 	 * If returning data via RDMA Write, then grab the chunk list. If we
3339 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3340 	 */
3341 	if (rdma_used) {
3342 		mp = NULL;
3343 		(void) rdma_get_wchunk(req, &iov, args->wlist);
3344 		uio.uio_iov = &iov;
3345 		uio.uio_iovcnt = 1;
3346 	} else {
3347 		/*
3348 		 * mp will contain the data to be sent out in the read reply.
3349 		 * It will be freed after the reply has been sent.
3350 		 */
3351 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3352 		ASSERT(mp != NULL);
3353 		ASSERT(alloc_err == 0);
3354 		uio.uio_iov = iovp;
3355 		uio.uio_iovcnt = iovcnt;
3356 	}
3357 
3358 	uio.uio_segflg = UIO_SYSSPACE;
3359 	uio.uio_extflg = UIO_COPY_CACHED;
3360 	uio.uio_loffset = args->offset;
3361 	uio.uio_resid = args->count;
3362 	uiop = &uio;
3363 
3364 doio_read:
3365 	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3366 
3367 	va.va_mask = AT_SIZE;
3368 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3369 
3370 	if (error) {
3371 		if (mp)
3372 			freemsg(mp);
3373 		*cs->statusp = resp->status = puterrno4(error);
3374 		goto out;
3375 	}
3376 
3377 	/* make mblk using zc buffers */
3378 	if (loaned_buffers) {
3379 		mp = uio_to_mblk(uiop);
3380 		ASSERT(mp != NULL);
3381 	}
3382 
3383 	*cs->statusp = resp->status = NFS4_OK;
3384 
3385 	ASSERT(uiop->uio_resid >= 0);
3386 	resp->data_len = args->count - uiop->uio_resid;
3387 	if (mp) {
3388 		resp->data_val = (char *)mp->b_datap->db_base;
3389 		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3390 	} else {
3391 		resp->data_val = (caddr_t)iov.iov_base;
3392 	}
3393 
3394 	resp->mblk = mp;
3395 
3396 	if (!verror && offset + resp->data_len == va.va_size)
3397 		resp->eof = TRUE;
3398 	else
3399 		resp->eof = FALSE;
3400 
3401 	if (rdma_used) {
3402 		if (!rdma_setup_read_data4(args, resp)) {
3403 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3404 		}
3405 	} else {
3406 		resp->wlist = NULL;
3407 	}
3408 
3409 out:
3410 	if (in_crit)
3411 		nbl_end_crit(vp);
3412 
3413 	if (iovp != NULL)
3414 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
3415 
3416 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3417 	    READ4res *, resp);
3418 }
3419 
3420 static void
3421 rfs4_op_read_free(nfs_resop4 *resop)
3422 {
3423 	READ4res	*resp = &resop->nfs_resop4_u.opread;
3424 
3425 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3426 		freemsg(resp->mblk);
3427 		resp->mblk = NULL;
3428 		resp->data_val = NULL;
3429 		resp->data_len = 0;
3430 	}
3431 }
3432 
3433 static void
3434 rfs4_op_readdir_free(nfs_resop4 * resop)
3435 {
3436 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3437 
3438 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3439 		freeb(resp->mblk);
3440 		resp->mblk = NULL;
3441 		resp->data_len = 0;
3442 	}
3443 }
3444 
3445 
3446 /* ARGSUSED */
3447 static void
3448 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3449     struct compound_state *cs)
3450 {
3451 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
3452 	int		error;
3453 	vnode_t		*vp;
3454 	struct exportinfo *exi, *sav_exi;
3455 	nfs_fh4_fmt_t	*fh_fmtp;
3456 
3457 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3458 
3459 	if (cs->vp) {
3460 		VN_RELE(cs->vp);
3461 		cs->vp = NULL;
3462 	}
3463 
3464 	if (cs->cr)
3465 		crfree(cs->cr);
3466 
3467 	cs->cr = crdup(cs->basecr);
3468 
3469 	vp = exi_public->exi_vp;
3470 	if (vp == NULL) {
3471 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3472 		goto out;
3473 	}
3474 
3475 	error = makefh4(&cs->fh, vp, exi_public);
3476 	if (error != 0) {
3477 		*cs->statusp = resp->status = puterrno4(error);
3478 		goto out;
3479 	}
3480 	sav_exi = cs->exi;
3481 	if (exi_public == exi_root) {
3482 		/*
3483 		 * No filesystem is actually shared public, so we default
3484 		 * to exi_root. In this case, we must check whether root
3485 		 * is exported.
3486 		 */
3487 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3488 
3489 		/*
3490 		 * if root filesystem is exported, the exportinfo struct that we
3491 		 * should use is what checkexport returns, because root_exi is
3492 		 * actually a mostly empty struct.
3493 		 */
3494 		exi = checkexport(&fh_fmtp->fh4_fsid,
3495 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3496 		if (exi) {
3497 			cs->exi = exi;
3498 		} else {
3499 			exi_hold(exi_public);
3500 			cs->exi = exi_public;
3501 		}
3502 	} else {
3503 		/*
3504 		 * it's a properly shared filesystem
3505 		 */
3506 		exi_hold(exi_public);
3507 		cs->exi = exi_public;
3508 	}
3509 
3510 	if (is_system_labeled()) {
3511 		bslabel_t *clabel;
3512 
3513 		ASSERT(req->rq_label != NULL);
3514 		clabel = req->rq_label;
3515 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3516 		    "got client label from request(1)",
3517 		    struct svc_req *, req);
3518 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3519 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3520 			    cs->exi)) {
3521 				*cs->statusp = resp->status =
3522 				    NFS4ERR_SERVERFAULT;
3523 				if (sav_exi)
3524 					exi_rele(sav_exi);
3525 				goto out;
3526 			}
3527 		}
3528 	}
3529 
3530 	VN_HOLD(vp);
3531 	cs->vp = vp;
3532 
3533 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3534 		VN_RELE(cs->vp);
3535 		cs->vp = NULL;
3536 		exi_rele(cs->exi);
3537 		cs->exi = sav_exi;
3538 		goto out;
3539 	}
3540 	if (sav_exi)
3541 		exi_rele(sav_exi);
3542 
3543 	*cs->statusp = resp->status = NFS4_OK;
3544 out:
3545 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3546 	    PUTPUBFH4res *, resp);
3547 }
3548 
3549 /*
3550  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3551  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3552  * or joe have restrictive search permissions, then we shouldn't let
3553  * the client get a file handle. This is easy to enforce. However, we
3554  * don't know what security flavor should be used until we resolve the
3555  * path name. Another complication is uid mapping. If root is
3556  * the user, then it will be mapped to the anonymous user by default,
3557  * but we won't know that till we've resolved the path name. And we won't
3558  * know what the anonymous user is.
3559  * Luckily, SECINFO is specified to take a full filename.
3560  * So what we will have to in rfs4_op_lookup is check that flavor of
3561  * the target object matches that of the request, and if root was the
3562  * caller, check for the root= and anon= options, and if necessary,
3563  * repeat the lookup using the right cred_t. But that's not done yet.
3564  */
3565 /* ARGSUSED */
3566 static void
3567 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3568     struct compound_state *cs)
3569 {
3570 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3571 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3572 	nfs_fh4_fmt_t *fh_fmtp;
3573 
3574 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3575 	    PUTFH4args *, args);
3576 
3577 	if (cs->vp) {
3578 		VN_RELE(cs->vp);
3579 		cs->vp = NULL;
3580 	}
3581 
3582 	if (cs->cr) {
3583 		crfree(cs->cr);
3584 		cs->cr = NULL;
3585 	}
3586 
3587 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3588 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3589 		goto out;
3590 	}
3591 
3592 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3593 	if (cs->exi)
3594 		exi_rele(cs->exi);
3595 	cs->exi = checkexport(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3596 	    NULL);
3597 
3598 	if (cs->exi == NULL) {
3599 		*cs->statusp = resp->status = NFS4ERR_STALE;
3600 		goto out;
3601 	}
3602 
3603 	cs->cr = crdup(cs->basecr);
3604 
3605 	ASSERT(cs->cr != NULL);
3606 
3607 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3608 		*cs->statusp = resp->status;
3609 		goto out;
3610 	}
3611 
3612 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3613 		VN_RELE(cs->vp);
3614 		cs->vp = NULL;
3615 		goto out;
3616 	}
3617 
3618 	nfs_fh4_copy(&args->object, &cs->fh);
3619 	*cs->statusp = resp->status = NFS4_OK;
3620 	cs->deleg = FALSE;
3621 
3622 out:
3623 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3624 	    PUTFH4res *, resp);
3625 }
3626 
3627 /* ARGSUSED */
3628 static void
3629 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3630     struct compound_state *cs)
3631 {
3632 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3633 	int error;
3634 	fid_t fid;
3635 	struct exportinfo *exi, *sav_exi;
3636 
3637 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3638 
3639 	if (cs->vp) {
3640 		VN_RELE(cs->vp);
3641 		cs->vp = NULL;
3642 	}
3643 
3644 	if (cs->cr)
3645 		crfree(cs->cr);
3646 
3647 	cs->cr = crdup(cs->basecr);
3648 
3649 	/*
3650 	 * Using rootdir, the system root vnode,
3651 	 * get its fid.
3652 	 */
3653 	bzero(&fid, sizeof (fid));
3654 	fid.fid_len = MAXFIDSZ;
3655 	error = vop_fid_pseudo(rootdir, &fid);
3656 	if (error != 0) {
3657 		*cs->statusp = resp->status = puterrno4(error);
3658 		goto out;
3659 	}
3660 
3661 	/*
3662 	 * Then use the root fsid & fid it to find out if it's exported
3663 	 *
3664 	 * If the server root isn't exported directly, then
3665 	 * it should at least be a pseudo export based on
3666 	 * one or more exports further down in the server's
3667 	 * file tree.
3668 	 */
3669 	exi = checkexport(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3670 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3671 		NFS4_DEBUG(rfs4_debug,
3672 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3673 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3674 		if (exi)
3675 			exi_rele(exi);
3676 		goto out;
3677 	}
3678 
3679 	/*
3680 	 * Now make a filehandle based on the root
3681 	 * export and root vnode.
3682 	 */
3683 	error = makefh4(&cs->fh, rootdir, exi);
3684 	if (error != 0) {
3685 		*cs->statusp = resp->status = puterrno4(error);
3686 		exi_rele(exi);
3687 		goto out;
3688 	}
3689 
3690 	sav_exi = cs->exi;
3691 	cs->exi = exi;
3692 
3693 	VN_HOLD(rootdir);
3694 	cs->vp = rootdir;
3695 
3696 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3697 		VN_RELE(rootdir);
3698 		cs->vp = NULL;
3699 		exi_rele(exi);
3700 		cs->exi = sav_exi;
3701 		goto out;
3702 	}
3703 	if (sav_exi)
3704 		exi_rele(sav_exi);
3705 
3706 	*cs->statusp = resp->status = NFS4_OK;
3707 	cs->deleg = FALSE;
3708 out:
3709 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3710 	    PUTROOTFH4res *, resp);
3711 }
3712 
3713 /*
3714  * set_rdattr_params sets up the variables used to manage what information
3715  * to get for each directory entry.
3716  */
3717 static nfsstat4
3718 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3719     bitmap4 attrs, bool_t *need_to_lookup)
3720 {
3721 	uint_t	va_mask;
3722 	nfsstat4 status;
3723 	bitmap4 objbits;
3724 
3725 	status = bitmap4_to_attrmask(attrs, sargp);
3726 	if (status != NFS4_OK) {
3727 		/*
3728 		 * could not even figure attr mask
3729 		 */
3730 		return (status);
3731 	}
3732 	va_mask = sargp->vap->va_mask;
3733 
3734 	/*
3735 	 * dirent's d_ino is always correct value for mounted_on_fileid.
3736 	 * mntdfid_set is set once here, but mounted_on_fileid is
3737 	 * set in main dirent processing loop for each dirent.
3738 	 * The mntdfid_set is a simple optimization that lets the
3739 	 * server attr code avoid work when caller is readdir.
3740 	 */
3741 	sargp->mntdfid_set = TRUE;
3742 
3743 	/*
3744 	 * Lookup entry only if client asked for any of the following:
3745 	 * a) vattr attrs
3746 	 * b) vfs attrs
3747 	 * c) attrs w/per-object scope requested (change, filehandle, etc)
3748 	 *    other than mounted_on_fileid (which we can take from dirent)
3749 	 */
3750 	objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3751 
3752 	if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3753 		*need_to_lookup = TRUE;
3754 	else
3755 		*need_to_lookup = FALSE;
3756 
3757 	if (sargp->sbp == NULL)
3758 		return (NFS4_OK);
3759 
3760 	/*
3761 	 * If filesystem attrs are requested, get them now from the
3762 	 * directory vp, as most entries will have same filesystem. The only
3763 	 * exception are mounted over entries but we handle
3764 	 * those as we go (XXX mounted over detection not yet implemented).
3765 	 */
3766 	sargp->vap->va_mask = 0;	/* to avoid VOP_GETATTR */
3767 	status = bitmap4_get_sysattrs(sargp);
3768 	sargp->vap->va_mask = va_mask;
3769 
3770 	if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3771 		/*
3772 		 * Failed to get filesystem attributes.
3773 		 * Return a rdattr_error for each entry, but don't fail.
3774 		 * However, don't get any obj-dependent attrs.
3775 		 */
3776 		sargp->rdattr_error = status;	/* for rdattr_error */
3777 		*need_to_lookup = FALSE;
3778 		/*
3779 		 * At least get fileid for regular readdir output
3780 		 */
3781 		sargp->vap->va_mask &= AT_NODEID;
3782 		status = NFS4_OK;
3783 	}
3784 
3785 	return (status);
3786 }
3787 
3788 /*
3789  * readlink: args: CURRENT_FH.
3790  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3791  */
3792 
3793 /* ARGSUSED */
3794 static void
3795 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3796     struct compound_state *cs)
3797 {
3798 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3799 	int error;
3800 	vnode_t *vp;
3801 	struct iovec iov;
3802 	struct vattr va;
3803 	struct uio uio;
3804 	char *data;
3805 	struct sockaddr *ca;
3806 	char *name = NULL;
3807 	int is_referral;
3808 
3809 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3810 
3811 	/* CURRENT_FH: directory */
3812 	vp = cs->vp;
3813 	if (vp == NULL) {
3814 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3815 		goto out;
3816 	}
3817 
3818 	if (cs->access == CS_ACCESS_DENIED) {
3819 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3820 		goto out;
3821 	}
3822 
3823 	/* Is it a referral? */
3824 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3825 
3826 		is_referral = 1;
3827 
3828 	} else {
3829 
3830 		is_referral = 0;
3831 
3832 		if (vp->v_type == VDIR) {
3833 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
3834 			goto out;
3835 		}
3836 
3837 		if (vp->v_type != VLNK) {
3838 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3839 			goto out;
3840 		}
3841 
3842 	}
3843 
3844 	va.va_mask = AT_MODE;
3845 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3846 	if (error) {
3847 		*cs->statusp = resp->status = puterrno4(error);
3848 		goto out;
3849 	}
3850 
3851 	if (MANDLOCK(vp, va.va_mode)) {
3852 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3853 		goto out;
3854 	}
3855 
3856 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3857 
3858 	if (is_referral) {
3859 		char *s;
3860 		size_t strsz;
3861 
3862 		/* Get an artificial symlink based on a referral */
3863 		s = build_symlink(vp, cs->cr, &strsz);
3864 		global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3865 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3866 		    vnode_t *, vp, char *, s);
3867 		if (s == NULL)
3868 			error = EINVAL;
3869 		else {
3870 			error = 0;
3871 			(void) strlcpy(data, s, MAXPATHLEN + 1);
3872 			kmem_free(s, strsz);
3873 		}
3874 
3875 	} else {
3876 
3877 		iov.iov_base = data;
3878 		iov.iov_len = MAXPATHLEN;
3879 		uio.uio_iov = &iov;
3880 		uio.uio_iovcnt = 1;
3881 		uio.uio_segflg = UIO_SYSSPACE;
3882 		uio.uio_extflg = UIO_COPY_CACHED;
3883 		uio.uio_loffset = 0;
3884 		uio.uio_resid = MAXPATHLEN;
3885 
3886 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3887 
3888 		if (!error)
3889 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
3890 	}
3891 
3892 	if (error) {
3893 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3894 		*cs->statusp = resp->status = puterrno4(error);
3895 		goto out;
3896 	}
3897 
3898 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3899 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3900 	    MAXPATHLEN  + 1);
3901 
3902 	if (name == NULL) {
3903 		/*
3904 		 * Even though the conversion failed, we return
3905 		 * something. We just don't translate it.
3906 		 */
3907 		name = data;
3908 	}
3909 
3910 	/*
3911 	 * treat link name as data
3912 	 */
3913 	(void) str_to_utf8(name, (utf8string *)&resp->link);
3914 
3915 	if (name != data)
3916 		kmem_free(name, MAXPATHLEN + 1);
3917 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3918 	*cs->statusp = resp->status = NFS4_OK;
3919 
3920 out:
3921 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3922 	    READLINK4res *, resp);
3923 }
3924 
3925 static void
3926 rfs4_op_readlink_free(nfs_resop4 *resop)
3927 {
3928 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3929 	utf8string *symlink = (utf8string *)&resp->link;
3930 
3931 	if (symlink->utf8string_val) {
3932 		UTF8STRING_FREE(*symlink)
3933 	}
3934 }
3935 
3936 /*
3937  * release_lockowner:
3938  *	Release any state associated with the supplied
3939  *	lockowner. Note if any lo_state is holding locks we will not
3940  *	rele that lo_state and thus the lockowner will not be destroyed.
3941  *	A client using lock after the lock owner stateid has been released
3942  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3943  *	to reissue the lock with new_lock_owner set to TRUE.
3944  *	args: lock_owner
3945  *	res:  status
3946  */
3947 /* ARGSUSED */
3948 static void
3949 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3950     struct svc_req *req, struct compound_state *cs)
3951 {
3952 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3953 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3954 	rfs4_lockowner_t *lo;
3955 	rfs4_openowner_t *oo;
3956 	rfs4_state_t *sp;
3957 	rfs4_lo_state_t *lsp;
3958 	rfs4_client_t *cp;
3959 	bool_t create = FALSE;
3960 	locklist_t *llist;
3961 	sysid_t sysid;
3962 
3963 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3964 	    cs, RELEASE_LOCKOWNER4args *, ap);
3965 
3966 	/* Make sure there is a clientid around for this request */
3967 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3968 
3969 	if (cp == NULL) {
3970 		*cs->statusp = resp->status =
3971 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3972 		goto out;
3973 	}
3974 	rfs4_client_rele(cp);
3975 
3976 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
3977 	if (lo == NULL) {
3978 		*cs->statusp = resp->status = NFS4_OK;
3979 		goto out;
3980 	}
3981 	ASSERT(lo->rl_client != NULL);
3982 
3983 	/*
3984 	 * Check for EXPIRED client. If so will reap state with in a lease
3985 	 * period or on next set_clientid_confirm step
3986 	 */
3987 	if (rfs4_lease_expired(lo->rl_client)) {
3988 		rfs4_lockowner_rele(lo);
3989 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
3990 		goto out;
3991 	}
3992 
3993 	/*
3994 	 * If no sysid has been assigned, then no locks exist; just return.
3995 	 */
3996 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
3997 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3998 		rfs4_lockowner_rele(lo);
3999 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4000 		goto out;
4001 	}
4002 
4003 	sysid = lo->rl_client->rc_sysidt;
4004 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4005 
4006 	/*
4007 	 * Mark the lockowner invalid.
4008 	 */
4009 	rfs4_dbe_hide(lo->rl_dbe);
4010 
4011 	/*
4012 	 * sysid-pid pair should now not be used since the lockowner is
4013 	 * invalid. If the client were to instantiate the lockowner again
4014 	 * it would be assigned a new pid. Thus we can get the list of
4015 	 * current locks.
4016 	 */
4017 
4018 	llist = flk_get_active_locks(sysid, lo->rl_pid);
4019 	/* If we are still holding locks fail */
4020 	if (llist != NULL) {
4021 
4022 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4023 
4024 		flk_free_locklist(llist);
4025 		/*
4026 		 * We need to unhide the lockowner so the client can
4027 		 * try it again. The bad thing here is if the client
4028 		 * has a logic error that took it here in the first place
4029 		 * he probably has lost accounting of the locks that it
4030 		 * is holding. So we may have dangling state until the
4031 		 * open owner state is reaped via close. One scenario
4032 		 * that could possibly occur is that the client has
4033 		 * sent the unlock request(s) in separate threads
4034 		 * and has not waited for the replies before sending the
4035 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
4036 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4037 		 * reissuing the request.
4038 		 */
4039 		rfs4_dbe_unhide(lo->rl_dbe);
4040 		rfs4_lockowner_rele(lo);
4041 		goto out;
4042 	}
4043 
4044 	/*
4045 	 * For the corresponding client we need to check each open
4046 	 * owner for any opens that have lockowner state associated
4047 	 * with this lockowner.
4048 	 */
4049 
4050 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4051 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4052 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4053 
4054 		rfs4_dbe_lock(oo->ro_dbe);
4055 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
4056 		    sp = list_next(&oo->ro_statelist, sp)) {
4057 
4058 			rfs4_dbe_lock(sp->rs_dbe);
4059 			for (lsp = list_head(&sp->rs_lostatelist);
4060 			    lsp != NULL;
4061 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
4062 				if (lsp->rls_locker == lo) {
4063 					rfs4_dbe_lock(lsp->rls_dbe);
4064 					rfs4_dbe_invalidate(lsp->rls_dbe);
4065 					rfs4_dbe_unlock(lsp->rls_dbe);
4066 				}
4067 			}
4068 			rfs4_dbe_unlock(sp->rs_dbe);
4069 		}
4070 		rfs4_dbe_unlock(oo->ro_dbe);
4071 	}
4072 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4073 
4074 	rfs4_lockowner_rele(lo);
4075 
4076 	*cs->statusp = resp->status = NFS4_OK;
4077 
4078 out:
4079 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4080 	    cs, RELEASE_LOCKOWNER4res *, resp);
4081 }
4082 
4083 /*
4084  * short utility function to lookup a file and recall the delegation
4085  */
4086 static rfs4_file_t *
4087 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4088     int *lkup_error, cred_t *cr)
4089 {
4090 	vnode_t *vp;
4091 	rfs4_file_t *fp = NULL;
4092 	bool_t fcreate = FALSE;
4093 	int error;
4094 
4095 	if (vpp)
4096 		*vpp = NULL;
4097 
4098 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4099 	    NULL)) == 0) {
4100 		if (vp->v_type == VREG)
4101 			fp = rfs4_findfile(vp, NULL, &fcreate);
4102 		if (vpp)
4103 			*vpp = vp;
4104 		else
4105 			VN_RELE(vp);
4106 	}
4107 
4108 	if (lkup_error)
4109 		*lkup_error = error;
4110 
4111 	return (fp);
4112 }
4113 
4114 /*
4115  * remove: args: CURRENT_FH: directory; name.
4116  *	res: status. If success - CURRENT_FH unchanged, return change_info
4117  *		for directory.
4118  */
4119 /* ARGSUSED */
4120 static void
4121 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4122     struct compound_state *cs)
4123 {
4124 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4125 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4126 	int error;
4127 	vnode_t *dvp, *vp;
4128 	struct vattr bdva, idva, adva;
4129 	char *nm;
4130 	uint_t len;
4131 	rfs4_file_t *fp;
4132 	int in_crit = 0;
4133 	bslabel_t *clabel;
4134 	struct sockaddr *ca;
4135 	char *name = NULL;
4136 	nfsstat4 status;
4137 
4138 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4139 	    REMOVE4args *, args);
4140 
4141 	/* CURRENT_FH: directory */
4142 	dvp = cs->vp;
4143 	if (dvp == NULL) {
4144 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4145 		goto out;
4146 	}
4147 
4148 	if (cs->access == CS_ACCESS_DENIED) {
4149 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4150 		goto out;
4151 	}
4152 
4153 	/*
4154 	 * If there is an unshared filesystem mounted on this vnode,
4155 	 * Do not allow to remove anything in this directory.
4156 	 */
4157 	if (vn_ismntpt(dvp)) {
4158 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4159 		goto out;
4160 	}
4161 
4162 	if (dvp->v_type != VDIR) {
4163 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4164 		goto out;
4165 	}
4166 
4167 	status = utf8_dir_verify(&args->target);
4168 	if (status != NFS4_OK) {
4169 		*cs->statusp = resp->status = status;
4170 		goto out;
4171 	}
4172 
4173 	/*
4174 	 * Lookup the file so that we can check if it's a directory
4175 	 */
4176 	nm = utf8_to_fn(&args->target, &len, NULL);
4177 	if (nm == NULL) {
4178 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4179 		goto out;
4180 	}
4181 
4182 	if (len > MAXNAMELEN) {
4183 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4184 		kmem_free(nm, len);
4185 		goto out;
4186 	}
4187 
4188 	if (rdonly4(req, cs)) {
4189 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4190 		kmem_free(nm, len);
4191 		goto out;
4192 	}
4193 
4194 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4195 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4196 	    MAXPATHLEN  + 1);
4197 
4198 	if (name == NULL) {
4199 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4200 		kmem_free(nm, len);
4201 		goto out;
4202 	}
4203 
4204 	/*
4205 	 * Lookup the file to determine type and while we are see if
4206 	 * there is a file struct around and check for delegation.
4207 	 * We don't need to acquire va_seq before this lookup, if
4208 	 * it causes an update, cinfo.before will not match, which will
4209 	 * trigger a cache flush even if atomic is TRUE.
4210 	 */
4211 	if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4212 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4213 		    NULL)) {
4214 			VN_RELE(vp);
4215 			rfs4_file_rele(fp);
4216 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4217 			if (nm != name)
4218 				kmem_free(name, MAXPATHLEN + 1);
4219 			kmem_free(nm, len);
4220 			goto out;
4221 		}
4222 	}
4223 
4224 	/* Didn't find anything to remove */
4225 	if (vp == NULL) {
4226 		*cs->statusp = resp->status = error;
4227 		if (nm != name)
4228 			kmem_free(name, MAXPATHLEN + 1);
4229 		kmem_free(nm, len);
4230 		goto out;
4231 	}
4232 
4233 	if (nbl_need_check(vp)) {
4234 		nbl_start_crit(vp, RW_READER);
4235 		in_crit = 1;
4236 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4237 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4238 			if (nm != name)
4239 				kmem_free(name, MAXPATHLEN + 1);
4240 			kmem_free(nm, len);
4241 			nbl_end_crit(vp);
4242 			VN_RELE(vp);
4243 			if (fp) {
4244 				rfs4_clear_dont_grant(fp);
4245 				rfs4_file_rele(fp);
4246 			}
4247 			goto out;
4248 		}
4249 	}
4250 
4251 	/* check label before allowing removal */
4252 	if (is_system_labeled()) {
4253 		ASSERT(req->rq_label != NULL);
4254 		clabel = req->rq_label;
4255 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4256 		    "got client label from request(1)",
4257 		    struct svc_req *, req);
4258 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4259 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4260 			    cs->exi)) {
4261 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4262 				if (name != nm)
4263 					kmem_free(name, MAXPATHLEN + 1);
4264 				kmem_free(nm, len);
4265 				if (in_crit)
4266 					nbl_end_crit(vp);
4267 				VN_RELE(vp);
4268 				if (fp) {
4269 					rfs4_clear_dont_grant(fp);
4270 					rfs4_file_rele(fp);
4271 				}
4272 				goto out;
4273 			}
4274 		}
4275 	}
4276 
4277 	/* Get dir "before" change value */
4278 	bdva.va_mask = AT_CTIME|AT_SEQ;
4279 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4280 	if (error) {
4281 		*cs->statusp = resp->status = puterrno4(error);
4282 		if (nm != name)
4283 			kmem_free(name, MAXPATHLEN + 1);
4284 		kmem_free(nm, len);
4285 		if (in_crit)
4286 			nbl_end_crit(vp);
4287 		VN_RELE(vp);
4288 		if (fp) {
4289 			rfs4_clear_dont_grant(fp);
4290 			rfs4_file_rele(fp);
4291 		}
4292 		goto out;
4293 	}
4294 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4295 
4296 	/* Actually do the REMOVE operation */
4297 	if (vp->v_type == VDIR) {
4298 		/*
4299 		 * Can't remove a directory that has a mounted-on filesystem.
4300 		 */
4301 		if (vn_ismntpt(vp)) {
4302 			error = EACCES;
4303 		} else {
4304 			/*
4305 			 * System V defines rmdir to return EEXIST,
4306 			 * not ENOTEMPTY, if the directory is not
4307 			 * empty.  A System V NFS server needs to map
4308 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4309 			 * transmit over the wire.
4310 			 */
4311 			if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4312 			    NULL, 0)) == EEXIST)
4313 				error = ENOTEMPTY;
4314 		}
4315 
4316 		if (in_crit)
4317 			nbl_end_crit(vp);
4318 		VN_RELE(vp);
4319 	} else {
4320 		if (!in_crit)
4321 			VN_RELE(vp);
4322 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4323 		    fp != NULL) {
4324 			struct vattr va;
4325 			vnode_t *tvp;
4326 
4327 			rfs4_dbe_lock(fp->rf_dbe);
4328 			tvp = fp->rf_vp;
4329 			if (tvp)
4330 				VN_HOLD(tvp);
4331 			rfs4_dbe_unlock(fp->rf_dbe);
4332 
4333 			if (tvp) {
4334 				/*
4335 				 * This is va_seq safe because we are not
4336 				 * manipulating dvp.
4337 				 */
4338 				va.va_mask = AT_NLINK;
4339 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4340 				    va.va_nlink == 0) {
4341 					/* Remove state on file remove */
4342 					if (in_crit) {
4343 						nbl_end_crit(vp);
4344 						VN_RELE(vp);
4345 						in_crit = 0;
4346 					}
4347 					rfs4_close_all_state(fp);
4348 				}
4349 				VN_RELE(tvp);
4350 			}
4351 		}
4352 
4353 		if (in_crit) {
4354 			nbl_end_crit(vp);
4355 			VN_RELE(vp);
4356 		}
4357 	}
4358 
4359 	if (fp) {
4360 		rfs4_clear_dont_grant(fp);
4361 		rfs4_file_rele(fp);
4362 	}
4363 	if (nm != name)
4364 		kmem_free(name, MAXPATHLEN + 1);
4365 	kmem_free(nm, len);
4366 
4367 	if (error) {
4368 		*cs->statusp = resp->status = puterrno4(error);
4369 		goto out;
4370 	}
4371 
4372 	/*
4373 	 * Get the initial "after" sequence number, if it fails, set to zero
4374 	 */
4375 	idva.va_mask = AT_SEQ;
4376 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4377 		idva.va_seq = 0;
4378 
4379 	/*
4380 	 * Force modified data and metadata out to stable storage.
4381 	 */
4382 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4383 
4384 	/*
4385 	 * Get "after" change value, if it fails, simply return the
4386 	 * before value.
4387 	 */
4388 	adva.va_mask = AT_CTIME|AT_SEQ;
4389 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4390 		adva.va_ctime = bdva.va_ctime;
4391 		adva.va_seq = 0;
4392 	}
4393 
4394 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4395 
4396 	/*
4397 	 * The cinfo.atomic = TRUE only if we have
4398 	 * non-zero va_seq's, and it has incremented by exactly one
4399 	 * during the VOP_REMOVE/RMDIR and it didn't change during
4400 	 * the VOP_FSYNC.
4401 	 */
4402 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4403 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4404 		resp->cinfo.atomic = TRUE;
4405 	else
4406 		resp->cinfo.atomic = FALSE;
4407 
4408 	*cs->statusp = resp->status = NFS4_OK;
4409 
4410 out:
4411 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4412 	    REMOVE4res *, resp);
4413 }
4414 
4415 /*
4416  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4417  *		oldname and newname.
4418  *	res: status. If success - CURRENT_FH unchanged, return change_info
4419  *		for both from and target directories.
4420  */
4421 /* ARGSUSED */
4422 static void
4423 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4424     struct compound_state *cs)
4425 {
4426 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
4427 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4428 	int error;
4429 	vnode_t *odvp;
4430 	vnode_t *ndvp;
4431 	vnode_t *srcvp, *targvp;
4432 	struct vattr obdva, oidva, oadva;
4433 	struct vattr nbdva, nidva, nadva;
4434 	char *onm, *nnm;
4435 	uint_t olen, nlen;
4436 	rfs4_file_t *fp, *sfp;
4437 	int in_crit_src, in_crit_targ;
4438 	int fp_rele_grant_hold, sfp_rele_grant_hold;
4439 	bslabel_t *clabel;
4440 	struct sockaddr *ca;
4441 	char *converted_onm = NULL;
4442 	char *converted_nnm = NULL;
4443 	nfsstat4 status;
4444 
4445 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4446 	    RENAME4args *, args);
4447 
4448 	fp = sfp = NULL;
4449 	srcvp = targvp = NULL;
4450 	in_crit_src = in_crit_targ = 0;
4451 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4452 
4453 	/* CURRENT_FH: target directory */
4454 	ndvp = cs->vp;
4455 	if (ndvp == NULL) {
4456 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4457 		goto out;
4458 	}
4459 
4460 	/* SAVED_FH: from directory */
4461 	odvp = cs->saved_vp;
4462 	if (odvp == NULL) {
4463 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4464 		goto out;
4465 	}
4466 
4467 	if (cs->access == CS_ACCESS_DENIED) {
4468 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4469 		goto out;
4470 	}
4471 
4472 	/*
4473 	 * If there is an unshared filesystem mounted on this vnode,
4474 	 * do not allow to rename objects in this directory.
4475 	 */
4476 	if (vn_ismntpt(odvp)) {
4477 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4478 		goto out;
4479 	}
4480 
4481 	/*
4482 	 * If there is an unshared filesystem mounted on this vnode,
4483 	 * do not allow to rename to this directory.
4484 	 */
4485 	if (vn_ismntpt(ndvp)) {
4486 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4487 		goto out;
4488 	}
4489 
4490 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4491 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4492 		goto out;
4493 	}
4494 
4495 	if (cs->saved_exi != cs->exi) {
4496 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4497 		goto out;
4498 	}
4499 
4500 	status = utf8_dir_verify(&args->oldname);
4501 	if (status != NFS4_OK) {
4502 		*cs->statusp = resp->status = status;
4503 		goto out;
4504 	}
4505 
4506 	status = utf8_dir_verify(&args->newname);
4507 	if (status != NFS4_OK) {
4508 		*cs->statusp = resp->status = status;
4509 		goto out;
4510 	}
4511 
4512 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4513 	if (onm == NULL) {
4514 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4515 		goto out;
4516 	}
4517 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4518 	nlen = MAXPATHLEN + 1;
4519 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4520 	    nlen);
4521 
4522 	if (converted_onm == NULL) {
4523 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4524 		kmem_free(onm, olen);
4525 		goto out;
4526 	}
4527 
4528 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4529 	if (nnm == NULL) {
4530 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4531 		if (onm != converted_onm)
4532 			kmem_free(converted_onm, MAXPATHLEN + 1);
4533 		kmem_free(onm, olen);
4534 		goto out;
4535 	}
4536 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4537 	    MAXPATHLEN  + 1);
4538 
4539 	if (converted_nnm == NULL) {
4540 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4541 		kmem_free(nnm, nlen);
4542 		nnm = NULL;
4543 		if (onm != converted_onm)
4544 			kmem_free(converted_onm, MAXPATHLEN + 1);
4545 		kmem_free(onm, olen);
4546 		goto out;
4547 	}
4548 
4549 
4550 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4551 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4552 		kmem_free(onm, olen);
4553 		kmem_free(nnm, nlen);
4554 		goto out;
4555 	}
4556 
4557 
4558 	if (rdonly4(req, cs)) {
4559 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4560 		if (onm != converted_onm)
4561 			kmem_free(converted_onm, MAXPATHLEN + 1);
4562 		kmem_free(onm, olen);
4563 		if (nnm != converted_nnm)
4564 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4565 		kmem_free(nnm, nlen);
4566 		goto out;
4567 	}
4568 
4569 	/* check label of the target dir */
4570 	if (is_system_labeled()) {
4571 		ASSERT(req->rq_label != NULL);
4572 		clabel = req->rq_label;
4573 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4574 		    "got client label from request(1)",
4575 		    struct svc_req *, req);
4576 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4577 			if (!do_rfs_label_check(clabel, ndvp,
4578 			    EQUALITY_CHECK, cs->exi)) {
4579 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4580 				goto err_out;
4581 			}
4582 		}
4583 	}
4584 
4585 	/*
4586 	 * Is the source a file and have a delegation?
4587 	 * We don't need to acquire va_seq before these lookups, if
4588 	 * it causes an update, cinfo.before will not match, which will
4589 	 * trigger a cache flush even if atomic is TRUE.
4590 	 */
4591 	if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4592 	    &error, cs->cr)) {
4593 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4594 		    NULL)) {
4595 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4596 			goto err_out;
4597 		}
4598 	}
4599 
4600 	if (srcvp == NULL) {
4601 		*cs->statusp = resp->status = puterrno4(error);
4602 		if (onm != converted_onm)
4603 			kmem_free(converted_onm, MAXPATHLEN + 1);
4604 		kmem_free(onm, olen);
4605 		if (nnm != converted_nnm)
4606 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4607 		kmem_free(nnm, nlen);
4608 		goto out;
4609 	}
4610 
4611 	sfp_rele_grant_hold = 1;
4612 
4613 	/* Does the destination exist and a file and have a delegation? */
4614 	if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4615 	    NULL, cs->cr)) {
4616 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4617 		    NULL)) {
4618 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4619 			goto err_out;
4620 		}
4621 	}
4622 	fp_rele_grant_hold = 1;
4623 
4624 
4625 	/* Check for NBMAND lock on both source and target */
4626 	if (nbl_need_check(srcvp)) {
4627 		nbl_start_crit(srcvp, RW_READER);
4628 		in_crit_src = 1;
4629 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4630 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4631 			goto err_out;
4632 		}
4633 	}
4634 
4635 	if (targvp && nbl_need_check(targvp)) {
4636 		nbl_start_crit(targvp, RW_READER);
4637 		in_crit_targ = 1;
4638 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4639 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4640 			goto err_out;
4641 		}
4642 	}
4643 
4644 	/* Get source "before" change value */
4645 	obdva.va_mask = AT_CTIME|AT_SEQ;
4646 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4647 	if (!error) {
4648 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4649 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4650 	}
4651 	if (error) {
4652 		*cs->statusp = resp->status = puterrno4(error);
4653 		goto err_out;
4654 	}
4655 
4656 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4657 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4658 
4659 	if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4660 	    cs->cr, NULL, 0)) == 0 && fp != NULL) {
4661 		struct vattr va;
4662 		vnode_t *tvp;
4663 
4664 		rfs4_dbe_lock(fp->rf_dbe);
4665 		tvp = fp->rf_vp;
4666 		if (tvp)
4667 			VN_HOLD(tvp);
4668 		rfs4_dbe_unlock(fp->rf_dbe);
4669 
4670 		if (tvp) {
4671 			va.va_mask = AT_NLINK;
4672 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4673 			    va.va_nlink == 0) {
4674 				/* The file is gone and so should the state */
4675 				if (in_crit_targ) {
4676 					nbl_end_crit(targvp);
4677 					in_crit_targ = 0;
4678 				}
4679 				rfs4_close_all_state(fp);
4680 			}
4681 			VN_RELE(tvp);
4682 		}
4683 	}
4684 	if (error == 0)
4685 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4686 
4687 	if (in_crit_src)
4688 		nbl_end_crit(srcvp);
4689 	if (srcvp)
4690 		VN_RELE(srcvp);
4691 	if (in_crit_targ)
4692 		nbl_end_crit(targvp);
4693 	if (targvp)
4694 		VN_RELE(targvp);
4695 
4696 	if (sfp) {
4697 		rfs4_clear_dont_grant(sfp);
4698 		rfs4_file_rele(sfp);
4699 	}
4700 	if (fp) {
4701 		rfs4_clear_dont_grant(fp);
4702 		rfs4_file_rele(fp);
4703 	}
4704 
4705 	if (converted_onm != onm)
4706 		kmem_free(converted_onm, MAXPATHLEN + 1);
4707 	kmem_free(onm, olen);
4708 	if (converted_nnm != nnm)
4709 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4710 	kmem_free(nnm, nlen);
4711 
4712 	/*
4713 	 * Get the initial "after" sequence number, if it fails, set to zero
4714 	 */
4715 	oidva.va_mask = AT_SEQ;
4716 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4717 		oidva.va_seq = 0;
4718 
4719 	nidva.va_mask = AT_SEQ;
4720 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4721 		nidva.va_seq = 0;
4722 
4723 	/*
4724 	 * Force modified data and metadata out to stable storage.
4725 	 */
4726 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4727 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4728 
4729 	if (error) {
4730 		*cs->statusp = resp->status = puterrno4(error);
4731 		goto out;
4732 	}
4733 
4734 	/*
4735 	 * Get "after" change values, if it fails, simply return the
4736 	 * before value.
4737 	 */
4738 	oadva.va_mask = AT_CTIME|AT_SEQ;
4739 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4740 		oadva.va_ctime = obdva.va_ctime;
4741 		oadva.va_seq = 0;
4742 	}
4743 
4744 	nadva.va_mask = AT_CTIME|AT_SEQ;
4745 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4746 		nadva.va_ctime = nbdva.va_ctime;
4747 		nadva.va_seq = 0;
4748 	}
4749 
4750 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4751 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4752 
4753 	/*
4754 	 * The cinfo.atomic = TRUE only if we have
4755 	 * non-zero va_seq's, and it has incremented by exactly one
4756 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4757 	 */
4758 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4759 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4760 		resp->source_cinfo.atomic = TRUE;
4761 	else
4762 		resp->source_cinfo.atomic = FALSE;
4763 
4764 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4765 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4766 		resp->target_cinfo.atomic = TRUE;
4767 	else
4768 		resp->target_cinfo.atomic = FALSE;
4769 
4770 #ifdef	VOLATILE_FH_TEST
4771 	{
4772 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4773 
4774 	/*
4775 	 * Add the renamed file handle to the volatile rename list
4776 	 */
4777 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4778 		/* file handles may expire on rename */
4779 		vnode_t *vp;
4780 
4781 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4782 		/*
4783 		 * Already know that nnm will be a valid string
4784 		 */
4785 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4786 		    NULL, NULL, NULL);
4787 		kmem_free(nnm, nlen);
4788 		if (!error) {
4789 			add_volrnm_fh(cs->exi, vp);
4790 			VN_RELE(vp);
4791 		}
4792 	}
4793 	}
4794 #endif	/* VOLATILE_FH_TEST */
4795 
4796 	*cs->statusp = resp->status = NFS4_OK;
4797 out:
4798 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4799 	    RENAME4res *, resp);
4800 	return;
4801 
4802 err_out:
4803 	if (onm != converted_onm)
4804 		kmem_free(converted_onm, MAXPATHLEN + 1);
4805 	if (onm != NULL)
4806 		kmem_free(onm, olen);
4807 	if (nnm != converted_nnm)
4808 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4809 	if (nnm != NULL)
4810 		kmem_free(nnm, nlen);
4811 
4812 	if (in_crit_src) nbl_end_crit(srcvp);
4813 	if (in_crit_targ) nbl_end_crit(targvp);
4814 	if (targvp) VN_RELE(targvp);
4815 	if (srcvp) VN_RELE(srcvp);
4816 	if (sfp) {
4817 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4818 		rfs4_file_rele(sfp);
4819 	}
4820 	if (fp) {
4821 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4822 		rfs4_file_rele(fp);
4823 	}
4824 
4825 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4826 	    RENAME4res *, resp);
4827 }
4828 
4829 /* ARGSUSED */
4830 static void
4831 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4832     struct compound_state *cs)
4833 {
4834 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4835 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4836 	rfs4_client_t *cp;
4837 
4838 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4839 	    RENEW4args *, args);
4840 
4841 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4842 		*cs->statusp = resp->status =
4843 		    rfs4_check_clientid(&args->clientid, 0);
4844 		goto out;
4845 	}
4846 
4847 	if (rfs4_lease_expired(cp)) {
4848 		rfs4_client_rele(cp);
4849 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4850 		goto out;
4851 	}
4852 
4853 	rfs4_update_lease(cp);
4854 
4855 	mutex_enter(cp->rc_cbinfo.cb_lock);
4856 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4857 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4858 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4859 	} else {
4860 		*cs->statusp = resp->status = NFS4_OK;
4861 	}
4862 	mutex_exit(cp->rc_cbinfo.cb_lock);
4863 
4864 	rfs4_client_rele(cp);
4865 
4866 out:
4867 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4868 	    RENEW4res *, resp);
4869 }
4870 
4871 /* ARGSUSED */
4872 static void
4873 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4874     struct compound_state *cs)
4875 {
4876 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4877 
4878 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4879 
4880 	/* No need to check cs->access - we are not accessing any object */
4881 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4882 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4883 		goto out;
4884 	}
4885 	if (cs->vp != NULL) {
4886 		VN_RELE(cs->vp);
4887 	}
4888 	cs->vp = cs->saved_vp;
4889 	cs->saved_vp = NULL;
4890 	if (cs->exi)
4891 		exi_rele(cs->exi);
4892 	cs->exi = cs->saved_exi;
4893 	if (cs->exi)
4894 		exi_hold(cs->exi);
4895 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4896 	*cs->statusp = resp->status = NFS4_OK;
4897 	cs->deleg = FALSE;
4898 
4899 out:
4900 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4901 	    RESTOREFH4res *, resp);
4902 }
4903 
4904 /* ARGSUSED */
4905 static void
4906 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4907     struct compound_state *cs)
4908 {
4909 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4910 
4911 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4912 
4913 	/* No need to check cs->access - we are not accessing any object */
4914 	if (cs->vp == NULL) {
4915 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4916 		goto out;
4917 	}
4918 	if (cs->saved_vp != NULL) {
4919 		VN_RELE(cs->saved_vp);
4920 	}
4921 	cs->saved_vp = cs->vp;
4922 	VN_HOLD(cs->saved_vp);
4923 	if (cs->saved_exi)
4924 		exi_rele(cs->saved_exi);
4925 	cs->saved_exi = cs->exi;
4926 	if (cs->saved_exi)
4927 		exi_hold(cs->saved_exi);
4928 	/*
4929 	 * since SAVEFH is fairly rare, don't alloc space for its fh
4930 	 * unless necessary.
4931 	 */
4932 	if (cs->saved_fh.nfs_fh4_val == NULL) {
4933 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4934 	}
4935 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4936 	*cs->statusp = resp->status = NFS4_OK;
4937 
4938 out:
4939 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4940 	    SAVEFH4res *, resp);
4941 }
4942 
4943 /*
4944  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4945  * return the bitmap of attrs that were set successfully. It is also
4946  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4947  * always be called only after rfs4_do_set_attrs().
4948  *
4949  * Verify that the attributes are same as the expected ones. sargp->vap
4950  * and sargp->sbp contain the input attributes as translated from fattr4.
4951  *
4952  * This function verifies only the attrs that correspond to a vattr or
4953  * vfsstat struct. That is because of the extra step needed to get the
4954  * corresponding system structs. Other attributes have already been set or
4955  * verified by do_rfs4_set_attrs.
4956  *
4957  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4958  */
4959 static int
4960 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4961     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4962 {
4963 	int error, ret_error = 0;
4964 	int i, k;
4965 	uint_t sva_mask = sargp->vap->va_mask;
4966 	uint_t vbit;
4967 	union nfs4_attr_u *na;
4968 	uint8_t *amap;
4969 	bool_t getsb = ntovp->vfsstat;
4970 
4971 	if (sva_mask != 0) {
4972 		/*
4973 		 * Okay to overwrite sargp->vap because we verify based
4974 		 * on the incoming values.
4975 		 */
4976 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4977 		    sargp->cs->cr, NULL);
4978 		if (ret_error) {
4979 			if (resp == NULL)
4980 				return (ret_error);
4981 			/*
4982 			 * Must return bitmap of successful attrs
4983 			 */
4984 			sva_mask = 0;	/* to prevent checking vap later */
4985 		} else {
4986 			/*
4987 			 * Some file systems clobber va_mask. it is probably
4988 			 * wrong of them to do so, nonethless we practice
4989 			 * defensive coding.
4990 			 * See bug id 4276830.
4991 			 */
4992 			sargp->vap->va_mask = sva_mask;
4993 		}
4994 	}
4995 
4996 	if (getsb) {
4997 		/*
4998 		 * Now get the superblock and loop on the bitmap, as there is
4999 		 * no simple way of translating from superblock to bitmap4.
5000 		 */
5001 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5002 		if (ret_error) {
5003 			if (resp == NULL)
5004 				goto errout;
5005 			getsb = FALSE;
5006 		}
5007 	}
5008 
5009 	/*
5010 	 * Now loop and verify each attribute which getattr returned
5011 	 * whether it's the same as the input.
5012 	 */
5013 	if (resp == NULL && !getsb && (sva_mask == 0))
5014 		goto errout;
5015 
5016 	na = ntovp->na;
5017 	amap = ntovp->amap;
5018 	k = 0;
5019 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5020 		k = *amap;
5021 		ASSERT(nfs4_ntov_map[k].nval == k);
5022 		vbit = nfs4_ntov_map[k].vbit;
5023 
5024 		/*
5025 		 * If vattr attribute but VOP_GETATTR failed, or it's
5026 		 * superblock attribute but VFS_STATVFS failed, skip
5027 		 */
5028 		if (vbit) {
5029 			if ((vbit & sva_mask) == 0)
5030 				continue;
5031 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5032 			continue;
5033 		}
5034 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5035 		if (resp != NULL) {
5036 			if (error)
5037 				ret_error = -1;	/* not all match */
5038 			else	/* update response bitmap */
5039 				*resp |= nfs4_ntov_map[k].fbit;
5040 			continue;
5041 		}
5042 		if (error) {
5043 			ret_error = -1;	/* not all match */
5044 			break;
5045 		}
5046 	}
5047 errout:
5048 	return (ret_error);
5049 }
5050 
5051 /*
5052  * Decode the attribute to be set/verified. If the attr requires a sys op
5053  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5054  * call the sv_getit function for it, because the sys op hasn't yet been done.
5055  * Return 0 for success, error code if failed.
5056  *
5057  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5058  */
5059 static int
5060 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5061     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5062 {
5063 	int error = 0;
5064 	bool_t set_later;
5065 
5066 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5067 
5068 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5069 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5070 		/*
5071 		 * don't verify yet if a vattr or sb dependent attr,
5072 		 * because we don't have their sys values yet.
5073 		 * Will be done later.
5074 		 */
5075 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5076 			/*
5077 			 * ACLs are a special case, since setting the MODE
5078 			 * conflicts with setting the ACL.  We delay setting
5079 			 * the ACL until all other attributes have been set.
5080 			 * The ACL gets set in do_rfs4_op_setattr().
5081 			 */
5082 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5083 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5084 				    sargp, nap);
5085 				if (error) {
5086 					xdr_free(nfs4_ntov_map[k].xfunc,
5087 					    (caddr_t)nap);
5088 				}
5089 			}
5090 		}
5091 	} else {
5092 #ifdef  DEBUG
5093 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5094 		    "decoding attribute %d\n", k);
5095 #endif
5096 		error = EINVAL;
5097 	}
5098 	if (!error && resp_bval && !set_later) {
5099 		*resp_bval |= nfs4_ntov_map[k].fbit;
5100 	}
5101 
5102 	return (error);
5103 }
5104 
5105 /*
5106  * Set vattr based on incoming fattr4 attrs - used by setattr.
5107  * Set response mask. Ignore any values that are not writable vattr attrs.
5108  */
5109 static nfsstat4
5110 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5111     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5112     nfs4_attr_cmd_t cmd)
5113 {
5114 	int error = 0;
5115 	int i;
5116 	char *attrs = fattrp->attrlist4;
5117 	uint32_t attrslen = fattrp->attrlist4_len;
5118 	XDR xdr;
5119 	nfsstat4 status = NFS4_OK;
5120 	vnode_t *vp = cs->vp;
5121 	union nfs4_attr_u *na;
5122 	uint8_t *amap;
5123 
5124 #ifndef lint
5125 	/*
5126 	 * Make sure that maximum attribute number can be expressed as an
5127 	 * 8 bit quantity.
5128 	 */
5129 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5130 #endif
5131 
5132 	if (vp == NULL) {
5133 		if (resp)
5134 			*resp = 0;
5135 		return (NFS4ERR_NOFILEHANDLE);
5136 	}
5137 	if (cs->access == CS_ACCESS_DENIED) {
5138 		if (resp)
5139 			*resp = 0;
5140 		return (NFS4ERR_ACCESS);
5141 	}
5142 
5143 	sargp->op = cmd;
5144 	sargp->cs = cs;
5145 	sargp->flag = 0;	/* may be set later */
5146 	sargp->vap->va_mask = 0;
5147 	sargp->rdattr_error = NFS4_OK;
5148 	sargp->rdattr_error_req = FALSE;
5149 	/* sargp->sbp is set by the caller */
5150 
5151 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5152 
5153 	na = ntovp->na;
5154 	amap = ntovp->amap;
5155 
5156 	/*
5157 	 * The following loop iterates on the nfs4_ntov_map checking
5158 	 * if the fbit is set in the requested bitmap.
5159 	 * If set then we process the arguments using the
5160 	 * rfs4_fattr4 conversion functions to populate the setattr
5161 	 * vattr and va_mask. Any settable attrs that are not using vattr
5162 	 * will be set in this loop.
5163 	 */
5164 	for (i = 0; i < nfs4_ntov_map_size; i++) {
5165 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5166 			continue;
5167 		}
5168 		/*
5169 		 * If setattr, must be a writable attr.
5170 		 * If verify/nverify, must be a readable attr.
5171 		 */
5172 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
5173 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5174 			/*
5175 			 * Client tries to set/verify an
5176 			 * unsupported attribute, tries to set
5177 			 * a read only attr or verify a write
5178 			 * only one - error!
5179 			 */
5180 			break;
5181 		}
5182 		/*
5183 		 * Decode the attribute to set/verify
5184 		 */
5185 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5186 		    &xdr, resp ? resp : NULL, na);
5187 		if (error)
5188 			break;
5189 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5190 		na++;
5191 		(ntovp->attrcnt)++;
5192 		if (nfs4_ntov_map[i].vfsstat)
5193 			ntovp->vfsstat = TRUE;
5194 	}
5195 
5196 	if (error != 0)
5197 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5198 		    puterrno4(error));
5199 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
5200 	return (status);
5201 }
5202 
5203 static nfsstat4
5204 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5205     stateid4 *stateid)
5206 {
5207 	int error = 0;
5208 	struct nfs4_svgetit_arg sarg;
5209 	bool_t trunc;
5210 
5211 	nfsstat4 status = NFS4_OK;
5212 	cred_t *cr = cs->cr;
5213 	vnode_t *vp = cs->vp;
5214 	struct nfs4_ntov_table ntov;
5215 	struct statvfs64 sb;
5216 	struct vattr bva;
5217 	struct flock64 bf;
5218 	int in_crit = 0;
5219 	uint_t saved_mask = 0;
5220 	caller_context_t ct;
5221 
5222 	*resp = 0;
5223 	sarg.sbp = &sb;
5224 	sarg.is_referral = B_FALSE;
5225 	nfs4_ntov_table_init(&ntov);
5226 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5227 	    NFS4ATTR_SETIT);
5228 	if (status != NFS4_OK) {
5229 		/*
5230 		 * failed set attrs
5231 		 */
5232 		goto done;
5233 	}
5234 	if ((sarg.vap->va_mask == 0) &&
5235 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5236 		/*
5237 		 * no further work to be done
5238 		 */
5239 		goto done;
5240 	}
5241 
5242 	/*
5243 	 * If we got a request to set the ACL and the MODE, only
5244 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
5245 	 * to change any other bits, along with setting an ACL,
5246 	 * gives NFS4ERR_INVAL.
5247 	 */
5248 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5249 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
5250 		vattr_t va;
5251 
5252 		va.va_mask = AT_MODE;
5253 		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5254 		if (error) {
5255 			status = puterrno4(error);
5256 			goto done;
5257 		}
5258 		if ((sarg.vap->va_mode ^ va.va_mode) &
5259 		    ~(VSUID | VSGID | VSVTX)) {
5260 			status = NFS4ERR_INVAL;
5261 			goto done;
5262 		}
5263 	}
5264 
5265 	/* Check stateid only if size has been set */
5266 	if (sarg.vap->va_mask & AT_SIZE) {
5267 		trunc = (sarg.vap->va_size == 0);
5268 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5269 		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5270 		if (status != NFS4_OK)
5271 			goto done;
5272 	} else {
5273 		ct.cc_sysid = 0;
5274 		ct.cc_pid = 0;
5275 		ct.cc_caller_id = nfs4_srv_caller_id;
5276 		ct.cc_flags = CC_DONTBLOCK;
5277 	}
5278 
5279 	/* XXX start of possible race with delegations */
5280 
5281 	/*
5282 	 * We need to specially handle size changes because it is
5283 	 * possible for the client to create a file with read-only
5284 	 * modes, but with the file opened for writing. If the client
5285 	 * then tries to set the file size, e.g. ftruncate(3C),
5286 	 * fcntl(F_FREESP), the normal access checking done in
5287 	 * VOP_SETATTR would prevent the client from doing it even though
5288 	 * it should be allowed to do so.  To get around this, we do the
5289 	 * access checking for ourselves and use VOP_SPACE which doesn't
5290 	 * do the access checking.
5291 	 * Also the client should not be allowed to change the file
5292 	 * size if there is a conflicting non-blocking mandatory lock in
5293 	 * the region of the change.
5294 	 */
5295 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5296 		u_offset_t offset;
5297 		ssize_t length;
5298 
5299 		/*
5300 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5301 		 * before returning, sarg.vap->va_mask is used to
5302 		 * generate the setattr reply bitmap.  We also clear
5303 		 * AT_SIZE below before calling VOP_SPACE.  For both
5304 		 * of these cases, the va_mask needs to be saved here
5305 		 * and restored after calling VOP_SETATTR.
5306 		 */
5307 		saved_mask = sarg.vap->va_mask;
5308 
5309 		/*
5310 		 * Check any possible conflict due to NBMAND locks.
5311 		 * Get into critical region before VOP_GETATTR, so the
5312 		 * size attribute is valid when checking conflicts.
5313 		 */
5314 		if (nbl_need_check(vp)) {
5315 			nbl_start_crit(vp, RW_READER);
5316 			in_crit = 1;
5317 		}
5318 
5319 		bva.va_mask = AT_UID|AT_SIZE;
5320 		if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5321 			status = puterrno4(error);
5322 			goto done;
5323 		}
5324 
5325 		if (in_crit) {
5326 			if (sarg.vap->va_size < bva.va_size) {
5327 				offset = sarg.vap->va_size;
5328 				length = bva.va_size - sarg.vap->va_size;
5329 			} else {
5330 				offset = bva.va_size;
5331 				length = sarg.vap->va_size - bva.va_size;
5332 			}
5333 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5334 			    &ct)) {
5335 				status = NFS4ERR_LOCKED;
5336 				goto done;
5337 			}
5338 		}
5339 
5340 		if (crgetuid(cr) == bva.va_uid) {
5341 			sarg.vap->va_mask &= ~AT_SIZE;
5342 			bf.l_type = F_WRLCK;
5343 			bf.l_whence = 0;
5344 			bf.l_start = (off64_t)sarg.vap->va_size;
5345 			bf.l_len = 0;
5346 			bf.l_sysid = 0;
5347 			bf.l_pid = 0;
5348 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5349 			    (offset_t)sarg.vap->va_size, cr, &ct);
5350 		}
5351 	}
5352 
5353 	if (!error && sarg.vap->va_mask != 0)
5354 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5355 
5356 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
5357 	if (saved_mask & AT_SIZE)
5358 		sarg.vap->va_mask |= AT_SIZE;
5359 
5360 	/*
5361 	 * If an ACL was being set, it has been delayed until now,
5362 	 * in order to set the mode (via the VOP_SETATTR() above) first.
5363 	 */
5364 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5365 		int i;
5366 
5367 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5368 			if (ntov.amap[i] == FATTR4_ACL)
5369 				break;
5370 		if (i < NFS4_MAXNUM_ATTRS) {
5371 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5372 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5373 			if (error == 0) {
5374 				*resp |= FATTR4_ACL_MASK;
5375 			} else if (error == ENOTSUP) {
5376 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
5377 				status = NFS4ERR_ATTRNOTSUPP;
5378 				goto done;
5379 			}
5380 		} else {
5381 			NFS4_DEBUG(rfs4_debug,
5382 			    (CE_NOTE, "do_rfs4_op_setattr: "
5383 			    "unable to find ACL in fattr4"));
5384 			error = EINVAL;
5385 		}
5386 	}
5387 
5388 	if (error) {
5389 		/* check if a monitor detected a delegation conflict */
5390 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5391 			status = NFS4ERR_DELAY;
5392 		else
5393 			status = puterrno4(error);
5394 
5395 		/*
5396 		 * Set the response bitmap when setattr failed.
5397 		 * If VOP_SETATTR partially succeeded, test by doing a
5398 		 * VOP_GETATTR on the object and comparing the data
5399 		 * to the setattr arguments.
5400 		 */
5401 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
5402 	} else {
5403 		/*
5404 		 * Force modified metadata out to stable storage.
5405 		 */
5406 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5407 		/*
5408 		 * Set response bitmap
5409 		 */
5410 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5411 	}
5412 
5413 /* Return early and already have a NFSv4 error */
5414 done:
5415 	/*
5416 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5417 	 * conversion sets both readable and writeable NFS4 attrs
5418 	 * for AT_MTIME and AT_ATIME.  The line below masks out
5419 	 * unrequested attrs from the setattr result bitmap.  This
5420 	 * is placed after the done: label to catch the ATTRNOTSUP
5421 	 * case.
5422 	 */
5423 	*resp &= fattrp->attrmask;
5424 
5425 	if (in_crit)
5426 		nbl_end_crit(vp);
5427 
5428 	nfs4_ntov_table_free(&ntov, &sarg);
5429 
5430 	return (status);
5431 }
5432 
5433 /* ARGSUSED */
5434 static void
5435 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5436     struct compound_state *cs)
5437 {
5438 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5439 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5440 	bslabel_t *clabel;
5441 
5442 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5443 	    SETATTR4args *, args);
5444 
5445 	if (cs->vp == NULL) {
5446 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5447 		goto out;
5448 	}
5449 
5450 	/*
5451 	 * If there is an unshared filesystem mounted on this vnode,
5452 	 * do not allow to setattr on this vnode.
5453 	 */
5454 	if (vn_ismntpt(cs->vp)) {
5455 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5456 		goto out;
5457 	}
5458 
5459 	resp->attrsset = 0;
5460 
5461 	if (rdonly4(req, cs)) {
5462 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5463 		goto out;
5464 	}
5465 
5466 	/* check label before setting attributes */
5467 	if (is_system_labeled()) {
5468 		ASSERT(req->rq_label != NULL);
5469 		clabel = req->rq_label;
5470 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5471 		    "got client label from request(1)",
5472 		    struct svc_req *, req);
5473 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5474 			if (!do_rfs_label_check(clabel, cs->vp,
5475 			    EQUALITY_CHECK, cs->exi)) {
5476 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
5477 				goto out;
5478 			}
5479 		}
5480 	}
5481 
5482 	*cs->statusp = resp->status =
5483 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5484 	    &args->stateid);
5485 
5486 out:
5487 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5488 	    SETATTR4res *, resp);
5489 }
5490 
5491 /* ARGSUSED */
5492 static void
5493 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5494     struct compound_state *cs)
5495 {
5496 	/*
5497 	 * verify and nverify are exactly the same, except that nverify
5498 	 * succeeds when some argument changed, and verify succeeds when
5499 	 * when none changed.
5500 	 */
5501 
5502 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5503 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5504 
5505 	int error;
5506 	struct nfs4_svgetit_arg sarg;
5507 	struct statvfs64 sb;
5508 	struct nfs4_ntov_table ntov;
5509 
5510 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5511 	    VERIFY4args *, args);
5512 
5513 	if (cs->vp == NULL) {
5514 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5515 		goto out;
5516 	}
5517 
5518 	sarg.sbp = &sb;
5519 	sarg.is_referral = B_FALSE;
5520 	nfs4_ntov_table_init(&ntov);
5521 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5522 	    &sarg, &ntov, NFS4ATTR_VERIT);
5523 	if (resp->status != NFS4_OK) {
5524 		/*
5525 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5526 		 * so could return -1 for "no match".
5527 		 */
5528 		if (resp->status == -1)
5529 			resp->status = NFS4ERR_NOT_SAME;
5530 		goto done;
5531 	}
5532 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5533 	switch (error) {
5534 	case 0:
5535 		resp->status = NFS4_OK;
5536 		break;
5537 	case -1:
5538 		resp->status = NFS4ERR_NOT_SAME;
5539 		break;
5540 	default:
5541 		resp->status = puterrno4(error);
5542 		break;
5543 	}
5544 done:
5545 	*cs->statusp = resp->status;
5546 	nfs4_ntov_table_free(&ntov, &sarg);
5547 out:
5548 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5549 	    VERIFY4res *, resp);
5550 }
5551 
5552 /* ARGSUSED */
5553 static void
5554 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5555     struct compound_state *cs)
5556 {
5557 	/*
5558 	 * verify and nverify are exactly the same, except that nverify
5559 	 * succeeds when some argument changed, and verify succeeds when
5560 	 * when none changed.
5561 	 */
5562 
5563 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5564 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5565 
5566 	int error;
5567 	struct nfs4_svgetit_arg sarg;
5568 	struct statvfs64 sb;
5569 	struct nfs4_ntov_table ntov;
5570 
5571 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5572 	    NVERIFY4args *, args);
5573 
5574 	if (cs->vp == NULL) {
5575 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5576 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5577 		    NVERIFY4res *, resp);
5578 		return;
5579 	}
5580 	sarg.sbp = &sb;
5581 	sarg.is_referral = B_FALSE;
5582 	nfs4_ntov_table_init(&ntov);
5583 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5584 	    &sarg, &ntov, NFS4ATTR_VERIT);
5585 	if (resp->status != NFS4_OK) {
5586 		/*
5587 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5588 		 * so could return -1 for "no match".
5589 		 */
5590 		if (resp->status == -1)
5591 			resp->status = NFS4_OK;
5592 		goto done;
5593 	}
5594 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5595 	switch (error) {
5596 	case 0:
5597 		resp->status = NFS4ERR_SAME;
5598 		break;
5599 	case -1:
5600 		resp->status = NFS4_OK;
5601 		break;
5602 	default:
5603 		resp->status = puterrno4(error);
5604 		break;
5605 	}
5606 done:
5607 	*cs->statusp = resp->status;
5608 	nfs4_ntov_table_free(&ntov, &sarg);
5609 
5610 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5611 	    NVERIFY4res *, resp);
5612 }
5613 
5614 /*
5615  * XXX - This should live in an NFS header file.
5616  */
5617 #define	MAX_IOVECS	12
5618 
5619 /* ARGSUSED */
5620 static void
5621 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5622     struct compound_state *cs)
5623 {
5624 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5625 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5626 	int error;
5627 	vnode_t *vp;
5628 	struct vattr bva;
5629 	u_offset_t rlimit;
5630 	struct uio uio;
5631 	struct iovec iov[MAX_IOVECS];
5632 	struct iovec *iovp;
5633 	int iovcnt;
5634 	int ioflag;
5635 	cred_t *savecred, *cr;
5636 	bool_t *deleg = &cs->deleg;
5637 	nfsstat4 stat;
5638 	int in_crit = 0;
5639 	caller_context_t ct;
5640 
5641 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5642 	    WRITE4args *, args);
5643 
5644 	vp = cs->vp;
5645 	if (vp == NULL) {
5646 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5647 		goto out;
5648 	}
5649 	if (cs->access == CS_ACCESS_DENIED) {
5650 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5651 		goto out;
5652 	}
5653 
5654 	cr = cs->cr;
5655 
5656 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5657 	    deleg, TRUE, &ct)) != NFS4_OK) {
5658 		*cs->statusp = resp->status = stat;
5659 		goto out;
5660 	}
5661 
5662 	/*
5663 	 * We have to enter the critical region before calling VOP_RWLOCK
5664 	 * to avoid a deadlock with ufs.
5665 	 */
5666 	if (nbl_need_check(vp)) {
5667 		nbl_start_crit(vp, RW_READER);
5668 		in_crit = 1;
5669 		if (nbl_conflict(vp, NBL_WRITE,
5670 		    args->offset, args->data_len, 0, &ct)) {
5671 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5672 			goto out;
5673 		}
5674 	}
5675 
5676 	bva.va_mask = AT_MODE | AT_UID;
5677 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5678 
5679 	/*
5680 	 * If we can't get the attributes, then we can't do the
5681 	 * right access checking.  So, we'll fail the request.
5682 	 */
5683 	if (error) {
5684 		*cs->statusp = resp->status = puterrno4(error);
5685 		goto out;
5686 	}
5687 
5688 	if (rdonly4(req, cs)) {
5689 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5690 		goto out;
5691 	}
5692 
5693 	if (vp->v_type != VREG) {
5694 		*cs->statusp = resp->status =
5695 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5696 		goto out;
5697 	}
5698 
5699 	if (crgetuid(cr) != bva.va_uid &&
5700 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5701 		*cs->statusp = resp->status = puterrno4(error);
5702 		goto out;
5703 	}
5704 
5705 	if (MANDLOCK(vp, bva.va_mode)) {
5706 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5707 		goto out;
5708 	}
5709 
5710 	if (args->data_len == 0) {
5711 		*cs->statusp = resp->status = NFS4_OK;
5712 		resp->count = 0;
5713 		resp->committed = args->stable;
5714 		resp->writeverf = Write4verf;
5715 		goto out;
5716 	}
5717 
5718 	if (args->mblk != NULL) {
5719 		mblk_t *m;
5720 		uint_t bytes, round_len;
5721 
5722 		iovcnt = 0;
5723 		bytes = 0;
5724 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5725 		for (m = args->mblk;
5726 		    m != NULL && bytes < round_len;
5727 		    m = m->b_cont) {
5728 			iovcnt++;
5729 			bytes += MBLKL(m);
5730 		}
5731 #ifdef DEBUG
5732 		/* should have ended on an mblk boundary */
5733 		if (bytes != round_len) {
5734 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5735 			    bytes, round_len, args->data_len);
5736 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5737 			    (void *)args->mblk, (void *)m);
5738 			ASSERT(bytes == round_len);
5739 		}
5740 #endif
5741 		if (iovcnt <= MAX_IOVECS) {
5742 			iovp = iov;
5743 		} else {
5744 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5745 		}
5746 		mblk_to_iov(args->mblk, iovcnt, iovp);
5747 	} else if (args->rlist != NULL) {
5748 		iovcnt = 1;
5749 		iovp = iov;
5750 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5751 		iovp->iov_len = args->data_len;
5752 	} else {
5753 		iovcnt = 1;
5754 		iovp = iov;
5755 		iovp->iov_base = args->data_val;
5756 		iovp->iov_len = args->data_len;
5757 	}
5758 
5759 	uio.uio_iov = iovp;
5760 	uio.uio_iovcnt = iovcnt;
5761 
5762 	uio.uio_segflg = UIO_SYSSPACE;
5763 	uio.uio_extflg = UIO_COPY_DEFAULT;
5764 	uio.uio_loffset = args->offset;
5765 	uio.uio_resid = args->data_len;
5766 	uio.uio_llimit = curproc->p_fsz_ctl;
5767 	rlimit = uio.uio_llimit - args->offset;
5768 	if (rlimit < (u_offset_t)uio.uio_resid)
5769 		uio.uio_resid = (int)rlimit;
5770 
5771 	if (args->stable == UNSTABLE4)
5772 		ioflag = 0;
5773 	else if (args->stable == FILE_SYNC4)
5774 		ioflag = FSYNC;
5775 	else if (args->stable == DATA_SYNC4)
5776 		ioflag = FDSYNC;
5777 	else {
5778 		if (iovp != iov)
5779 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5780 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5781 		goto out;
5782 	}
5783 
5784 	/*
5785 	 * We're changing creds because VM may fault and we need
5786 	 * the cred of the current thread to be used if quota
5787 	 * checking is enabled.
5788 	 */
5789 	savecred = curthread->t_cred;
5790 	curthread->t_cred = cr;
5791 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5792 	curthread->t_cred = savecred;
5793 
5794 	if (iovp != iov)
5795 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5796 
5797 	if (error) {
5798 		*cs->statusp = resp->status = puterrno4(error);
5799 		goto out;
5800 	}
5801 
5802 	*cs->statusp = resp->status = NFS4_OK;
5803 	resp->count = args->data_len - uio.uio_resid;
5804 
5805 	if (ioflag == 0)
5806 		resp->committed = UNSTABLE4;
5807 	else
5808 		resp->committed = FILE_SYNC4;
5809 
5810 	resp->writeverf = Write4verf;
5811 
5812 out:
5813 	if (in_crit)
5814 		nbl_end_crit(vp);
5815 
5816 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5817 	    WRITE4res *, resp);
5818 }
5819 
5820 
5821 /* XXX put in a header file */
5822 extern int	sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5823 
5824 void
5825 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5826     struct svc_req *req, cred_t *cr, int *rv)
5827 {
5828 	uint_t i;
5829 	struct compound_state cs;
5830 
5831 	if (rv != NULL)
5832 		*rv = 0;
5833 	rfs4_init_compound_state(&cs);
5834 	/*
5835 	 * Form a reply tag by copying over the reqeuest tag.
5836 	 */
5837 	resp->tag.utf8string_val =
5838 	    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5839 	resp->tag.utf8string_len = args->tag.utf8string_len;
5840 	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5841 	    resp->tag.utf8string_len);
5842 
5843 	cs.statusp = &resp->status;
5844 	cs.req = req;
5845 	resp->array = NULL;
5846 	resp->array_len = 0;
5847 
5848 	/*
5849 	 * XXX for now, minorversion should be zero
5850 	 */
5851 	if (args->minorversion != NFS4_MINORVERSION) {
5852 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
5853 		    &cs, COMPOUND4args *, args);
5854 		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5855 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
5856 		    &cs, COMPOUND4res *, resp);
5857 		return;
5858 	}
5859 
5860 	if (args->array_len == 0) {
5861 		resp->status = NFS4_OK;
5862 		return;
5863 	}
5864 
5865 	ASSERT(exi == NULL);
5866 	ASSERT(cr == NULL);
5867 
5868 	cr = crget();
5869 	ASSERT(cr != NULL);
5870 
5871 	if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5872 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
5873 		    &cs, COMPOUND4args *, args);
5874 		crfree(cr);
5875 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
5876 		    &cs, COMPOUND4res *, resp);
5877 		svcerr_badcred(req->rq_xprt);
5878 		if (rv != NULL)
5879 			*rv = 1;
5880 		return;
5881 	}
5882 	resp->array_len = args->array_len;
5883 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5884 	    KM_SLEEP);
5885 
5886 	cs.basecr = cr;
5887 
5888 	DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5889 	    COMPOUND4args *, args);
5890 
5891 	/*
5892 	 * If this is the first compound we've seen, we need to start all
5893 	 * new instances' grace periods.
5894 	 */
5895 	if (rfs4_seen_first_compound == 0) {
5896 		rfs4_grace_start_new();
5897 		/*
5898 		 * This must be set after rfs4_grace_start_new(), otherwise
5899 		 * another thread could proceed past here before the former
5900 		 * is finished.
5901 		 */
5902 		rfs4_seen_first_compound = 1;
5903 	}
5904 
5905 	for (i = 0; i < args->array_len && cs.cont; i++) {
5906 		nfs_argop4 *argop;
5907 		nfs_resop4 *resop;
5908 		uint_t op;
5909 
5910 		argop = &args->array[i];
5911 		resop = &resp->array[i];
5912 		resop->resop = argop->argop;
5913 		op = (uint_t)resop->resop;
5914 
5915 		if (op < rfsv4disp_cnt) {
5916 			kstat_t *ksp = rfsprocio_v4_ptr[op];
5917 			kstat_t *exi_ksp = NULL;
5918 
5919 			/*
5920 			 * Count the individual ops here; NULL and COMPOUND
5921 			 * are counted in common_dispatch()
5922 			 */
5923 			rfsproccnt_v4_ptr[op].value.ui64++;
5924 
5925 			if (ksp != NULL) {
5926 				mutex_enter(ksp->ks_lock);
5927 				kstat_runq_enter(KSTAT_IO_PTR(ksp));
5928 				mutex_exit(ksp->ks_lock);
5929 			}
5930 
5931 			switch (rfsv4disptab[op].op_type) {
5932 			case NFS4_OP_CFH:
5933 				resop->exi = cs.exi;
5934 				break;
5935 			case NFS4_OP_SFH:
5936 				resop->exi = cs.saved_exi;
5937 				break;
5938 			default:
5939 				ASSERT(resop->exi == NULL);
5940 				break;
5941 			}
5942 
5943 			if (resop->exi != NULL) {
5944 				exi_ksp = resop->exi->exi_kstats->
5945 				    rfsprocio_v4_ptr[op];
5946 				if (exi_ksp != NULL) {
5947 					mutex_enter(exi_ksp->ks_lock);
5948 					kstat_runq_enter(KSTAT_IO_PTR(exi_ksp));
5949 					mutex_exit(exi_ksp->ks_lock);
5950 				}
5951 			}
5952 
5953 			NFS4_DEBUG(rfs4_debug > 1,
5954 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5955 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5956 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5957 			    rfs4_op_string[op], *cs.statusp));
5958 			if (*cs.statusp != NFS4_OK)
5959 				cs.cont = FALSE;
5960 
5961 			if (rfsv4disptab[op].op_type == NFS4_OP_POSTCFH &&
5962 			    *cs.statusp == NFS4_OK &&
5963 			    (resop->exi = cs.exi) != NULL) {
5964 				exi_ksp = resop->exi->exi_kstats->
5965 				    rfsprocio_v4_ptr[op];
5966 			}
5967 
5968 			if (exi_ksp != NULL) {
5969 				mutex_enter(exi_ksp->ks_lock);
5970 				KSTAT_IO_PTR(exi_ksp)->nwritten +=
5971 				    argop->opsize;
5972 				KSTAT_IO_PTR(exi_ksp)->writes++;
5973 				if (rfsv4disptab[op].op_type != NFS4_OP_POSTCFH)
5974 					kstat_runq_exit(KSTAT_IO_PTR(exi_ksp));
5975 				mutex_exit(exi_ksp->ks_lock);
5976 
5977 				exi_hold(resop->exi);
5978 			} else {
5979 				resop->exi = NULL;
5980 			}
5981 
5982 			if (ksp != NULL) {
5983 				mutex_enter(ksp->ks_lock);
5984 				kstat_runq_exit(KSTAT_IO_PTR(ksp));
5985 				mutex_exit(ksp->ks_lock);
5986 			}
5987 		} else {
5988 			/*
5989 			 * This is effectively dead code since XDR code
5990 			 * will have already returned BADXDR if op doesn't
5991 			 * decode to legal value.  This only done for a
5992 			 * day when XDR code doesn't verify v4 opcodes.
5993 			 */
5994 			op = OP_ILLEGAL;
5995 			rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5996 
5997 			rfs4_op_illegal(argop, resop, req, &cs);
5998 			cs.cont = FALSE;
5999 		}
6000 
6001 		/*
6002 		 * If not at last op, and if we are to stop, then
6003 		 * compact the results array.
6004 		 */
6005 		if ((i + 1) < args->array_len && !cs.cont) {
6006 			nfs_resop4 *new_res = kmem_alloc(
6007 			    (i + 1) * sizeof (nfs_resop4), KM_SLEEP);
6008 			bcopy(resp->array,
6009 			    new_res, (i + 1) * sizeof (nfs_resop4));
6010 			kmem_free(resp->array,
6011 			    args->array_len * sizeof (nfs_resop4));
6012 
6013 			resp->array_len = i + 1;
6014 			resp->array = new_res;
6015 		}
6016 	}
6017 
6018 
6019 	DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
6020 	    COMPOUND4res *, resp);
6021 
6022 	if (cs.exi)
6023 		exi_rele(cs.exi);
6024 	if (cs.saved_exi)
6025 		exi_rele(cs.saved_exi);
6026 	if (cs.vp)
6027 		VN_RELE(cs.vp);
6028 	if (cs.saved_vp)
6029 		VN_RELE(cs.saved_vp);
6030 	if (cs.saved_fh.nfs_fh4_val)
6031 		kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
6032 
6033 	if (cs.basecr)
6034 		crfree(cs.basecr);
6035 	if (cs.cr)
6036 		crfree(cs.cr);
6037 	/*
6038 	 * done with this compound request, free the label
6039 	 */
6040 
6041 	if (req->rq_label != NULL) {
6042 		kmem_free(req->rq_label, sizeof (bslabel_t));
6043 		req->rq_label = NULL;
6044 	}
6045 }
6046 
6047 /*
6048  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6049  * XXX zero out the tag and array values. Need to investigate why the
6050  * XXX calls occur, but at least prevent the panic for now.
6051  */
6052 void
6053 rfs4_compound_free(COMPOUND4res *resp)
6054 {
6055 	uint_t i;
6056 
6057 	if (resp->tag.utf8string_val) {
6058 		UTF8STRING_FREE(resp->tag)
6059 	}
6060 
6061 	for (i = 0; i < resp->array_len; i++) {
6062 		nfs_resop4 *resop;
6063 		uint_t op;
6064 
6065 		resop = &resp->array[i];
6066 		op = (uint_t)resop->resop;
6067 		if (op < rfsv4disp_cnt) {
6068 			(*rfsv4disptab[op].dis_resfree)(resop);
6069 		}
6070 	}
6071 	if (resp->array != NULL) {
6072 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6073 	}
6074 }
6075 
6076 /*
6077  * Process the value of the compound request rpc flags, as a bit-AND
6078  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6079  */
6080 void
6081 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6082 {
6083 	int i;
6084 	int flag = RPC_ALL;
6085 
6086 	for (i = 0; flag && i < args->array_len; i++) {
6087 		uint_t op;
6088 
6089 		op = (uint_t)args->array[i].argop;
6090 
6091 		if (op < rfsv4disp_cnt)
6092 			flag &= rfsv4disptab[op].dis_flags;
6093 		else
6094 			flag = 0;
6095 	}
6096 	*flagp = flag;
6097 }
6098 
6099 void
6100 rfs4_compound_kstat_args(COMPOUND4args *args)
6101 {
6102 	int i;
6103 
6104 	for (i = 0; i < args->array_len; i++) {
6105 		uint_t op = (uint_t)args->array[i].argop;
6106 
6107 		if (op < rfsv4disp_cnt) {
6108 			kstat_t *ksp = rfsprocio_v4_ptr[op];
6109 
6110 			if (ksp != NULL) {
6111 				mutex_enter(ksp->ks_lock);
6112 				KSTAT_IO_PTR(ksp)->nwritten +=
6113 				    args->array[i].opsize;
6114 				KSTAT_IO_PTR(ksp)->writes++;
6115 				mutex_exit(ksp->ks_lock);
6116 			}
6117 		}
6118 	}
6119 }
6120 
6121 void
6122 rfs4_compound_kstat_res(COMPOUND4res *res)
6123 {
6124 	int i;
6125 
6126 	for (i = 0; i < res->array_len; i++) {
6127 		uint_t op = (uint_t)res->array[i].resop;
6128 
6129 		if (op < rfsv4disp_cnt) {
6130 			kstat_t *ksp = rfsprocio_v4_ptr[op];
6131 			struct exportinfo *exi = res->array[i].exi;
6132 
6133 			if (ksp != NULL) {
6134 				mutex_enter(ksp->ks_lock);
6135 				KSTAT_IO_PTR(ksp)->nread +=
6136 				    res->array[i].opsize;
6137 				KSTAT_IO_PTR(ksp)->reads++;
6138 				mutex_exit(ksp->ks_lock);
6139 			}
6140 
6141 			if (exi != NULL) {
6142 				kstat_t *exi_ksp;
6143 
6144 				rw_enter(&exported_lock, RW_READER);
6145 
6146 				exi_ksp = exi->exi_kstats->rfsprocio_v4_ptr[op];
6147 				if (exi_ksp != NULL) {
6148 					mutex_enter(exi_ksp->ks_lock);
6149 					KSTAT_IO_PTR(exi_ksp)->nread +=
6150 					    res->array[i].opsize;
6151 					KSTAT_IO_PTR(exi_ksp)->reads++;
6152 					mutex_exit(exi_ksp->ks_lock);
6153 				}
6154 
6155 				rw_exit(&exported_lock);
6156 
6157 				exi_rele(exi);
6158 			}
6159 		}
6160 	}
6161 }
6162 
6163 nfsstat4
6164 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6165 {
6166 	nfsstat4 e;
6167 
6168 	rfs4_dbe_lock(cp->rc_dbe);
6169 
6170 	if (cp->rc_sysidt != LM_NOSYSID) {
6171 		*sp = cp->rc_sysidt;
6172 		e = NFS4_OK;
6173 
6174 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6175 		*sp = cp->rc_sysidt;
6176 		e = NFS4_OK;
6177 
6178 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6179 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
6180 	} else
6181 		e = NFS4ERR_DELAY;
6182 
6183 	rfs4_dbe_unlock(cp->rc_dbe);
6184 	return (e);
6185 }
6186 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug aid: log a one-line description of a record lock request
 * (operation, lock type, offset, length and pid) via cmn_err().
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	if (operation == F_GETLK)
		op = "F_GETLK";
	else if (operation == F_SETLK)
		op = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op = "F_SETLK_NBMAND";
	else
		op = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		type = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type = "F_WRLCK";
	else
		type = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6223 
6224 /*ARGSUSED*/
6225 static bool_t
6226 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6227 {
6228 	return (TRUE);
6229 }
6230 
6231 /*
6232  * Look up the pathname using the vp in cs as the directory vnode.
6233  * cs->vp will be the vnode for the file on success
6234  */
6235 
6236 static nfsstat4
6237 rfs4_lookup(component4 *component, struct svc_req *req,
6238     struct compound_state *cs)
6239 {
6240 	char *nm;
6241 	uint32_t len;
6242 	nfsstat4 status;
6243 	struct sockaddr *ca;
6244 	char *name;
6245 
6246 	if (cs->vp == NULL) {
6247 		return (NFS4ERR_NOFILEHANDLE);
6248 	}
6249 	if (cs->vp->v_type != VDIR) {
6250 		return (NFS4ERR_NOTDIR);
6251 	}
6252 
6253 	status = utf8_dir_verify(component);
6254 	if (status != NFS4_OK)
6255 		return (status);
6256 
6257 	nm = utf8_to_fn(component, &len, NULL);
6258 	if (nm == NULL) {
6259 		return (NFS4ERR_INVAL);
6260 	}
6261 
6262 	if (len > MAXNAMELEN) {
6263 		kmem_free(nm, len);
6264 		return (NFS4ERR_NAMETOOLONG);
6265 	}
6266 
6267 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6268 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6269 	    MAXPATHLEN + 1);
6270 
6271 	if (name == NULL) {
6272 		kmem_free(nm, len);
6273 		return (NFS4ERR_INVAL);
6274 	}
6275 
6276 	status = do_rfs4_op_lookup(name, req, cs);
6277 
6278 	if (name != nm)
6279 		kmem_free(name, MAXPATHLEN + 1);
6280 
6281 	kmem_free(nm, len);
6282 
6283 	return (status);
6284 }
6285 
6286 static nfsstat4
6287 rfs4_lookupfile(component4 *component, struct svc_req *req,
6288     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6289 {
6290 	nfsstat4 status;
6291 	vnode_t *dvp = cs->vp;
6292 	vattr_t bva, ava, fva;
6293 	int error;
6294 
6295 	/* Get "before" change value */
6296 	bva.va_mask = AT_CTIME|AT_SEQ;
6297 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6298 	if (error)
6299 		return (puterrno4(error));
6300 
6301 	/* rfs4_lookup may VN_RELE directory */
6302 	VN_HOLD(dvp);
6303 
6304 	status = rfs4_lookup(component, req, cs);
6305 	if (status != NFS4_OK) {
6306 		VN_RELE(dvp);
6307 		return (status);
6308 	}
6309 
6310 	/*
6311 	 * Get "after" change value, if it fails, simply return the
6312 	 * before value.
6313 	 */
6314 	ava.va_mask = AT_CTIME|AT_SEQ;
6315 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6316 		ava.va_ctime = bva.va_ctime;
6317 		ava.va_seq = 0;
6318 	}
6319 	VN_RELE(dvp);
6320 
6321 	/*
6322 	 * Validate the file is a file
6323 	 */
6324 	fva.va_mask = AT_TYPE|AT_MODE;
6325 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6326 	if (error)
6327 		return (puterrno4(error));
6328 
6329 	if (fva.va_type != VREG) {
6330 		if (fva.va_type == VDIR)
6331 			return (NFS4ERR_ISDIR);
6332 		if (fva.va_type == VLNK)
6333 			return (NFS4ERR_SYMLINK);
6334 		return (NFS4ERR_INVAL);
6335 	}
6336 
6337 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6338 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6339 
6340 	/*
6341 	 * It is undefined if VOP_LOOKUP will change va_seq, so
6342 	 * cinfo.atomic = TRUE only if we have
6343 	 * non-zero va_seq's, and they have not changed.
6344 	 */
6345 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6346 		cinfo->atomic = TRUE;
6347 	else
6348 		cinfo->atomic = FALSE;
6349 
6350 	/* Check for mandatory locking */
6351 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6352 	return (check_open_access(access, cs, req));
6353 }
6354 
6355 static nfsstat4
6356 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6357     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6358 {
6359 	int error;
6360 	nfsstat4 status = NFS4_OK;
6361 	vattr_t va;
6362 
6363 tryagain:
6364 
6365 	/*
6366 	 * The file open mode used is VWRITE.  If the client needs
6367 	 * some other semantic, then it should do the access checking
6368 	 * itself.  It would have been nice to have the file open mode
6369 	 * passed as part of the arguments.
6370 	 */
6371 
6372 	*created = TRUE;
6373 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6374 
6375 	if (error) {
6376 		*created = FALSE;
6377 
6378 		/*
6379 		 * If we got something other than file already exists
6380 		 * then just return this error.  Otherwise, we got
6381 		 * EEXIST.  If we were doing a GUARDED create, then
6382 		 * just return this error.  Otherwise, we need to
6383 		 * make sure that this wasn't a duplicate of an
6384 		 * exclusive create request.
6385 		 *
6386 		 * The assumption is made that a non-exclusive create
6387 		 * request will never return EEXIST.
6388 		 */
6389 
6390 		if (error != EEXIST || mode == GUARDED4) {
6391 			status = puterrno4(error);
6392 			return (status);
6393 		}
6394 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6395 		    NULL, NULL, NULL);
6396 
6397 		if (error) {
6398 			/*
6399 			 * We couldn't find the file that we thought that
6400 			 * we just created.  So, we'll just try creating
6401 			 * it again.
6402 			 */
6403 			if (error == ENOENT)
6404 				goto tryagain;
6405 
6406 			status = puterrno4(error);
6407 			return (status);
6408 		}
6409 
6410 		if (mode == UNCHECKED4) {
6411 			/* existing object must be regular file */
6412 			if ((*vpp)->v_type != VREG) {
6413 				if ((*vpp)->v_type == VDIR)
6414 					status = NFS4ERR_ISDIR;
6415 				else if ((*vpp)->v_type == VLNK)
6416 					status = NFS4ERR_SYMLINK;
6417 				else
6418 					status = NFS4ERR_INVAL;
6419 				VN_RELE(*vpp);
6420 				return (status);
6421 			}
6422 
6423 			return (NFS4_OK);
6424 		}
6425 
6426 		/* Check for duplicate request */
6427 		ASSERT(mtime != 0);
6428 		va.va_mask = AT_MTIME;
6429 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6430 		if (!error) {
6431 			/* We found the file */
6432 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
6433 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
6434 				/* but its not our creation */
6435 				VN_RELE(*vpp);
6436 				return (NFS4ERR_EXIST);
6437 			}
6438 			*created = TRUE; /* retrans of create == created */
6439 			return (NFS4_OK);
6440 		}
6441 		VN_RELE(*vpp);
6442 		return (NFS4ERR_EXIST);
6443 	}
6444 
6445 	return (NFS4_OK);
6446 }
6447 
6448 static nfsstat4
6449 check_open_access(uint32_t access, struct compound_state *cs,
6450     struct svc_req *req)
6451 {
6452 	int error;
6453 	vnode_t *vp;
6454 	bool_t readonly;
6455 	cred_t *cr = cs->cr;
6456 
6457 	/* For now we don't allow mandatory locking as per V2/V3 */
6458 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6459 		return (NFS4ERR_ACCESS);
6460 	}
6461 
6462 	vp = cs->vp;
6463 	ASSERT(cr != NULL && vp->v_type == VREG);
6464 
6465 	/*
6466 	 * If the file system is exported read only and we are trying
6467 	 * to open for write, then return NFS4ERR_ROFS
6468 	 */
6469 
6470 	readonly = rdonly4(req, cs);
6471 
6472 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6473 		return (NFS4ERR_ROFS);
6474 
6475 	if (access & OPEN4_SHARE_ACCESS_READ) {
6476 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6477 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6478 			return (NFS4ERR_ACCESS);
6479 		}
6480 	}
6481 
6482 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
6483 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6484 		if (error)
6485 			return (NFS4ERR_ACCESS);
6486 	}
6487 
6488 	return (NFS4_OK);
6489 }
6490 
6491 static nfsstat4
6492 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6493     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6494 {
6495 	struct nfs4_svgetit_arg sarg;
6496 	struct nfs4_ntov_table ntov;
6497 
6498 	bool_t ntov_table_init = FALSE;
6499 	struct statvfs64 sb;
6500 	nfsstat4 status;
6501 	vnode_t *vp;
6502 	vattr_t bva, ava, iva, cva, *vap;
6503 	vnode_t *dvp;
6504 	timespec32_t *mtime;
6505 	char *nm = NULL;
6506 	uint_t buflen;
6507 	bool_t created;
6508 	bool_t setsize = FALSE;
6509 	len_t reqsize;
6510 	int error;
6511 	bool_t trunc;
6512 	caller_context_t ct;
6513 	component4 *component;
6514 	bslabel_t *clabel;
6515 	struct sockaddr *ca;
6516 	char *name = NULL;
6517 
6518 	sarg.sbp = &sb;
6519 	sarg.is_referral = B_FALSE;
6520 
6521 	dvp = cs->vp;
6522 
6523 	/* Check if the file system is read only */
6524 	if (rdonly4(req, cs))
6525 		return (NFS4ERR_ROFS);
6526 
6527 	/* check the label of including directory */
6528 	if (is_system_labeled()) {
6529 		ASSERT(req->rq_label != NULL);
6530 		clabel = req->rq_label;
6531 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6532 		    "got client label from request(1)",
6533 		    struct svc_req *, req);
6534 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
6535 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6536 			    cs->exi)) {
6537 				return (NFS4ERR_ACCESS);
6538 			}
6539 		}
6540 	}
6541 
6542 	/*
6543 	 * Get the last component of path name in nm. cs will reference
6544 	 * the including directory on success.
6545 	 */
6546 	component = &args->open_claim4_u.file;
6547 	status = utf8_dir_verify(component);
6548 	if (status != NFS4_OK)
6549 		return (status);
6550 
6551 	nm = utf8_to_fn(component, &buflen, NULL);
6552 
6553 	if (nm == NULL)
6554 		return (NFS4ERR_RESOURCE);
6555 
6556 	if (buflen > MAXNAMELEN) {
6557 		kmem_free(nm, buflen);
6558 		return (NFS4ERR_NAMETOOLONG);
6559 	}
6560 
6561 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6562 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6563 	if (error) {
6564 		kmem_free(nm, buflen);
6565 		return (puterrno4(error));
6566 	}
6567 
6568 	if (bva.va_type != VDIR) {
6569 		kmem_free(nm, buflen);
6570 		return (NFS4ERR_NOTDIR);
6571 	}
6572 
6573 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6574 
6575 	switch (args->mode) {
6576 	case GUARDED4:
6577 		/*FALLTHROUGH*/
6578 	case UNCHECKED4:
6579 		nfs4_ntov_table_init(&ntov);
6580 		ntov_table_init = TRUE;
6581 
6582 		*attrset = 0;
6583 		status = do_rfs4_set_attrs(attrset,
6584 		    &args->createhow4_u.createattrs,
6585 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
6586 
6587 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6588 		    sarg.vap->va_type != VREG) {
6589 			if (sarg.vap->va_type == VDIR)
6590 				status = NFS4ERR_ISDIR;
6591 			else if (sarg.vap->va_type == VLNK)
6592 				status = NFS4ERR_SYMLINK;
6593 			else
6594 				status = NFS4ERR_INVAL;
6595 		}
6596 
6597 		if (status != NFS4_OK) {
6598 			kmem_free(nm, buflen);
6599 			nfs4_ntov_table_free(&ntov, &sarg);
6600 			*attrset = 0;
6601 			return (status);
6602 		}
6603 
6604 		vap = sarg.vap;
6605 		vap->va_type = VREG;
6606 		vap->va_mask |= AT_TYPE;
6607 
6608 		if ((vap->va_mask & AT_MODE) == 0) {
6609 			vap->va_mask |= AT_MODE;
6610 			vap->va_mode = (mode_t)0600;
6611 		}
6612 
6613 		if (vap->va_mask & AT_SIZE) {
6614 
6615 			/* Disallow create with a non-zero size */
6616 
6617 			if ((reqsize = sarg.vap->va_size) != 0) {
6618 				kmem_free(nm, buflen);
6619 				nfs4_ntov_table_free(&ntov, &sarg);
6620 				*attrset = 0;
6621 				return (NFS4ERR_INVAL);
6622 			}
6623 			setsize = TRUE;
6624 		}
6625 		break;
6626 
6627 	case EXCLUSIVE4:
6628 		/* prohibit EXCL create of named attributes */
6629 		if (dvp->v_flag & V_XATTRDIR) {
6630 			kmem_free(nm, buflen);
6631 			*attrset = 0;
6632 			return (NFS4ERR_INVAL);
6633 		}
6634 
6635 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6636 		cva.va_type = VREG;
6637 		/*
6638 		 * Ensure no time overflows. Assumes underlying
6639 		 * filesystem supports at least 32 bits.
6640 		 * Truncate nsec to usec resolution to allow valid
6641 		 * compares even if the underlying filesystem truncates.
6642 		 */
6643 		mtime = (timespec32_t *)&args->createhow4_u.createverf;
6644 		cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6645 		cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6646 		cva.va_mode = (mode_t)0;
6647 		vap = &cva;
6648 
6649 		/*
6650 		 * For EXCL create, attrset is set to the server attr
6651 		 * used to cache the client's verifier.
6652 		 */
6653 		*attrset = FATTR4_TIME_MODIFY_MASK;
6654 		break;
6655 	}
6656 
6657 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6658 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6659 	    MAXPATHLEN  + 1);
6660 
6661 	if (name == NULL) {
6662 		kmem_free(nm, buflen);
6663 		return (NFS4ERR_SERVERFAULT);
6664 	}
6665 
6666 	status = create_vnode(dvp, name, vap, args->mode, mtime,
6667 	    cs->cr, &vp, &created);
6668 	if (nm != name)
6669 		kmem_free(name, MAXPATHLEN + 1);
6670 	kmem_free(nm, buflen);
6671 
6672 	if (status != NFS4_OK) {
6673 		if (ntov_table_init)
6674 			nfs4_ntov_table_free(&ntov, &sarg);
6675 		*attrset = 0;
6676 		return (status);
6677 	}
6678 
6679 	trunc = (setsize && !created);
6680 
6681 	if (args->mode != EXCLUSIVE4) {
6682 		bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6683 
6684 		/*
6685 		 * True verification that object was created with correct
6686 		 * attrs is impossible.  The attrs could have been changed
6687 		 * immediately after object creation.  If attributes did
6688 		 * not verify, the only recourse for the server is to
6689 		 * destroy the object.  Maybe if some attrs (like gid)
6690 		 * are set incorrectly, the object should be destroyed;
6691 		 * however, seems bad as a default policy.  Do we really
6692 		 * want to destroy an object over one of the times not
6693 		 * verifying correctly?  For these reasons, the server
6694 		 * currently sets bits in attrset for createattrs
6695 		 * that were set; however, no verification is done.
6696 		 *
6697 		 * vmask_to_nmask accounts for vattr bits set on create
6698 		 *	[do_rfs4_set_attrs() only sets resp bits for
6699 		 *	 non-vattr/vfs bits.]
6700 		 * Mask off any bits we set by default so as not to return
6701 		 * more attrset bits than were requested in createattrs
6702 		 */
6703 		if (created) {
6704 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6705 			*attrset &= createmask;
6706 		} else {
6707 			/*
6708 			 * We did not create the vnode (we tried but it
6709 			 * already existed).  In this case, the only createattr
6710 			 * that the spec allows the server to set is size,
6711 			 * and even then, it can only be set if it is 0.
6712 			 */
6713 			*attrset = 0;
6714 			if (trunc)
6715 				*attrset = FATTR4_SIZE_MASK;
6716 		}
6717 	}
6718 	if (ntov_table_init)
6719 		nfs4_ntov_table_free(&ntov, &sarg);
6720 
6721 	/*
6722 	 * Get the initial "after" sequence number, if it fails,
6723 	 * set to zero, time to before.
6724 	 */
6725 	iva.va_mask = AT_CTIME|AT_SEQ;
6726 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6727 		iva.va_seq = 0;
6728 		iva.va_ctime = bva.va_ctime;
6729 	}
6730 
6731 	/*
6732 	 * create_vnode attempts to create the file exclusive,
6733 	 * if it already exists the VOP_CREATE will fail and
6734 	 * may not increase va_seq. It is atomic if
6735 	 * we haven't changed the directory, but if it has changed
6736 	 * we don't know what changed it.
6737 	 */
6738 	if (!created) {
6739 		if (bva.va_seq && iva.va_seq &&
6740 		    bva.va_seq == iva.va_seq)
6741 			cinfo->atomic = TRUE;
6742 		else
6743 			cinfo->atomic = FALSE;
6744 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6745 	} else {
6746 		/*
6747 		 * The entry was created, we need to sync the
6748 		 * directory metadata.
6749 		 */
6750 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6751 
6752 		/*
6753 		 * Get "after" change value, if it fails, simply return the
6754 		 * before value.
6755 		 */
6756 		ava.va_mask = AT_CTIME|AT_SEQ;
6757 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6758 			ava.va_ctime = bva.va_ctime;
6759 			ava.va_seq = 0;
6760 		}
6761 
6762 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6763 
6764 		/*
6765 		 * The cinfo->atomic = TRUE only if we have
6766 		 * non-zero va_seq's, and it has incremented by exactly one
6767 		 * during the create_vnode and it didn't
6768 		 * change during the VOP_FSYNC.
6769 		 */
6770 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6771 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6772 			cinfo->atomic = TRUE;
6773 		else
6774 			cinfo->atomic = FALSE;
6775 	}
6776 
6777 	/* Check for mandatory locking and that the size gets set. */
6778 	cva.va_mask = AT_MODE;
6779 	if (setsize)
6780 		cva.va_mask |= AT_SIZE;
6781 
6782 	/* Assume the worst */
6783 	cs->mandlock = TRUE;
6784 
6785 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6786 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6787 
6788 		/*
6789 		 * Truncate the file if necessary; this would be
6790 		 * the case for create over an existing file.
6791 		 */
6792 
6793 		if (trunc) {
6794 			int in_crit = 0;
6795 			rfs4_file_t *fp;
6796 			bool_t create = FALSE;
6797 
6798 			/*
6799 			 * We are writing over an existing file.
6800 			 * Check to see if we need to recall a delegation.
6801 			 */
6802 			rfs4_hold_deleg_policy();
6803 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6804 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6805 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
6806 					rfs4_file_rele(fp);
6807 					rfs4_rele_deleg_policy();
6808 					VN_RELE(vp);
6809 					*attrset = 0;
6810 					return (NFS4ERR_DELAY);
6811 				}
6812 				rfs4_file_rele(fp);
6813 			}
6814 			rfs4_rele_deleg_policy();
6815 
6816 			if (nbl_need_check(vp)) {
6817 				in_crit = 1;
6818 
6819 				ASSERT(reqsize == 0);
6820 
6821 				nbl_start_crit(vp, RW_READER);
6822 				if (nbl_conflict(vp, NBL_WRITE, 0,
6823 				    cva.va_size, 0, NULL)) {
6824 					in_crit = 0;
6825 					nbl_end_crit(vp);
6826 					VN_RELE(vp);
6827 					*attrset = 0;
6828 					return (NFS4ERR_ACCESS);
6829 				}
6830 			}
6831 			ct.cc_sysid = 0;
6832 			ct.cc_pid = 0;
6833 			ct.cc_caller_id = nfs4_srv_caller_id;
6834 			ct.cc_flags = CC_DONTBLOCK;
6835 
6836 			cva.va_mask = AT_SIZE;
6837 			cva.va_size = reqsize;
6838 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6839 			if (in_crit)
6840 				nbl_end_crit(vp);
6841 		}
6842 	}
6843 
6844 	error = makefh4(&cs->fh, vp, cs->exi);
6845 
6846 	/*
6847 	 * Force modified data and metadata out to stable storage.
6848 	 */
6849 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6850 
6851 	if (error) {
6852 		VN_RELE(vp);
6853 		*attrset = 0;
6854 		return (puterrno4(error));
6855 	}
6856 
6857 	/* if parent dir is attrdir, set namedattr fh flag */
6858 	if (dvp->v_flag & V_XATTRDIR)
6859 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6860 
6861 	if (cs->vp)
6862 		VN_RELE(cs->vp);
6863 
6864 	cs->vp = vp;
6865 
6866 	/*
6867 	 * if we did not create the file, we will need to check
6868 	 * the access bits on the file
6869 	 */
6870 
6871 	if (!created) {
6872 		if (setsize)
6873 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6874 		status = check_open_access(args->share_access, cs, req);
6875 		if (status != NFS4_OK)
6876 			*attrset = 0;
6877 	}
6878 	return (status);
6879 }
6880 
6881 /*ARGSUSED*/
6882 static void
6883 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6884     rfs4_openowner_t *oo, delegreq_t deleg,
6885     uint32_t access, uint32_t deny,
6886     OPEN4res *resp, int deleg_cur)
6887 {
6888 	/* XXX Currently not using req  */
6889 	rfs4_state_t *sp;
6890 	rfs4_file_t *fp;
6891 	bool_t screate = TRUE;
6892 	bool_t fcreate = TRUE;
6893 	uint32_t open_a, share_a;
6894 	uint32_t open_d, share_d;
6895 	rfs4_deleg_state_t *dsp;
6896 	sysid_t sysid;
6897 	nfsstat4 status;
6898 	caller_context_t ct;
6899 	int fflags = 0;
6900 	int recall = 0;
6901 	int err;
6902 	int first_open;
6903 
6904 	/* get the file struct and hold a lock on it during initial open */
6905 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6906 	if (fp == NULL) {
6907 		resp->status = NFS4ERR_RESOURCE;
6908 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6909 		return;
6910 	}
6911 
6912 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6913 	if (sp == NULL) {
6914 		resp->status = NFS4ERR_RESOURCE;
6915 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6916 		/* No need to keep any reference */
6917 		rw_exit(&fp->rf_file_rwlock);
6918 		rfs4_file_rele(fp);
6919 		return;
6920 	}
6921 
6922 	/* try to get the sysid before continuing */
6923 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6924 		resp->status = status;
6925 		rfs4_file_rele(fp);
6926 		/* Not a fully formed open; "close" it */
6927 		if (screate == TRUE)
6928 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6929 		rfs4_state_rele(sp);
6930 		return;
6931 	}
6932 
6933 	/* Calculate the fflags for this OPEN. */
6934 	if (access & OPEN4_SHARE_ACCESS_READ)
6935 		fflags |= FREAD;
6936 	if (access & OPEN4_SHARE_ACCESS_WRITE)
6937 		fflags |= FWRITE;
6938 
6939 	rfs4_dbe_lock(sp->rs_dbe);
6940 
6941 	/*
6942 	 * Calculate the new deny and access mode that this open is adding to
6943 	 * the file for this open owner;
6944 	 */
6945 	open_d = (deny & ~sp->rs_open_deny);
6946 	open_a = (access & ~sp->rs_open_access);
6947 
6948 	/*
6949 	 * Calculate the new share access and share deny modes that this open
6950 	 * is adding to the file for this open owner;
6951 	 */
6952 	share_a = (access & ~sp->rs_share_access);
6953 	share_d = (deny & ~sp->rs_share_deny);
6954 
6955 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6956 
6957 	/*
6958 	 * Check to see the client has already sent an open for this
6959 	 * open owner on this file with the same share/deny modes.
6960 	 * If so, we don't need to check for a conflict and we don't
6961 	 * need to add another shrlock.  If not, then we need to
6962 	 * check for conflicts in deny and access before checking for
6963 	 * conflicts in delegation.  We don't want to recall a
6964 	 * delegation based on an open that will eventually fail based
6965 	 * on shares modes.
6966 	 */
6967 
6968 	if (share_a || share_d) {
6969 		if ((err = rfs4_share(sp, access, deny)) != 0) {
6970 			rfs4_dbe_unlock(sp->rs_dbe);
6971 			resp->status = err;
6972 
6973 			rfs4_file_rele(fp);
6974 			/* Not a fully formed open; "close" it */
6975 			if (screate == TRUE)
6976 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6977 			rfs4_state_rele(sp);
6978 			return;
6979 		}
6980 	}
6981 
6982 	rfs4_dbe_lock(fp->rf_dbe);
6983 
6984 	/*
6985 	 * Check to see if this file is delegated and if so, if a
6986 	 * recall needs to be done.
6987 	 */
6988 	if (rfs4_check_recall(sp, access)) {
6989 		rfs4_dbe_unlock(fp->rf_dbe);
6990 		rfs4_dbe_unlock(sp->rs_dbe);
6991 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6992 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
6993 		rfs4_dbe_lock(sp->rs_dbe);
6994 
6995 		/* if state closed while lock was dropped */
6996 		if (sp->rs_closed) {
6997 			if (share_a || share_d)
6998 				(void) rfs4_unshare(sp);
6999 			rfs4_dbe_unlock(sp->rs_dbe);
7000 			rfs4_file_rele(fp);
7001 			/* Not a fully formed open; "close" it */
7002 			if (screate == TRUE)
7003 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7004 			rfs4_state_rele(sp);
7005 			resp->status = NFS4ERR_OLD_STATEID;
7006 			return;
7007 		}
7008 
7009 		rfs4_dbe_lock(fp->rf_dbe);
7010 		/* Let's see if the delegation was returned */
7011 		if (rfs4_check_recall(sp, access)) {
7012 			rfs4_dbe_unlock(fp->rf_dbe);
7013 			if (share_a || share_d)
7014 				(void) rfs4_unshare(sp);
7015 			rfs4_dbe_unlock(sp->rs_dbe);
7016 			rfs4_file_rele(fp);
7017 			rfs4_update_lease(sp->rs_owner->ro_client);
7018 
7019 			/* Not a fully formed open; "close" it */
7020 			if (screate == TRUE)
7021 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7022 			rfs4_state_rele(sp);
7023 			resp->status = NFS4ERR_DELAY;
7024 			return;
7025 		}
7026 	}
7027 	/*
7028 	 * the share check passed and any delegation conflict has been
7029 	 * taken care of, now call vop_open.
7030 	 * if this is the first open then call vop_open with fflags.
7031 	 * if not, call vn_open_upgrade with just the upgrade flags.
7032 	 *
7033 	 * if the file has been opened already, it will have the current
7034 	 * access mode in the state struct.  if it has no share access, then
7035 	 * this is a new open.
7036 	 *
7037 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7038 	 * call VOP_OPEN(), just do the open upgrade.
7039 	 */
7040 	if (first_open && !deleg_cur) {
7041 		ct.cc_sysid = sysid;
7042 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7043 		ct.cc_caller_id = nfs4_srv_caller_id;
7044 		ct.cc_flags = CC_DONTBLOCK;
7045 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7046 		if (err) {
7047 			rfs4_dbe_unlock(fp->rf_dbe);
7048 			if (share_a || share_d)
7049 				(void) rfs4_unshare(sp);
7050 			rfs4_dbe_unlock(sp->rs_dbe);
7051 			rfs4_file_rele(fp);
7052 
7053 			/* Not a fully formed open; "close" it */
7054 			if (screate == TRUE)
7055 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7056 			rfs4_state_rele(sp);
7057 			/* check if a monitor detected a delegation conflict */
7058 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7059 				resp->status = NFS4ERR_DELAY;
7060 			else
7061 				resp->status = NFS4ERR_SERVERFAULT;
7062 			return;
7063 		}
7064 	} else { /* open upgrade */
7065 		/*
7066 		 * calculate the fflags for the new mode that is being added
7067 		 * by this upgrade.
7068 		 */
7069 		fflags = 0;
7070 		if (open_a & OPEN4_SHARE_ACCESS_READ)
7071 			fflags |= FREAD;
7072 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7073 			fflags |= FWRITE;
7074 		vn_open_upgrade(cs->vp, fflags);
7075 	}
7076 	sp->rs_open_access |= access;
7077 	sp->rs_open_deny |= deny;
7078 
7079 	if (open_d & OPEN4_SHARE_DENY_READ)
7080 		fp->rf_deny_read++;
7081 	if (open_d & OPEN4_SHARE_DENY_WRITE)
7082 		fp->rf_deny_write++;
7083 	fp->rf_share_deny |= deny;
7084 
7085 	if (open_a & OPEN4_SHARE_ACCESS_READ)
7086 		fp->rf_access_read++;
7087 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7088 		fp->rf_access_write++;
7089 	fp->rf_share_access |= access;
7090 
7091 	/*
7092 	 * Check for delegation here. if the deleg argument is not
7093 	 * DELEG_ANY, then this is a reclaim from a client and
7094 	 * we must honor the delegation requested. If necessary we can
7095 	 * set the recall flag.
7096 	 */
7097 
7098 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
7099 
7100 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7101 
7102 	next_stateid(&sp->rs_stateid);
7103 
7104 	resp->stateid = sp->rs_stateid.stateid;
7105 
7106 	rfs4_dbe_unlock(fp->rf_dbe);
7107 	rfs4_dbe_unlock(sp->rs_dbe);
7108 
7109 	if (dsp) {
7110 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7111 		rfs4_deleg_state_rele(dsp);
7112 	}
7113 
7114 	rfs4_file_rele(fp);
7115 	rfs4_state_rele(sp);
7116 
7117 	resp->status = NFS4_OK;
7118 }
7119 
7120 /*ARGSUSED*/
7121 static void
7122 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7123     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7124 {
7125 	change_info4 *cinfo = &resp->cinfo;
7126 	bitmap4 *attrset = &resp->attrset;
7127 
7128 	if (args->opentype == OPEN4_NOCREATE)
7129 		resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
7130 		    req, cs, args->share_access, cinfo);
7131 	else {
7132 		/* inhibit delegation grants during exclusive create */
7133 
7134 		if (args->mode == EXCLUSIVE4)
7135 			rfs4_disable_delegation();
7136 
7137 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7138 		    oo->ro_client->rc_clientid);
7139 	}
7140 
7141 	if (resp->status == NFS4_OK) {
7142 
7143 		/* cs->vp cs->fh now reference the desired file */
7144 
7145 		rfs4_do_open(cs, req, oo,
7146 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7147 		    args->share_access, args->share_deny, resp, 0);
7148 
7149 		/*
7150 		 * If rfs4_createfile set attrset, we must
7151 		 * clear this attrset before the response is copied.
7152 		 */
7153 		if (resp->status != NFS4_OK && resp->attrset) {
7154 			resp->attrset = 0;
7155 		}
7156 	}
7157 	else
7158 		*cs->statusp = resp->status;
7159 
7160 	if (args->mode == EXCLUSIVE4)
7161 		rfs4_enable_delegation();
7162 }
7163 
7164 /*ARGSUSED*/
7165 static void
7166 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7167     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7168 {
7169 	change_info4 *cinfo = &resp->cinfo;
7170 	vattr_t va;
7171 	vtype_t v_type = cs->vp->v_type;
7172 	int error = 0;
7173 
7174 	/* Verify that we have a regular file */
7175 	if (v_type != VREG) {
7176 		if (v_type == VDIR)
7177 			resp->status = NFS4ERR_ISDIR;
7178 		else if (v_type == VLNK)
7179 			resp->status = NFS4ERR_SYMLINK;
7180 		else
7181 			resp->status = NFS4ERR_INVAL;
7182 		return;
7183 	}
7184 
7185 	va.va_mask = AT_MODE|AT_UID;
7186 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7187 	if (error) {
7188 		resp->status = puterrno4(error);
7189 		return;
7190 	}
7191 
7192 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7193 
7194 	/*
7195 	 * Check if we have access to the file, Note the the file
7196 	 * could have originally been open UNCHECKED or GUARDED
7197 	 * with mode bits that will now fail, but there is nothing
7198 	 * we can really do about that except in the case that the
7199 	 * owner of the file is the one requesting the open.
7200 	 */
7201 	if (crgetuid(cs->cr) != va.va_uid) {
7202 		resp->status = check_open_access(args->share_access, cs, req);
7203 		if (resp->status != NFS4_OK) {
7204 			return;
7205 		}
7206 	}
7207 
7208 	/*
7209 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7210 	 */
7211 	cinfo->before = 0;
7212 	cinfo->after = 0;
7213 	cinfo->atomic = FALSE;
7214 
7215 	rfs4_do_open(cs, req, oo,
7216 	    NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7217 	    args->share_access, args->share_deny, resp, 0);
7218 }
7219 
7220 static void
7221 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7222     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7223 {
7224 	int error;
7225 	nfsstat4 status;
7226 	stateid4 stateid =
7227 	    args->open_claim4_u.delegate_cur_info.delegate_stateid;
7228 	rfs4_deleg_state_t *dsp;
7229 
7230 	/*
7231 	 * Find the state info from the stateid and confirm that the
7232 	 * file is delegated.  If the state openowner is the same as
7233 	 * the supplied openowner we're done. If not, get the file
7234 	 * info from the found state info. Use that file info to
7235 	 * create the state for this lock owner. Note solaris doen't
7236 	 * really need the pathname to find the file. We may want to
7237 	 * lookup the pathname and make sure that the vp exist and
7238 	 * matches the vp in the file structure. However it is
7239 	 * possible that the pathname nolonger exists (local process
7240 	 * unlinks the file), so this may not be that useful.
7241 	 */
7242 
7243 	status = rfs4_get_deleg_state(&stateid, &dsp);
7244 	if (status != NFS4_OK) {
7245 		resp->status = status;
7246 		return;
7247 	}
7248 
7249 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7250 
7251 	/*
7252 	 * New lock owner, create state. Since this was probably called
7253 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
7254 	 */
7255 
7256 	ASSERT(cs->vp != NULL);
7257 	VN_RELE(cs->vp);
7258 	VN_HOLD(dsp->rds_finfo->rf_vp);
7259 	cs->vp = dsp->rds_finfo->rf_vp;
7260 
7261 	if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7262 		rfs4_deleg_state_rele(dsp);
7263 		*cs->statusp = resp->status = puterrno4(error);
7264 		return;
7265 	}
7266 
7267 	/* Mark progress for delegation returns */
7268 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7269 	rfs4_deleg_state_rele(dsp);
7270 	rfs4_do_open(cs, req, oo, DELEG_NONE,
7271 	    args->share_access, args->share_deny, resp, 1);
7272 }
7273 
7274 /*ARGSUSED*/
7275 static void
7276 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7277     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7278 {
7279 	/*
7280 	 * Lookup the pathname, it must already exist since this file
7281 	 * was delegated.
7282 	 *
7283 	 * Find the file and state info for this vp and open owner pair.
7284 	 *	check that they are in fact delegated.
7285 	 *	check that the state access and deny modes are the same.
7286 	 *
7287 	 * Return the delgation possibly seting the recall flag.
7288 	 */
7289 	rfs4_file_t *fp;
7290 	rfs4_state_t *sp;
7291 	bool_t create = FALSE;
7292 	bool_t dcreate = FALSE;
7293 	rfs4_deleg_state_t *dsp;
7294 	nfsace4 *ace;
7295 
7296 	/* Note we ignore oflags */
7297 	resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7298 	    req, cs, args->share_access, &resp->cinfo);
7299 
7300 	if (resp->status != NFS4_OK) {
7301 		return;
7302 	}
7303 
7304 	/* get the file struct and hold a lock on it during initial open */
7305 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7306 	if (fp == NULL) {
7307 		resp->status = NFS4ERR_RESOURCE;
7308 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7309 		return;
7310 	}
7311 
7312 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7313 	if (sp == NULL) {
7314 		resp->status = NFS4ERR_SERVERFAULT;
7315 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7316 		rw_exit(&fp->rf_file_rwlock);
7317 		rfs4_file_rele(fp);
7318 		return;
7319 	}
7320 
7321 	rfs4_dbe_lock(sp->rs_dbe);
7322 	rfs4_dbe_lock(fp->rf_dbe);
7323 	if (args->share_access != sp->rs_share_access ||
7324 	    args->share_deny != sp->rs_share_deny ||
7325 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7326 		NFS4_DEBUG(rfs4_debug,
7327 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7328 		rfs4_dbe_unlock(fp->rf_dbe);
7329 		rfs4_dbe_unlock(sp->rs_dbe);
7330 		rfs4_file_rele(fp);
7331 		rfs4_state_rele(sp);
7332 		resp->status = NFS4ERR_SERVERFAULT;
7333 		return;
7334 	}
7335 	rfs4_dbe_unlock(fp->rf_dbe);
7336 	rfs4_dbe_unlock(sp->rs_dbe);
7337 
7338 	dsp = rfs4_finddeleg(sp, &dcreate);
7339 	if (dsp == NULL) {
7340 		rfs4_state_rele(sp);
7341 		rfs4_file_rele(fp);
7342 		resp->status = NFS4ERR_SERVERFAULT;
7343 		return;
7344 	}
7345 
7346 	next_stateid(&sp->rs_stateid);
7347 
7348 	resp->stateid = sp->rs_stateid.stateid;
7349 
7350 	resp->delegation.delegation_type = dsp->rds_dtype;
7351 
7352 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7353 		open_read_delegation4 *rv =
7354 		    &resp->delegation.open_delegation4_u.read;
7355 
7356 		rv->stateid = dsp->rds_delegid.stateid;
7357 		rv->recall = FALSE; /* no policy in place to set to TRUE */
7358 		ace = &rv->permissions;
7359 	} else {
7360 		open_write_delegation4 *rv =
7361 		    &resp->delegation.open_delegation4_u.write;
7362 
7363 		rv->stateid = dsp->rds_delegid.stateid;
7364 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
7365 		ace = &rv->permissions;
7366 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
7367 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7368 	}
7369 
7370 	/* XXX For now */
7371 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7372 	ace->flag = 0;
7373 	ace->access_mask = 0;
7374 	ace->who.utf8string_len = 0;
7375 	ace->who.utf8string_val = 0;
7376 
7377 	rfs4_deleg_state_rele(dsp);
7378 	rfs4_state_rele(sp);
7379 	rfs4_file_rele(fp);
7380 }
7381 
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* same seqid and same op as the last request */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7387 
7388 /*
7389  * Generic function for sequence number checks.
7390  */
7391 static rfs4_chkseq_t
7392 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7393     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7394 {
7395 	/* Same sequence ids and matching operations? */
7396 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
7397 		if (copyres == TRUE) {
7398 			rfs4_free_reply(resop);
7399 			rfs4_copy_reply(resop, lastop);
7400 		}
7401 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7402 		    "Replayed SEQID %d\n", seqid));
7403 		return (NFS4_CHKSEQ_REPLAY);
7404 	}
7405 
7406 	/* If the incoming sequence is not the next expected then it is bad */
7407 	if (rqst_seq != seqid + 1) {
7408 		if (rqst_seq == seqid) {
7409 			NFS4_DEBUG(rfs4_debug,
7410 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
7411 			    "but last op was %d current op is %d\n",
7412 			    lastop->resop, resop->resop));
7413 			return (NFS4_CHKSEQ_BAD);
7414 		}
7415 		NFS4_DEBUG(rfs4_debug,
7416 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7417 		    rqst_seq, seqid));
7418 		return (NFS4_CHKSEQ_BAD);
7419 	}
7420 
7421 	/* Everything okay -- next expected */
7422 	return (NFS4_CHKSEQ_OKAY);
7423 }
7424 
7425 
7426 static rfs4_chkseq_t
7427 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7428 {
7429 	rfs4_chkseq_t rc;
7430 
7431 	rfs4_dbe_lock(op->ro_dbe);
7432 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7433 	    TRUE);
7434 	rfs4_dbe_unlock(op->ro_dbe);
7435 
7436 	if (rc == NFS4_CHKSEQ_OKAY)
7437 		rfs4_update_lease(op->ro_client);
7438 
7439 	return (rc);
7440 }
7441 
7442 static rfs4_chkseq_t
7443 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7444 {
7445 	rfs4_chkseq_t rc;
7446 
7447 	rfs4_dbe_lock(op->ro_dbe);
7448 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7449 	    olo_seqid, resop, FALSE);
7450 	rfs4_dbe_unlock(op->ro_dbe);
7451 
7452 	return (rc);
7453 }
7454 
7455 static rfs4_chkseq_t
7456 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7457 {
7458 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7459 
7460 	rfs4_dbe_lock(lsp->rls_dbe);
7461 	if (!lsp->rls_skip_seqid_check)
7462 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7463 		    resop, TRUE);
7464 	rfs4_dbe_unlock(lsp->rls_dbe);
7465 
7466 	return (rc);
7467 }
7468 
/*
 * OPEN operation handler.
 *
 * Outline of the processing done here:
 *  - a current filehandle (cs->vp) is required, else NFS4ERR_NOFILEHANDLE;
 *  - the client is looked up by the clientid in the open owner; an
 *    expired lease closes the client and yields NFS4ERR_EXPIRED;
 *  - the open owner is found or created and access to its sequence
 *    space is held (ro_sw) for the remainder of the operation;
 *  - for a pre-existing owner (unless confirmation was postponed) the
 *    request sequence id is checked, which may detect a replay;
 *  - grace-period rules, the mount-point check and the share
 *    access/deny bit validation are applied;
 *  - the open itself is delegated to a per-claim-type helper;
 *  - at "out" the sequence id, cached reply and confirm-postponing
 *    flags of the open owner are updated according to the status.
 *
 * The result is stored in resp->status and mirrored into *cs->statusp.
 *
 * Label usage: "out" expects both cp and oo to be held, "finish" expects
 * only oo (cp already released), "end" expects neither.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/* no separate rele of cp on this path; client_close is used */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Snapshot taken once; used by the CLAIM_PREVIOUS grace check below */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: TRUE on return means the owner was just created */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/* Hold off access to the sequence space while the open is done */
	rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * A larger seqid on a still-unconfirmed owner:
			 * drop the owner's opens and start over with a
			 * fresh lookup instead of failing the request.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/*
			 * The cached reply has already been copied into
			 * resop by the sequence check (copyres is TRUE).
			 */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper reports its result through resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			/* Swap the export reference for the saved fh's one */
			if (cs->exi)
				exi_rele(cs->exi);
			cs->exi = checkexport(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* On failure the error is left in resp->status */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Paths that set only resp->status are mirrored to the compound here */
	*cs->statusp = resp->status;

	rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7753 
7754 /*ARGSUSED*/
7755 void
7756 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7757     struct svc_req *req, struct compound_state *cs)
7758 {
7759 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7760 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7761 	rfs4_state_t *sp;
7762 	nfsstat4 status;
7763 
7764 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7765 	    OPEN_CONFIRM4args *, args);
7766 
7767 	if (cs->vp == NULL) {
7768 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7769 		goto out;
7770 	}
7771 
7772 	if (cs->vp->v_type != VREG) {
7773 		*cs->statusp = resp->status =
7774 		    cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7775 		return;
7776 	}
7777 
7778 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7779 	if (status != NFS4_OK) {
7780 		*cs->statusp = resp->status = status;
7781 		goto out;
7782 	}
7783 
7784 	/* Ensure specified filehandle matches */
7785 	if (cs->vp != sp->rs_finfo->rf_vp) {
7786 		rfs4_state_rele(sp);
7787 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7788 		goto out;
7789 	}
7790 
7791 	/* hold off other access to open_owner while we tinker */
7792 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7793 
7794 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7795 	case NFS4_CHECK_STATEID_OKAY:
7796 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7797 		    resop) != 0) {
7798 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7799 			break;
7800 		}
7801 		/*
7802 		 * If it is the appropriate stateid and determined to
7803 		 * be "OKAY" then this means that the stateid does not
7804 		 * need to be confirmed and the client is in error for
7805 		 * sending an OPEN_CONFIRM.
7806 		 */
7807 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7808 		break;
7809 	case NFS4_CHECK_STATEID_OLD:
7810 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7811 		break;
7812 	case NFS4_CHECK_STATEID_BAD:
7813 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7814 		break;
7815 	case NFS4_CHECK_STATEID_EXPIRED:
7816 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7817 		break;
7818 	case NFS4_CHECK_STATEID_CLOSED:
7819 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7820 		break;
7821 	case NFS4_CHECK_STATEID_REPLAY:
7822 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7823 		    resop)) {
7824 		case NFS4_CHKSEQ_OKAY:
7825 			/*
7826 			 * This is replayed stateid; if seqid matches
7827 			 * next expected, then client is using wrong seqid.
7828 			 */
7829 			/* fall through */
7830 		case NFS4_CHKSEQ_BAD:
7831 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7832 			break;
7833 		case NFS4_CHKSEQ_REPLAY:
7834 			/*
7835 			 * Note this case is the duplicate case so
7836 			 * resp->status is already set.
7837 			 */
7838 			*cs->statusp = resp->status;
7839 			rfs4_update_lease(sp->rs_owner->ro_client);
7840 			break;
7841 		}
7842 		break;
7843 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7844 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7845 		    resop) != NFS4_CHKSEQ_OKAY) {
7846 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7847 			break;
7848 		}
7849 		*cs->statusp = resp->status = NFS4_OK;
7850 
7851 		next_stateid(&sp->rs_stateid);
7852 		resp->open_stateid = sp->rs_stateid.stateid;
7853 		sp->rs_owner->ro_need_confirm = FALSE;
7854 		rfs4_update_lease(sp->rs_owner->ro_client);
7855 		rfs4_update_open_sequence(sp->rs_owner);
7856 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7857 		break;
7858 	default:
7859 		ASSERT(FALSE);
7860 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7861 		break;
7862 	}
7863 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
7864 	rfs4_state_rele(sp);
7865 
7866 out:
7867 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7868 	    OPEN_CONFIRM4res *, resp);
7869 }
7870 
7871 /*ARGSUSED*/
7872 void
7873 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7874     struct svc_req *req, struct compound_state *cs)
7875 {
7876 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7877 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7878 	uint32_t access = args->share_access;
7879 	uint32_t deny = args->share_deny;
7880 	nfsstat4 status;
7881 	rfs4_state_t *sp;
7882 	rfs4_file_t *fp;
7883 	int fflags = 0;
7884 
7885 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7886 	    OPEN_DOWNGRADE4args *, args);
7887 
7888 	if (cs->vp == NULL) {
7889 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7890 		goto out;
7891 	}
7892 
7893 	if (cs->vp->v_type != VREG) {
7894 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7895 		return;
7896 	}
7897 
7898 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7899 	if (status != NFS4_OK) {
7900 		*cs->statusp = resp->status = status;
7901 		goto out;
7902 	}
7903 
7904 	/* Ensure specified filehandle matches */
7905 	if (cs->vp != sp->rs_finfo->rf_vp) {
7906 		rfs4_state_rele(sp);
7907 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7908 		goto out;
7909 	}
7910 
7911 	/* hold off other access to open_owner while we tinker */
7912 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7913 
7914 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7915 	case NFS4_CHECK_STATEID_OKAY:
7916 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7917 		    resop) != NFS4_CHKSEQ_OKAY) {
7918 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7919 			goto end;
7920 		}
7921 		break;
7922 	case NFS4_CHECK_STATEID_OLD:
7923 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7924 		goto end;
7925 	case NFS4_CHECK_STATEID_BAD:
7926 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7927 		goto end;
7928 	case NFS4_CHECK_STATEID_EXPIRED:
7929 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7930 		goto end;
7931 	case NFS4_CHECK_STATEID_CLOSED:
7932 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7933 		goto end;
7934 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7935 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7936 		goto end;
7937 	case NFS4_CHECK_STATEID_REPLAY:
7938 		/* Check the sequence id for the open owner */
7939 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7940 		    resop)) {
7941 		case NFS4_CHKSEQ_OKAY:
7942 			/*
7943 			 * This is replayed stateid; if seqid matches
7944 			 * next expected, then client is using wrong seqid.
7945 			 */
7946 			/* fall through */
7947 		case NFS4_CHKSEQ_BAD:
7948 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7949 			goto end;
7950 		case NFS4_CHKSEQ_REPLAY:
7951 			/*
7952 			 * Note this case is the duplicate case so
7953 			 * resp->status is already set.
7954 			 */
7955 			*cs->statusp = resp->status;
7956 			rfs4_update_lease(sp->rs_owner->ro_client);
7957 			goto end;
7958 		}
7959 		break;
7960 	default:
7961 		ASSERT(FALSE);
7962 		break;
7963 	}
7964 
7965 	rfs4_dbe_lock(sp->rs_dbe);
7966 	/*
7967 	 * Check that the new access modes and deny modes are valid.
7968 	 * Check that no invalid bits are set.
7969 	 */
7970 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7971 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7972 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7973 		rfs4_update_open_sequence(sp->rs_owner);
7974 		rfs4_dbe_unlock(sp->rs_dbe);
7975 		goto end;
7976 	}
7977 
7978 	/*
7979 	 * The new modes must be a subset of the current modes and
7980 	 * the access must specify at least one mode. To test that
7981 	 * the new mode is a subset of the current modes we bitwise
7982 	 * AND them together and check that the result equals the new
7983 	 * mode. For example:
7984 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
7985 	 * access & sp->rs_open_access == R == access, so the new access mode
7986 	 * is valid. Consider access == RW, sp->rs_open_access = R
7987 	 * access & sp->rs_open_access == R != access, so the new access mode
7988 	 * is invalid.
7989 	 */
7990 	if ((access & sp->rs_open_access) != access ||
7991 	    (deny & sp->rs_open_deny) != deny ||
7992 	    (access &
7993 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7994 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7995 		rfs4_update_open_sequence(sp->rs_owner);
7996 		rfs4_dbe_unlock(sp->rs_dbe);
7997 		goto end;
7998 	}
7999 
8000 	/*
8001 	 * Release any share locks associated with this stateID.
8002 	 * Strictly speaking, this violates the spec because the
8003 	 * spec effectively requires that open downgrade be atomic.
8004 	 * At present, fs_shrlock does not have this capability.
8005 	 */
8006 	(void) rfs4_unshare(sp);
8007 
8008 	status = rfs4_share(sp, access, deny);
8009 	if (status != NFS4_OK) {
8010 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8011 		rfs4_update_open_sequence(sp->rs_owner);
8012 		rfs4_dbe_unlock(sp->rs_dbe);
8013 		goto end;
8014 	}
8015 
8016 	fp = sp->rs_finfo;
8017 	rfs4_dbe_lock(fp->rf_dbe);
8018 
8019 	/*
8020 	 * If the current mode has deny read and the new mode
8021 	 * does not, decrement the number of deny read mode bits
8022 	 * and if it goes to zero turn off the deny read bit
8023 	 * on the file.
8024 	 */
8025 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8026 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
8027 		fp->rf_deny_read--;
8028 		if (fp->rf_deny_read == 0)
8029 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8030 	}
8031 
8032 	/*
8033 	 * If the current mode has deny write and the new mode
8034 	 * does not, decrement the number of deny write mode bits
8035 	 * and if it goes to zero turn off the deny write bit
8036 	 * on the file.
8037 	 */
8038 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8039 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8040 		fp->rf_deny_write--;
8041 		if (fp->rf_deny_write == 0)
8042 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8043 	}
8044 
8045 	/*
8046 	 * If the current mode has access read and the new mode
8047 	 * does not, decrement the number of access read mode bits
8048 	 * and if it goes to zero turn off the access read bit
8049 	 * on the file.  set fflags to FREAD for the call to
8050 	 * vn_open_downgrade().
8051 	 */
8052 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8053 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8054 		fp->rf_access_read--;
8055 		if (fp->rf_access_read == 0)
8056 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8057 		fflags |= FREAD;
8058 	}
8059 
8060 	/*
8061 	 * If the current mode has access write and the new mode
8062 	 * does not, decrement the number of access write mode bits
8063 	 * and if it goes to zero turn off the access write bit
8064 	 * on the file.  set fflags to FWRITE for the call to
8065 	 * vn_open_downgrade().
8066 	 */
8067 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8068 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8069 		fp->rf_access_write--;
8070 		if (fp->rf_access_write == 0)
8071 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8072 		fflags |= FWRITE;
8073 	}
8074 
8075 	/* Check that the file is still accessible */
8076 	ASSERT(fp->rf_share_access);
8077 
8078 	rfs4_dbe_unlock(fp->rf_dbe);
8079 
8080 	/* now set the new open access and deny modes */
8081 	sp->rs_open_access = access;
8082 	sp->rs_open_deny = deny;
8083 
8084 	/*
8085 	 * we successfully downgraded the share lock, now we need to downgrade
8086 	 * the open. it is possible that the downgrade was only for a deny
8087 	 * mode and we have nothing else to do.
8088 	 */
8089 	if ((fflags & (FREAD|FWRITE)) != 0)
8090 		vn_open_downgrade(cs->vp, fflags);
8091 
8092 	/* Update the stateid */
8093 	next_stateid(&sp->rs_stateid);
8094 	resp->open_stateid = sp->rs_stateid.stateid;
8095 
8096 	rfs4_dbe_unlock(sp->rs_dbe);
8097 
8098 	*cs->statusp = resp->status = NFS4_OK;
8099 	/* Update the lease */
8100 	rfs4_update_lease(sp->rs_owner->ro_client);
8101 	/* And the sequence */
8102 	rfs4_update_open_sequence(sp->rs_owner);
8103 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8104 
8105 end:
8106 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8107 	rfs4_state_rele(sp);
8108 out:
8109 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8110 	    OPEN_DOWNGRADE4res *, resp);
8111 }
8112 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * memory region s1; the region itself need not be NUL-terminated.
 *
 * Returns a pointer to the first byte of the first occurrence, or NULL
 * when s2 does not occur entirely within the n bytes.  An empty s2
 * matches at s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cursor;
	size_t remaining;

	/* Stop once fewer bytes remain than the needle is long */
	for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
8128 
8129 /*
8130  * The logic behind this function is detailed in the NFSv4 RFC in the
8131  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8132  * that section for explicit guidance to server behavior for
8133  * SETCLIENTID.
8134  */
8135 void
8136 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8137     struct svc_req *req, struct compound_state *cs)
8138 {
8139 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8140 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8141 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8142 	rfs4_clntip_t *ci;
8143 	bool_t create;
8144 	char *addr, *netid;
8145 	int len;
8146 
8147 	DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8148 	    SETCLIENTID4args *, args);
8149 retry:
8150 	newcp = cp_confirmed = cp_unconfirmed = NULL;
8151 
8152 	/*
8153 	 * Save the caller's IP address
8154 	 */
8155 	args->client.cl_addr =
8156 	    (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8157 
8158 	/*
8159 	 * Record if it is a Solaris client that cannot handle referrals.
8160 	 */
8161 	if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8162 	    !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8163 		/* Add a "yes, it's downrev" record */
8164 		create = TRUE;
8165 		ci = rfs4_find_clntip(args->client.cl_addr, &create);
8166 		ASSERT(ci != NULL);
8167 		rfs4_dbe_rele(ci->ri_dbe);
8168 	} else {
8169 		/* Remove any previous record */
8170 		rfs4_invalidate_clntip(args->client.cl_addr);
8171 	}
8172 
8173 	/*
8174 	 * In search of an EXISTING client matching the incoming
8175 	 * request to establish a new client identifier at the server
8176 	 */
8177 	create = TRUE;
8178 	cp = rfs4_findclient(&args->client, &create, NULL);
8179 
8180 	/* Should never happen */
8181 	ASSERT(cp != NULL);
8182 
8183 	if (cp == NULL) {
8184 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8185 		goto out;
8186 	}
8187 
8188 	/*
8189 	 * Easiest case. Client identifier is newly created and is
8190 	 * unconfirmed.  Also note that for this case, no other
8191 	 * entries exist for the client identifier.  Nothing else to
8192 	 * check.  Just setup the response and respond.
8193 	 */
8194 	if (create) {
8195 		*cs->statusp = res->status = NFS4_OK;
8196 		res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8197 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8198 		    cp->rc_confirm_verf;
8199 		/* Setup callback information; CB_NULL confirmation later */
8200 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8201 
8202 		rfs4_client_rele(cp);
8203 		goto out;
8204 	}
8205 
8206 	/*
8207 	 * An existing, confirmed client may exist but it may not have
8208 	 * been active for at least one lease period.  If so, then
8209 	 * "close" the client and create a new client identifier
8210 	 */
8211 	if (rfs4_lease_expired(cp)) {
8212 		rfs4_client_close(cp);
8213 		goto retry;
8214 	}
8215 
8216 	if (cp->rc_need_confirm == TRUE)
8217 		cp_unconfirmed = cp;
8218 	else
8219 		cp_confirmed = cp;
8220 
8221 	cp = NULL;
8222 
8223 	/*
8224 	 * We have a confirmed client, now check for an
8225 	 * unconfimred entry
8226 	 */
8227 	if (cp_confirmed) {
8228 		/* If creds don't match then client identifier is inuse */
8229 		if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8230 			rfs4_cbinfo_t *cbp;
8231 			/*
8232 			 * Some one else has established this client
8233 			 * id. Try and say * who they are. We will use
8234 			 * the call back address supplied by * the
8235 			 * first client.
8236 			 */
8237 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8238 
8239 			addr = netid = NULL;
8240 
8241 			cbp = &cp_confirmed->rc_cbinfo;
8242 			if (cbp->cb_callback.cb_location.r_addr &&
8243 			    cbp->cb_callback.cb_location.r_netid) {
8244 				cb_client4 *cbcp = &cbp->cb_callback;
8245 
8246 				len = strlen(cbcp->cb_location.r_addr)+1;
8247 				addr = kmem_alloc(len, KM_SLEEP);
8248 				bcopy(cbcp->cb_location.r_addr, addr, len);
8249 				len = strlen(cbcp->cb_location.r_netid)+1;
8250 				netid = kmem_alloc(len, KM_SLEEP);
8251 				bcopy(cbcp->cb_location.r_netid, netid, len);
8252 			}
8253 
8254 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
8255 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
8256 
8257 			rfs4_client_rele(cp_confirmed);
8258 		}
8259 
8260 		/*
8261 		 * Confirmed, creds match, and verifier matches; must
8262 		 * be an update of the callback info
8263 		 */
8264 		if (cp_confirmed->rc_nfs_client.verifier ==
8265 		    args->client.verifier) {
8266 			/* Setup callback information */
8267 			rfs4_client_setcb(cp_confirmed, &args->callback,
8268 			    args->callback_ident);
8269 
8270 			/* everything okay -- move ahead */
8271 			*cs->statusp = res->status = NFS4_OK;
8272 			res->SETCLIENTID4res_u.resok4.clientid =
8273 			    cp_confirmed->rc_clientid;
8274 
8275 			/* update the confirm_verifier and return it */
8276 			rfs4_client_scv_next(cp_confirmed);
8277 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8278 			    cp_confirmed->rc_confirm_verf;
8279 
8280 			rfs4_client_rele(cp_confirmed);
8281 			goto out;
8282 		}
8283 
8284 		/*
8285 		 * Creds match but the verifier doesn't.  Must search
8286 		 * for an unconfirmed client that would be replaced by
8287 		 * this request.
8288 		 */
8289 		create = FALSE;
8290 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
8291 		    cp_confirmed);
8292 	}
8293 
8294 	/*
8295 	 * At this point, we have taken care of the brand new client
8296 	 * struct, INUSE case, update of an existing, and confirmed
8297 	 * client struct.
8298 	 */
8299 
8300 	/*
8301 	 * check to see if things have changed while we originally
8302 	 * picked up the client struct.  If they have, then return and
8303 	 * retry the processing of this SETCLIENTID request.
8304 	 */
8305 	if (cp_unconfirmed) {
8306 		rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8307 		if (!cp_unconfirmed->rc_need_confirm) {
8308 			rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8309 			rfs4_client_rele(cp_unconfirmed);
8310 			if (cp_confirmed)
8311 				rfs4_client_rele(cp_confirmed);
8312 			goto retry;
8313 		}
8314 		/* do away with the old unconfirmed one */
8315 		rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8316 		rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8317 		rfs4_client_rele(cp_unconfirmed);
8318 		cp_unconfirmed = NULL;
8319 	}
8320 
8321 	/*
8322 	 * This search will temporarily hide the confirmed client
8323 	 * struct while a new client struct is created as the
8324 	 * unconfirmed one.
8325 	 */
8326 	create = TRUE;
8327 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8328 
8329 	ASSERT(newcp != NULL);
8330 
8331 	if (newcp == NULL) {
8332 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8333 		rfs4_client_rele(cp_confirmed);
8334 		goto out;
8335 	}
8336 
8337 	/*
8338 	 * If one was not created, then a similar request must be in
8339 	 * process so release and start over with this one
8340 	 */
8341 	if (create != TRUE) {
8342 		rfs4_client_rele(newcp);
8343 		if (cp_confirmed)
8344 			rfs4_client_rele(cp_confirmed);
8345 		goto retry;
8346 	}
8347 
8348 	*cs->statusp = res->status = NFS4_OK;
8349 	res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8350 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8351 	    newcp->rc_confirm_verf;
8352 	/* Setup callback information; CB_NULL confirmation later */
8353 	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8354 
8355 	newcp->rc_cp_confirmed = cp_confirmed;
8356 
8357 	rfs4_client_rele(newcp);
8358 
8359 out:
8360 	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8361 	    SETCLIENTID4res *, res);
8362 }
8363 
8364 /*ARGSUSED*/
8365 void
8366 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8367     struct svc_req *req, struct compound_state *cs)
8368 {
8369 	SETCLIENTID_CONFIRM4args *args =
8370 	    &argop->nfs_argop4_u.opsetclientid_confirm;
8371 	SETCLIENTID_CONFIRM4res *res =
8372 	    &resop->nfs_resop4_u.opsetclientid_confirm;
8373 	rfs4_client_t *cp, *cptoclose = NULL;
8374 
8375 	DTRACE_NFSV4_2(op__setclientid__confirm__start,
8376 	    struct compound_state *, cs,
8377 	    SETCLIENTID_CONFIRM4args *, args);
8378 
8379 	*cs->statusp = res->status = NFS4_OK;
8380 
8381 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
8382 
8383 	if (cp == NULL) {
8384 		*cs->statusp = res->status =
8385 		    rfs4_check_clientid(&args->clientid, 1);
8386 		goto out;
8387 	}
8388 
8389 	if (!creds_ok(cp, req, cs)) {
8390 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8391 		rfs4_client_rele(cp);
8392 		goto out;
8393 	}
8394 
8395 	/* If the verifier doesn't match, the record doesn't match */
8396 	if (cp->rc_confirm_verf != args->setclientid_confirm) {
8397 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8398 		rfs4_client_rele(cp);
8399 		goto out;
8400 	}
8401 
8402 	rfs4_dbe_lock(cp->rc_dbe);
8403 	cp->rc_need_confirm = FALSE;
8404 	if (cp->rc_cp_confirmed) {
8405 		cptoclose = cp->rc_cp_confirmed;
8406 		cptoclose->rc_ss_remove = 1;
8407 		cp->rc_cp_confirmed = NULL;
8408 	}
8409 
8410 	/*
8411 	 * Update the client's associated server instance, if it's changed
8412 	 * since the client was created.
8413 	 */
8414 	if (rfs4_servinst(cp) != rfs4_cur_servinst)
8415 		rfs4_servinst_assign(cp, rfs4_cur_servinst);
8416 
8417 	/*
8418 	 * Record clientid in stable storage.
8419 	 * Must be done after server instance has been assigned.
8420 	 */
8421 	rfs4_ss_clid(cp);
8422 
8423 	rfs4_dbe_unlock(cp->rc_dbe);
8424 
8425 	if (cptoclose)
8426 		/* don't need to rele, client_close does it */
8427 		rfs4_client_close(cptoclose);
8428 
8429 	/* If needed, initiate CB_NULL call for callback path */
8430 	rfs4_deleg_cb_check(cp);
8431 	rfs4_update_lease(cp);
8432 
8433 	/*
8434 	 * Check to see if client can perform reclaims
8435 	 */
8436 	rfs4_ss_chkclid(cp);
8437 
8438 	rfs4_client_rele(cp);
8439 
8440 out:
8441 	DTRACE_NFSV4_2(op__setclientid__confirm__done,
8442 	    struct compound_state *, cs,
8443 	    SETCLIENTID_CONFIRM4 *, res);
8444 }
8445 
8446 
8447 /*ARGSUSED*/
8448 void
8449 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8450     struct svc_req *req, struct compound_state *cs)
8451 {
8452 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8453 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8454 	rfs4_state_t *sp;
8455 	nfsstat4 status;
8456 
8457 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8458 	    CLOSE4args *, args);
8459 
8460 	if (cs->vp == NULL) {
8461 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8462 		goto out;
8463 	}
8464 
8465 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8466 	if (status != NFS4_OK) {
8467 		*cs->statusp = resp->status = status;
8468 		goto out;
8469 	}
8470 
8471 	/* Ensure specified filehandle matches */
8472 	if (cs->vp != sp->rs_finfo->rf_vp) {
8473 		rfs4_state_rele(sp);
8474 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8475 		goto out;
8476 	}
8477 
8478 	/* hold off other access to open_owner while we tinker */
8479 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8480 
8481 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8482 	case NFS4_CHECK_STATEID_OKAY:
8483 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8484 		    resop) != NFS4_CHKSEQ_OKAY) {
8485 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8486 			goto end;
8487 		}
8488 		break;
8489 	case NFS4_CHECK_STATEID_OLD:
8490 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8491 		goto end;
8492 	case NFS4_CHECK_STATEID_BAD:
8493 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8494 		goto end;
8495 	case NFS4_CHECK_STATEID_EXPIRED:
8496 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8497 		goto end;
8498 	case NFS4_CHECK_STATEID_CLOSED:
8499 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8500 		goto end;
8501 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8502 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8503 		goto end;
8504 	case NFS4_CHECK_STATEID_REPLAY:
8505 		/* Check the sequence id for the open owner */
8506 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8507 		    resop)) {
8508 		case NFS4_CHKSEQ_OKAY:
8509 			/*
8510 			 * This is replayed stateid; if seqid matches
8511 			 * next expected, then client is using wrong seqid.
8512 			 */
8513 			/* FALL THROUGH */
8514 		case NFS4_CHKSEQ_BAD:
8515 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8516 			goto end;
8517 		case NFS4_CHKSEQ_REPLAY:
8518 			/*
8519 			 * Note this case is the duplicate case so
8520 			 * resp->status is already set.
8521 			 */
8522 			*cs->statusp = resp->status;
8523 			rfs4_update_lease(sp->rs_owner->ro_client);
8524 			goto end;
8525 		}
8526 		break;
8527 	default:
8528 		ASSERT(FALSE);
8529 		break;
8530 	}
8531 
8532 	rfs4_dbe_lock(sp->rs_dbe);
8533 
8534 	/* Update the stateid. */
8535 	next_stateid(&sp->rs_stateid);
8536 	resp->open_stateid = sp->rs_stateid.stateid;
8537 
8538 	rfs4_dbe_unlock(sp->rs_dbe);
8539 
8540 	rfs4_update_lease(sp->rs_owner->ro_client);
8541 	rfs4_update_open_sequence(sp->rs_owner);
8542 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8543 
8544 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8545 
8546 	*cs->statusp = resp->status = status;
8547 
8548 end:
8549 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8550 	rfs4_state_rele(sp);
8551 out:
8552 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8553 	    CLOSE4res *, resp);
8554 }
8555 
8556 /*
8557  * Manage the counts on the file struct and close all file locks
8558  */
8559 /*ARGSUSED*/
8560 void
8561 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8562     bool_t close_of_client)
8563 {
8564 	rfs4_file_t *fp = sp->rs_finfo;
8565 	rfs4_lo_state_t *lsp;
8566 	int fflags = 0;
8567 
8568 	/*
8569 	 * If this call is part of the larger closing down of client
8570 	 * state then it is just easier to release all locks
8571 	 * associated with this client instead of going through each
8572 	 * individual file and cleaning locks there.
8573 	 */
8574 	if (close_of_client) {
8575 		if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8576 		    !list_is_empty(&sp->rs_lostatelist) &&
8577 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8578 			/* Is the PxFS kernel module loaded? */
8579 			if (lm_remove_file_locks != NULL) {
8580 				int new_sysid;
8581 
8582 				/* Encode the cluster nodeid in new sysid */
8583 				new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8584 				lm_set_nlmid_flk(&new_sysid);
8585 
8586 				/*
8587 				 * This PxFS routine removes file locks for a
8588 				 * client over all nodes of a cluster.
8589 				 */
8590 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8591 				    "lm_remove_file_locks(sysid=0x%x)\n",
8592 				    new_sysid));
8593 				(*lm_remove_file_locks)(new_sysid);
8594 			} else {
8595 				struct flock64 flk;
8596 
8597 				/* Release all locks for this client */
8598 				flk.l_type = F_UNLKSYS;
8599 				flk.l_whence = 0;
8600 				flk.l_start = 0;
8601 				flk.l_len = 0;
8602 				flk.l_sysid =
8603 				    sp->rs_owner->ro_client->rc_sysidt;
8604 				flk.l_pid = 0;
8605 				(void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8606 				    &flk, F_REMOTELOCK | FREAD | FWRITE,
8607 				    (u_offset_t)0, NULL, CRED(), NULL);
8608 			}
8609 
8610 			sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8611 		}
8612 	}
8613 
8614 	/*
8615 	 * Release all locks on this file by this lock owner or at
8616 	 * least mark the locks as having been released
8617 	 */
8618 	for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8619 	    lsp = list_next(&sp->rs_lostatelist, lsp)) {
8620 		lsp->rls_locks_cleaned = TRUE;
8621 
8622 		/* Was this already taken care of above? */
8623 		if (!close_of_client &&
8624 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8625 			(void) cleanlocks(sp->rs_finfo->rf_vp,
8626 			    lsp->rls_locker->rl_pid,
8627 			    lsp->rls_locker->rl_client->rc_sysidt);
8628 	}
8629 
8630 	/*
8631 	 * Release any shrlocks associated with this open state ID.
8632 	 * This must be done before the rfs4_state gets marked closed.
8633 	 */
8634 	if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8635 		(void) rfs4_unshare(sp);
8636 
8637 	if (sp->rs_open_access) {
8638 		rfs4_dbe_lock(fp->rf_dbe);
8639 
8640 		/*
8641 		 * Decrement the count for each access and deny bit that this
8642 		 * state has contributed to the file.
8643 		 * If the file counts go to zero
8644 		 * clear the appropriate bit in the appropriate mask.
8645 		 */
8646 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8647 			fp->rf_access_read--;
8648 			fflags |= FREAD;
8649 			if (fp->rf_access_read == 0)
8650 				fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8651 		}
8652 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8653 			fp->rf_access_write--;
8654 			fflags |= FWRITE;
8655 			if (fp->rf_access_write == 0)
8656 				fp->rf_share_access &=
8657 				    ~OPEN4_SHARE_ACCESS_WRITE;
8658 		}
8659 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8660 			fp->rf_deny_read--;
8661 			if (fp->rf_deny_read == 0)
8662 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8663 		}
8664 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8665 			fp->rf_deny_write--;
8666 			if (fp->rf_deny_write == 0)
8667 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8668 		}
8669 
8670 		(void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8671 
8672 		rfs4_dbe_unlock(fp->rf_dbe);
8673 
8674 		sp->rs_open_access = 0;
8675 		sp->rs_open_deny = 0;
8676 	}
8677 }
8678 
8679 /*
8680  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8681  */
8682 static nfsstat4
8683 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8684 {
8685 	rfs4_lockowner_t *lo;
8686 	rfs4_client_t *cp;
8687 	uint32_t len;
8688 
8689 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
8690 	if (lo != NULL) {
8691 		cp = lo->rl_client;
8692 		if (rfs4_lease_expired(cp)) {
8693 			rfs4_lockowner_rele(lo);
8694 			rfs4_dbe_hold(cp->rc_dbe);
8695 			rfs4_client_close(cp);
8696 			return (NFS4ERR_EXPIRED);
8697 		}
8698 		dp->owner.clientid = lo->rl_owner.clientid;
8699 		len = lo->rl_owner.owner_len;
8700 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8701 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8702 		dp->owner.owner_len = len;
8703 		rfs4_lockowner_rele(lo);
8704 		goto finish;
8705 	}
8706 
8707 	/*
8708 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8709 	 * of the client id contain the boot time for a NFS4 lock. So we
8710 	 * fabricate and identity by setting clientid to the sysid, and
8711 	 * the lock owner to the pid.
8712 	 */
8713 	dp->owner.clientid = flk->l_sysid;
8714 	len = sizeof (pid_t);
8715 	dp->owner.owner_len = len;
8716 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8717 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
8718 finish:
8719 	dp->offset = flk->l_start;
8720 	dp->length = flk->l_len;
8721 
8722 	if (flk->l_type == F_RDLCK)
8723 		dp->locktype = READ_LT;
8724 	else if (flk->l_type == F_WRLCK)
8725 		dp->locktype = WRITE_LT;
8726 	else
8727 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
8728 
8729 	return (NFS4_OK);
8730 }
8731 
8732 /*
8733  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8734  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8735  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8736  * for that (obviously); they are sending the LOCK requests with some delays
8737  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8738  * locking and delay implementation at the client side.
8739  *
8740  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8741  * fast retries on its own (the for loop below) in a hope the lock will be
8742  * available soon.  And if not, the client won't need to resend the LOCK
8743  * requests so fast to check the lock availability.  This basically saves some
8744  * network traffic and tries to make sure the client gets the lock ASAP.
8745  */
8746 static int
8747 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8748 {
8749 	int error;
8750 	struct flock64 flk;
8751 	int i;
8752 	clock_t delaytime;
8753 	int cmd;
8754 	int spin_cnt = 0;
8755 
8756 	cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8757 retry:
8758 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8759 
8760 	for (i = 0; i < rfs4_maxlock_tries; i++) {
8761 		LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8762 		error = VOP_FRLOCK(vp, cmd,
8763 		    flock, flag, (u_offset_t)0, NULL, cred, NULL);
8764 
8765 		if (error != EAGAIN && error != EACCES)
8766 			break;
8767 
8768 		if (i < rfs4_maxlock_tries - 1) {
8769 			delay(delaytime);
8770 			delaytime *= 2;
8771 		}
8772 	}
8773 
8774 	if (error == EAGAIN || error == EACCES) {
8775 		/* Get the owner of the lock */
8776 		flk = *flock;
8777 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8778 		if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8779 		    NULL) == 0) {
8780 			/*
8781 			 * There's a race inherent in the current VOP_FRLOCK
8782 			 * design where:
8783 			 * a: "other guy" takes a lock that conflicts with a
8784 			 * lock we want
8785 			 * b: we attempt to take our lock (non-blocking) and
8786 			 * the attempt fails.
8787 			 * c: "other guy" releases the conflicting lock
8788 			 * d: we ask what lock conflicts with the lock we want,
8789 			 * getting F_UNLCK (no lock blocks us)
8790 			 *
8791 			 * If we retry the non-blocking lock attempt in this
8792 			 * case (restart at step 'b') there's some possibility
8793 			 * that many such attempts might fail.  However a test
8794 			 * designed to actually provoke this race shows that
8795 			 * the vast majority of cases require no retry, and
8796 			 * only a few took as many as three retries.  Here's
8797 			 * the test outcome:
8798 			 *
8799 			 *	   number of retries    how many times we needed
8800 			 *				that many retries
8801 			 *	   0			79461
8802 			 *	   1			  862
8803 			 *	   2			   49
8804 			 *	   3			    5
8805 			 *
8806 			 * Given those empirical results, we arbitrarily limit
8807 			 * the retry count to ten.
8808 			 *
8809 			 * If we actually make to ten retries and give up,
8810 			 * nothing catastrophic happens, but we're unable to
8811 			 * return the information about the conflicting lock to
8812 			 * the NFS client.  That's an acceptable trade off vs.
8813 			 * letting this retry loop run forever.
8814 			 */
8815 			if (flk.l_type == F_UNLCK) {
8816 				if (spin_cnt++ < 10) {
8817 					/* No longer locked, retry */
8818 					goto retry;
8819 				}
8820 			} else {
8821 				*flock = flk;
8822 				LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8823 				    F_GETLK, &flk);
8824 			}
8825 		}
8826 	}
8827 
8828 	return (error);
8829 }
8830 
8831 /*ARGSUSED*/
8832 static nfsstat4
8833 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8834     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8835 {
8836 	nfsstat4 status;
8837 	rfs4_lockowner_t *lo = lsp->rls_locker;
8838 	rfs4_state_t *sp = lsp->rls_state;
8839 	struct flock64 flock;
8840 	int16_t ltype;
8841 	int flag;
8842 	int error;
8843 	sysid_t sysid;
8844 	LOCK4res *lres;
8845 	vnode_t *vp;
8846 
8847 	if (rfs4_lease_expired(lo->rl_client)) {
8848 		return (NFS4ERR_EXPIRED);
8849 	}
8850 
8851 	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8852 		return (status);
8853 
8854 	/* Check for zero length. To lock to end of file use all ones for V4 */
8855 	if (length == 0)
8856 		return (NFS4ERR_INVAL);
8857 	else if (length == (length4)(~0))
8858 		length = 0;		/* Posix to end of file  */
8859 
8860 retry:
8861 	rfs4_dbe_lock(sp->rs_dbe);
8862 	if (sp->rs_closed == TRUE) {
8863 		rfs4_dbe_unlock(sp->rs_dbe);
8864 		return (NFS4ERR_OLD_STATEID);
8865 	}
8866 
8867 	if (resop->resop != OP_LOCKU) {
8868 		switch (locktype) {
8869 		case READ_LT:
8870 		case READW_LT:
8871 			if ((sp->rs_share_access
8872 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
8873 				rfs4_dbe_unlock(sp->rs_dbe);
8874 
8875 				return (NFS4ERR_OPENMODE);
8876 			}
8877 			ltype = F_RDLCK;
8878 			break;
8879 		case WRITE_LT:
8880 		case WRITEW_LT:
8881 			if ((sp->rs_share_access
8882 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8883 				rfs4_dbe_unlock(sp->rs_dbe);
8884 
8885 				return (NFS4ERR_OPENMODE);
8886 			}
8887 			ltype = F_WRLCK;
8888 			break;
8889 		}
8890 	} else
8891 		ltype = F_UNLCK;
8892 
8893 	flock.l_type = ltype;
8894 	flock.l_whence = 0;		/* SEEK_SET */
8895 	flock.l_start = offset;
8896 	flock.l_len = length;
8897 	flock.l_sysid = sysid;
8898 	flock.l_pid = lsp->rls_locker->rl_pid;
8899 
8900 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8901 	if (flock.l_len < 0 || flock.l_start < 0) {
8902 		rfs4_dbe_unlock(sp->rs_dbe);
8903 		return (NFS4ERR_INVAL);
8904 	}
8905 
8906 	/*
8907 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8908 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8909 	 */
8910 	flag = (int)sp->rs_share_access | F_REMOTELOCK;
8911 
8912 	vp = sp->rs_finfo->rf_vp;
8913 	VN_HOLD(vp);
8914 
8915 	/*
8916 	 * We need to unlock sp before we call the underlying filesystem to
8917 	 * acquire the file lock.
8918 	 */
8919 	rfs4_dbe_unlock(sp->rs_dbe);
8920 
8921 	error = setlock(vp, &flock, flag, cred);
8922 
8923 	/*
8924 	 * Make sure the file is still open.  In a case the file was closed in
8925 	 * the meantime, clean the lock we acquired using the setlock() call
8926 	 * above, and return the appropriate error.
8927 	 */
8928 	rfs4_dbe_lock(sp->rs_dbe);
8929 	if (sp->rs_closed == TRUE) {
8930 		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
8931 		rfs4_dbe_unlock(sp->rs_dbe);
8932 
8933 		VN_RELE(vp);
8934 
8935 		return (NFS4ERR_OLD_STATEID);
8936 	}
8937 	rfs4_dbe_unlock(sp->rs_dbe);
8938 
8939 	VN_RELE(vp);
8940 
8941 	if (error == 0) {
8942 		rfs4_dbe_lock(lsp->rls_dbe);
8943 		next_stateid(&lsp->rls_lockid);
8944 		rfs4_dbe_unlock(lsp->rls_dbe);
8945 	}
8946 
8947 	/*
8948 	 * N.B. We map error values to nfsv4 errors. This is differrent
8949 	 * than puterrno4 routine.
8950 	 */
8951 	switch (error) {
8952 	case 0:
8953 		status = NFS4_OK;
8954 		break;
8955 	case EAGAIN:
8956 	case EACCES:		/* Old value */
8957 		/* Can only get here if op is OP_LOCK */
8958 		ASSERT(resop->resop == OP_LOCK);
8959 		lres = &resop->nfs_resop4_u.oplock;
8960 		status = NFS4ERR_DENIED;
8961 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8962 		    == NFS4ERR_EXPIRED)
8963 			goto retry;
8964 		break;
8965 	case ENOLCK:
8966 		status = NFS4ERR_DELAY;
8967 		break;
8968 	case EOVERFLOW:
8969 		status = NFS4ERR_INVAL;
8970 		break;
8971 	case EINVAL:
8972 		status = NFS4ERR_NOTSUPP;
8973 		break;
8974 	default:
8975 		status = NFS4ERR_SERVERFAULT;
8976 		break;
8977 	}
8978 
8979 	return (status);
8980 }
8981 
/*
 * LOCK operation.
 *
 * Two request forms are handled:
 *  - new_lock_owner: the open stateid and open-owner seqid are validated
 *    (under the open owner's sw lock), the lock owner and the lock-owner
 *    state are looked up or created, and a duplicate request is answered
 *    from the reply saved in the lock-owner state;
 *  - existing lock owner: the lock stateid and lock seqid are validated
 *    (under the lock-owner state's sw lock).
 *
 * After the type of the object and the grace/reclaim conditions have been
 * checked, the lock itself is taken by rfs4_do_lock().  The outcome is
 * stored in both resp->status and *cs->statusp.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* set only for new_lock_owner */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
		    cs, LOCK4res *, resp);
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "Get state failed in lock %d", status));
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
		case NFS4_CHECK_STATEID_REPLAY:
			switch (rfs4_check_olo_seqid(olo->open_seqid,
			    sp->rs_owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->rs_owner);
			}

			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (dup_lock == FALSE && create == FALSE) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->rs_owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->rls_seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				    "lsp->seqid=%d old->seqid=%d",
				    lsp->rls_seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, &lsp->rls_reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->rls_dbe);

		/* Make sure to update the lock sequence id */
		lsp->rls_seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->rls_skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				rfs4_update_lease(lsp->rls_locker->rl_client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* Non-reclaim requests are refused during the grace period */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* Reclaim during grace is only allowed if the client may reclaim */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Reclaim requests are refused outside of the grace period */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	lsp->rls_skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);
}
9382 
9383 /* free function for LOCK/LOCKT */
9384 static void
9385 lock_denied_free(nfs_resop4 *resop)
9386 {
9387 	LOCK4denied *dp = NULL;
9388 
9389 	switch (resop->resop) {
9390 	case OP_LOCK:
9391 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9392 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9393 		break;
9394 	case OP_LOCKT:
9395 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9396 			dp = &resop->nfs_resop4_u.oplockt.denied;
9397 		break;
9398 	default:
9399 		break;
9400 	}
9401 
9402 	if (dp)
9403 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9404 }
9405 
9406 /*ARGSUSED*/
9407 void
9408 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9409     struct svc_req *req, struct compound_state *cs)
9410 {
9411 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9412 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9413 	nfsstat4 status;
9414 	stateid4 *stateid = &args->lock_stateid;
9415 	rfs4_lo_state_t *lsp;
9416 
9417 	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9418 	    LOCKU4args *, args);
9419 
9420 	if (cs->vp == NULL) {
9421 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9422 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9423 		    LOCKU4res *, resp);
9424 		return;
9425 	}
9426 
9427 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9428 		*cs->statusp = resp->status = status;
9429 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9430 		    LOCKU4res *, resp);
9431 		return;
9432 	}
9433 
9434 	/* Ensure specified filehandle matches */
9435 	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9436 		rfs4_lo_state_rele(lsp, TRUE);
9437 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9438 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9439 		    LOCKU4res *, resp);
9440 		return;
9441 	}
9442 
9443 	/* hold off other access to lsp while we tinker */
9444 	rfs4_sw_enter(&lsp->rls_sw);
9445 
9446 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9447 	case NFS4_CHECK_STATEID_OKAY:
9448 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9449 		    != NFS4_CHKSEQ_OKAY) {
9450 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9451 			goto end;
9452 		}
9453 		break;
9454 	case NFS4_CHECK_STATEID_OLD:
9455 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9456 		goto end;
9457 	case NFS4_CHECK_STATEID_BAD:
9458 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9459 		goto end;
9460 	case NFS4_CHECK_STATEID_EXPIRED:
9461 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9462 		goto end;
9463 	case NFS4_CHECK_STATEID_CLOSED:
9464 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9465 		goto end;
9466 	case NFS4_CHECK_STATEID_REPLAY:
9467 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9468 		case NFS4_CHKSEQ_OKAY:
9469 				/*
9470 				 * This is a replayed stateid; if
9471 				 * seqid matches the next expected,
9472 				 * then client is using wrong seqid.
9473 				 */
9474 		case NFS4_CHKSEQ_BAD:
9475 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9476 			goto end;
9477 		case NFS4_CHKSEQ_REPLAY:
9478 			rfs4_update_lease(lsp->rls_locker->rl_client);
9479 			*cs->statusp = status = resp->status;
9480 			goto end;
9481 		}
9482 		break;
9483 	default:
9484 		ASSERT(FALSE);
9485 		break;
9486 	}
9487 
9488 	rfs4_update_lock_sequence(lsp);
9489 	rfs4_update_lease(lsp->rls_locker->rl_client);
9490 
9491 	/*
9492 	 * NFS4 only allows locking on regular files, so
9493 	 * verify type of object.
9494 	 */
9495 	if (cs->vp->v_type != VREG) {
9496 		if (cs->vp->v_type == VDIR)
9497 			status = NFS4ERR_ISDIR;
9498 		else
9499 			status = NFS4ERR_INVAL;
9500 		goto out;
9501 	}
9502 
9503 	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9504 		status = NFS4ERR_GRACE;
9505 		goto out;
9506 	}
9507 
9508 	status = rfs4_do_lock(lsp, args->locktype,
9509 	    args->offset, args->length, cs->cr, resop);
9510 
9511 out:
9512 	*cs->statusp = resp->status = status;
9513 
9514 	if (status == NFS4_OK)
9515 		resp->lock_stateid = lsp->rls_lockid.stateid;
9516 
9517 	rfs4_update_lock_resp(lsp, resop);
9518 
9519 end:
9520 	rfs4_sw_exit(&lsp->rls_sw);
9521 	rfs4_lo_state_rele(lsp, TRUE);
9522 
9523 	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9524 	    LOCKU4res *, resp);
9525 }
9526 
9527 /*
9528  * LOCKT is a best effort routine, the client can not be guaranteed that
9529  * the status return is still in effect by the time the reply is received.
9530  * They are numerous race conditions in this routine, but we are not required
9531  * and can not be accurate.
9532  */
9533 /*ARGSUSED*/
9534 void
9535 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9536     struct svc_req *req, struct compound_state *cs)
9537 {
9538 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9539 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9540 	rfs4_lockowner_t *lo;
9541 	rfs4_client_t *cp;
9542 	bool_t create = FALSE;
9543 	struct flock64 flk;
9544 	int error;
9545 	int flag = FREAD | FWRITE;
9546 	int ltype;
9547 	length4 posix_length;
9548 	sysid_t sysid;
9549 	pid_t pid;
9550 
9551 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9552 	    LOCKT4args *, args);
9553 
9554 	if (cs->vp == NULL) {
9555 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9556 		goto out;
9557 	}
9558 
9559 	/*
9560 	 * NFS4 only allows locking on regular files, so
9561 	 * verify type of object.
9562 	 */
9563 	if (cs->vp->v_type != VREG) {
9564 		if (cs->vp->v_type == VDIR)
9565 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
9566 		else
9567 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
9568 		goto out;
9569 	}
9570 
9571 	/*
9572 	 * Check out the clientid to ensure the server knows about it
9573 	 * so that we correctly inform the client of a server reboot.
9574 	 */
9575 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9576 	    == NULL) {
9577 		*cs->statusp = resp->status =
9578 		    rfs4_check_clientid(&args->owner.clientid, 0);
9579 		goto out;
9580 	}
9581 	if (rfs4_lease_expired(cp)) {
9582 		rfs4_client_close(cp);
9583 		/*
9584 		 * Protocol doesn't allow returning NFS4ERR_STALE as
9585 		 * other operations do on this check so STALE_CLIENTID
9586 		 * is returned instead
9587 		 */
9588 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9589 		goto out;
9590 	}
9591 
9592 	if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9593 		*cs->statusp = resp->status = NFS4ERR_GRACE;
9594 		rfs4_client_rele(cp);
9595 		goto out;
9596 	}
9597 	rfs4_client_rele(cp);
9598 
9599 	resp->status = NFS4_OK;
9600 
9601 	switch (args->locktype) {
9602 	case READ_LT:
9603 	case READW_LT:
9604 		ltype = F_RDLCK;
9605 		break;
9606 	case WRITE_LT:
9607 	case WRITEW_LT:
9608 		ltype = F_WRLCK;
9609 		break;
9610 	}
9611 
9612 	posix_length = args->length;
9613 	/* Check for zero length. To lock to end of file use all ones for V4 */
9614 	if (posix_length == 0) {
9615 		*cs->statusp = resp->status = NFS4ERR_INVAL;
9616 		goto out;
9617 	} else if (posix_length == (length4)(~0)) {
9618 		posix_length = 0;	/* Posix to end of file  */
9619 	}
9620 
9621 	/* Find or create a lockowner */
9622 	lo = rfs4_findlockowner(&args->owner, &create);
9623 
9624 	if (lo) {
9625 		pid = lo->rl_pid;
9626 		if ((resp->status =
9627 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9628 			goto err;
9629 	} else {
9630 		pid = 0;
9631 		sysid = lockt_sysid;
9632 	}
9633 retry:
9634 	flk.l_type = ltype;
9635 	flk.l_whence = 0;		/* SEEK_SET */
9636 	flk.l_start = args->offset;
9637 	flk.l_len = posix_length;
9638 	flk.l_sysid = sysid;
9639 	flk.l_pid = pid;
9640 	flag |= F_REMOTELOCK;
9641 
9642 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9643 
9644 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9645 	if (flk.l_len < 0 || flk.l_start < 0) {
9646 		resp->status = NFS4ERR_INVAL;
9647 		goto err;
9648 	}
9649 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9650 	    NULL, cs->cr, NULL);
9651 
9652 	/*
9653 	 * N.B. We map error values to nfsv4 errors. This is differrent
9654 	 * than puterrno4 routine.
9655 	 */
9656 	switch (error) {
9657 	case 0:
9658 		if (flk.l_type == F_UNLCK)
9659 			resp->status = NFS4_OK;
9660 		else {
9661 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9662 				goto retry;
9663 			resp->status = NFS4ERR_DENIED;
9664 		}
9665 		break;
9666 	case EOVERFLOW:
9667 		resp->status = NFS4ERR_INVAL;
9668 		break;
9669 	case EINVAL:
9670 		resp->status = NFS4ERR_NOTSUPP;
9671 		break;
9672 	default:
9673 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9674 		    error);
9675 		resp->status = NFS4ERR_SERVERFAULT;
9676 		break;
9677 	}
9678 
9679 err:
9680 	if (lo)
9681 		rfs4_lockowner_rele(lo);
9682 	*cs->statusp = resp->status;
9683 out:
9684 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9685 	    LOCKT4res *, resp);
9686 }
9687 
9688 int
9689 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9690 {
9691 	int err;
9692 	int cmd;
9693 	vnode_t *vp;
9694 	struct shrlock shr;
9695 	struct shr_locowner shr_loco;
9696 	int fflags = 0;
9697 
9698 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9699 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9700 
9701 	if (sp->rs_closed)
9702 		return (NFS4ERR_OLD_STATEID);
9703 
9704 	vp = sp->rs_finfo->rf_vp;
9705 	ASSERT(vp);
9706 
9707 	shr.s_access = shr.s_deny = 0;
9708 
9709 	if (access & OPEN4_SHARE_ACCESS_READ) {
9710 		fflags |= FREAD;
9711 		shr.s_access |= F_RDACC;
9712 	}
9713 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
9714 		fflags |= FWRITE;
9715 		shr.s_access |= F_WRACC;
9716 	}
9717 	ASSERT(shr.s_access);
9718 
9719 	if (deny & OPEN4_SHARE_DENY_READ)
9720 		shr.s_deny |= F_RDDNY;
9721 	if (deny & OPEN4_SHARE_DENY_WRITE)
9722 		shr.s_deny |= F_WRDNY;
9723 
9724 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9725 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9726 	shr_loco.sl_pid = shr.s_pid;
9727 	shr_loco.sl_id = shr.s_sysid;
9728 	shr.s_owner = (caddr_t)&shr_loco;
9729 	shr.s_own_len = sizeof (shr_loco);
9730 
9731 	cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9732 
9733 	err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9734 	if (err != 0) {
9735 		if (err == EAGAIN)
9736 			err = NFS4ERR_SHARE_DENIED;
9737 		else
9738 			err = puterrno4(err);
9739 		return (err);
9740 	}
9741 
9742 	sp->rs_share_access |= access;
9743 	sp->rs_share_deny |= deny;
9744 
9745 	return (0);
9746 }
9747 
9748 int
9749 rfs4_unshare(rfs4_state_t *sp)
9750 {
9751 	int err;
9752 	struct shrlock shr;
9753 	struct shr_locowner shr_loco;
9754 
9755 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9756 
9757 	if (sp->rs_closed || sp->rs_share_access == 0)
9758 		return (0);
9759 
9760 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9761 	ASSERT(sp->rs_finfo->rf_vp);
9762 
9763 	shr.s_access = shr.s_deny = 0;
9764 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9765 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9766 	shr_loco.sl_pid = shr.s_pid;
9767 	shr_loco.sl_id = shr.s_sysid;
9768 	shr.s_owner = (caddr_t)&shr_loco;
9769 	shr.s_own_len = sizeof (shr_loco);
9770 
9771 	err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9772 	    NULL);
9773 	if (err != 0) {
9774 		err = puterrno4(err);
9775 		return (err);
9776 	}
9777 
9778 	sp->rs_share_access = 0;
9779 	sp->rs_share_deny = 0;
9780 
9781 	return (0);
9782 
9783 }
9784 
9785 static int
9786 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9787 {
9788 	struct clist	*wcl;
9789 	count4		count = rok->data_len;
9790 	int		wlist_len;
9791 
9792 	wcl = args->wlist;
9793 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9794 		return (FALSE);
9795 	}
9796 	wcl = args->wlist;
9797 	rok->wlist_len = wlist_len;
9798 	rok->wlist = wcl;
9799 	return (TRUE);
9800 }
9801 
/*
 * Tunable to disable server referrals.  When non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9804 
9805 /*
9806  * Find an NFS record in reparse point data.
9807  * Returns 0 for success and <0 or an errno value on failure.
9808  */
9809 int
9810 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9811 {
9812 	int err;
9813 	char *stype, *val;
9814 	nvlist_t *nvl;
9815 	nvpair_t *curr;
9816 
9817 	if ((nvl = reparse_init()) == NULL)
9818 		return (-1);
9819 
9820 	if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9821 		reparse_free(nvl);
9822 		return (err);
9823 	}
9824 
9825 	curr = NULL;
9826 	while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9827 		if ((stype = nvpair_name(curr)) == NULL) {
9828 			reparse_free(nvl);
9829 			return (-2);
9830 		}
9831 		if (strncasecmp(stype, "NFS", 3) == 0)
9832 			break;
9833 	}
9834 
9835 	if ((curr == NULL) ||
9836 	    (nvpair_value_string(curr, &val))) {
9837 		reparse_free(nvl);
9838 		return (-3);
9839 	}
9840 	*nvlp = nvl;
9841 	*svcp = stype;
9842 	*datap = val;
9843 	return (0);
9844 }
9845 
9846 int
9847 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9848 {
9849 	nvlist_t *nvl;
9850 	char *s, *d;
9851 
9852 	if (rfs4_no_referrals != 0)
9853 		return (B_FALSE);
9854 
9855 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9856 		return (B_FALSE);
9857 
9858 	if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9859 		return (B_FALSE);
9860 
9861 	reparse_free(nvl);
9862 
9863 	return (B_TRUE);
9864 }
9865 
9866 /*
9867  * There is a user-level copy of this routine in ref_subr.c.
9868  * Changes should be kept in sync.
9869  */
9870 static int
9871 nfs4_create_components(char *path, component4 *comp4)
9872 {
9873 	int slen, plen, ncomp;
9874 	char *ori_path, *nxtc, buf[MAXNAMELEN];
9875 
9876 	if (path == NULL)
9877 		return (0);
9878 
9879 	plen = strlen(path) + 1;	/* include the terminator */
9880 	ori_path = path;
9881 	ncomp = 0;
9882 
9883 	/* count number of components in the path */
9884 	for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9885 		if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9886 			if ((slen = nxtc - path) == 0) {
9887 				path = nxtc + 1;
9888 				continue;
9889 			}
9890 
9891 			if (comp4 != NULL) {
9892 				bcopy(path, buf, slen);
9893 				buf[slen] = '\0';
9894 				(void) str_to_utf8(buf, &comp4[ncomp]);
9895 			}
9896 
9897 			ncomp++;	/* 1 valid component */
9898 			path = nxtc + 1;
9899 		}
9900 		if (*nxtc == '\0' || *nxtc == '\n')
9901 			break;
9902 	}
9903 
9904 	return (ncomp);
9905 }
9906 
9907 /*
9908  * There is a user-level copy of this routine in ref_subr.c.
9909  * Changes should be kept in sync.
9910  */
9911 static int
9912 make_pathname4(char *path, pathname4 *pathname)
9913 {
9914 	int ncomp;
9915 	component4 *comp4;
9916 
9917 	if (pathname == NULL)
9918 		return (0);
9919 
9920 	if (path == NULL) {
9921 		pathname->pathname4_val = NULL;
9922 		pathname->pathname4_len = 0;
9923 		return (0);
9924 	}
9925 
9926 	/* count number of components to alloc buffer */
9927 	if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9928 		pathname->pathname4_val = NULL;
9929 		pathname->pathname4_len = 0;
9930 		return (0);
9931 	}
9932 	comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9933 
9934 	/* copy components into allocated buffer */
9935 	ncomp = nfs4_create_components(path, comp4);
9936 
9937 	pathname->pathname4_val = comp4;
9938 	pathname->pathname4_len = ncomp;
9939 
9940 	return (ncomp);
9941 }
9942 
9943 #define	xdr_fs_locations4 xdr_fattr4_fs_locations
9944 
9945 fs_locations4 *
9946 fetch_referral(vnode_t *vp, cred_t *cr)
9947 {
9948 	nvlist_t *nvl;
9949 	char *stype, *sdata;
9950 	fs_locations4 *result;
9951 	char buf[1024];
9952 	size_t bufsize;
9953 	XDR xdr;
9954 	int err;
9955 
9956 	/*
9957 	 * Check attrs to ensure it's a reparse point
9958 	 */
9959 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9960 		return (NULL);
9961 
9962 	/*
9963 	 * Look for an NFS record and get the type and data
9964 	 */
9965 	if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9966 		return (NULL);
9967 
9968 	/*
9969 	 * With the type and data, upcall to get the referral
9970 	 */
9971 	bufsize = sizeof (buf);
9972 	bzero(buf, sizeof (buf));
9973 	err = reparse_kderef((const char *)stype, (const char *)sdata,
9974 	    buf, &bufsize);
9975 	reparse_free(nvl);
9976 
9977 	DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9978 	    char *, stype, char *, sdata, char *, buf, int, err);
9979 	if (err) {
9980 		cmn_err(CE_NOTE,
9981 		    "reparsed daemon not running: unable to get referral (%d)",
9982 		    err);
9983 		return (NULL);
9984 	}
9985 
9986 	/*
9987 	 * We get an XDR'ed record back from the kderef call
9988 	 */
9989 	xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9990 	result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9991 	err = xdr_fs_locations4(&xdr, result);
9992 	XDR_DESTROY(&xdr);
9993 	if (err != TRUE) {
9994 		DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9995 		    int, err);
9996 		return (NULL);
9997 	}
9998 
9999 	/*
10000 	 * Look at path to recover fs_root, ignoring the leading '/'
10001 	 */
10002 	(void) make_pathname4(vp->v_path, &result->fs_root);
10003 
10004 	return (result);
10005 }
10006 
10007 char *
10008 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10009 {
10010 	fs_locations4 *fsl;
10011 	fs_location4 *fs;
10012 	char *server, *path, *symbuf;
10013 	static char *prefix = "/net/";
10014 	int i, size, npaths;
10015 	uint_t len;
10016 
10017 	/* Get the referral */
10018 	if ((fsl = fetch_referral(vp, cr)) == NULL)
10019 		return (NULL);
10020 
10021 	/* Deal with only the first location and first server */
10022 	fs = &fsl->locations_val[0];
10023 	server = utf8_to_str(&fs->server_val[0], &len, NULL);
10024 	if (server == NULL) {
10025 		rfs4_free_fs_locations4(fsl);
10026 		kmem_free(fsl, sizeof (fs_locations4));
10027 		return (NULL);
10028 	}
10029 
10030 	/* Figure out size for "/net/" + host + /path/path/path + NULL */
10031 	size = strlen(prefix) + len;
10032 	for (i = 0; i < fs->rootpath.pathname4_len; i++)
10033 		size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10034 
10035 	/* Allocate the symlink buffer and fill it */
10036 	symbuf = kmem_zalloc(size, KM_SLEEP);
10037 	(void) strcat(symbuf, prefix);
10038 	(void) strcat(symbuf, server);
10039 	kmem_free(server, len);
10040 
10041 	npaths = 0;
10042 	for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10043 		path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10044 		if (path == NULL)
10045 			continue;
10046 		(void) strcat(symbuf, "/");
10047 		(void) strcat(symbuf, path);
10048 		npaths++;
10049 		kmem_free(path, len);
10050 	}
10051 
10052 	rfs4_free_fs_locations4(fsl);
10053 	kmem_free(fsl, sizeof (fs_locations4));
10054 
10055 	if (strsz != NULL)
10056 		*strsz = size;
10057 	return (symbuf);
10058 }
10059 
10060 /*
10061  * Check to see if we have a downrev Solaris client, so that we
10062  * can send it a symlink instead of a referral.
10063  */
10064 int
10065 client_is_downrev(struct svc_req *req)
10066 {
10067 	struct sockaddr *ca;
10068 	rfs4_clntip_t *ci;
10069 	bool_t create = FALSE;
10070 	int is_downrev;
10071 
10072 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10073 	ASSERT(ca);
10074 	ci = rfs4_find_clntip(ca, &create);
10075 	if (ci == NULL)
10076 		return (0);
10077 	is_downrev = ci->ri_no_referrals;
10078 	rfs4_dbe_rele(ci->ri_dbe);
10079 	return (is_downrev);
10080 }
10081