xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 /*
32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33  * Copyright 2019 Nexenta Systems, Inc.
34  * Copyright 2019 Nexenta by DDN, Inc.
35  * Copyright 2021 Racktop Systems, Inc.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65 
66 #include <fs/fs_reparse.h>
67 
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80 
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83 
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87 
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90 
91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
94 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 /* End of Tunables */
98 
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100 
101 /*
102  * Used to bump the stateid4.seqid value and show changes in the stateid
103  */
104 #define	next_stateid(sp) (++(sp)->bits.chgseq)
105 
106 /*
107  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
108  *	This is used to return NFS4ERR_TOOSMALL when clients specify
109  *	maxcount that isn't large enough to hold the smallest possible
110  *	XDR encoded dirent.
111  *
112  *	    sizeof cookie (8 bytes) +
113  *	    sizeof name_len (4 bytes) +
114  *	    sizeof smallest (padded) name (4 bytes) +
115  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
116  *	    sizeof attrlist4_len (4 bytes) +
117  *	    sizeof next boolean (4 bytes)
118  *
119  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
120  * the smallest possible entry4 (assumes no attrs requested).
121  *	sizeof nfsstat4 (4 bytes) +
122  *	sizeof verifier4 (8 bytes) +
123  *	sizeof entry4list bool (4 bytes) +
124  *	sizeof entry4 (36 bytes) +
125  *	sizeof eof bool (4 bytes)
126  *
127  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
128  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
129  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
130  *	required for a given name length.  MAXNAMELEN is the maximum
131  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
132  *	macros are to allow for . and .. entries -- just a minor tweak to try
133  *	and guarantee that buffer we give to VOP_READDIR will be large enough
134  *	to hold ., .., and the largest possible solaris dirent64.
135  */
136 #define	RFS4_MINLEN_ENTRY4 36
137 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
138 #define	RFS4_MINLEN_RDDIR_BUF \
139 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
140 
141 /*
142  * It would be better to pad to 4 bytes since that's what XDR would do,
143  * but the dirents UFS gives us are already padded to 8, so just take
144  * what we're given.  Dircount is only a hint anyway.  Currently the
145  * solaris kernel is ASCII only, so there's no point in calling the
146  * UTF8 functions.
147  *
 * dirent64: name padded to provide 8 byte struct alignment
149  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
150  *
151  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
152  *
153  */
154 #define	DIRENT64_TO_DIRCOUNT(dp) \
155 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
156 
157 
158 static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */
159 
160 u_longlong_t	nfs4_srv_caller_id;
161 uint_t		nfs4_srv_vkey = 0;
162 
163 void	rfs4_init_compound_state(struct compound_state *);
164 
165 static void	nullfree(caddr_t);
166 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 		    struct compound_state *);
168 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 		    struct compound_state *);
170 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 		    struct compound_state *);
172 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 		    struct compound_state *);
174 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 		    struct compound_state *);
176 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 		    struct svc_req *, struct compound_state *);
178 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 		    struct svc_req *, struct compound_state *);
180 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 		    struct compound_state *);
182 static void	rfs4_op_getattr_free(nfs_resop4 *);
183 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 		    struct compound_state *);
185 static void	rfs4_op_getfh_free(nfs_resop4 *);
186 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 		    struct compound_state *);
188 static void	rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 		    struct compound_state *);
190 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 		    struct compound_state *);
192 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 		    struct compound_state *);
194 static void	lock_denied_free(nfs_resop4 *);
195 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 		    struct compound_state *);
197 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 		    struct compound_state *);
199 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 		    struct compound_state *);
201 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 		    struct compound_state *);
203 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
204 		    struct svc_req *req, struct compound_state *cs);
205 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 		    struct compound_state *);
207 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 		    struct compound_state *);
209 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
210 		    struct svc_req *, struct compound_state *);
211 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
212 		    struct svc_req *, struct compound_state *);
213 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 		    struct compound_state *);
215 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 		    struct compound_state *);
217 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 		    struct compound_state *);
219 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 		    struct compound_state *);
221 static void	rfs4_op_read_free(nfs_resop4 *);
222 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
223 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 		    struct compound_state *);
225 static void	rfs4_op_readlink_free(nfs_resop4 *);
226 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
227 		    struct svc_req *, struct compound_state *);
228 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 		    struct compound_state *);
230 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 		    struct compound_state *);
232 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 		    struct compound_state *);
234 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 		    struct compound_state *);
236 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 		    struct compound_state *);
238 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 		    struct compound_state *);
240 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 		    struct compound_state *);
242 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 		    struct compound_state *);
244 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
245 		    struct svc_req *, struct compound_state *);
246 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
247 		    struct svc_req *req, struct compound_state *);
248 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
249 		    struct compound_state *);
250 static void	rfs4_op_secinfo_free(nfs_resop4 *);
251 
252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
253     struct svc_req *req, struct compound_state *cs);
254 void rfs4x_exchange_id_free(nfs_resop4 *);
255 
256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
257     struct svc_req *req, struct compound_state *cs);
258 
259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
260     struct svc_req *req, compound_state_t *cs);
261 
262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
263     struct svc_req *req, struct compound_state *cs);
264 
265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
266     struct svc_req *req, compound_state_t *cs);
267 
268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
269     struct svc_req *req, compound_state_t *cs);
270 
271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
272     struct svc_req *req, compound_state_t *cs);
273 
274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
275     struct svc_req *req, compound_state_t *cs);
276 
277 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
278 		    struct svc_req *);
279 nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
280 void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
281 
282 /*
283  * translation table for attrs
284  */
285 struct nfs4_ntov_table {
286 	union nfs4_attr_u *na;
287 	uint8_t amap[NFS4_MAXNUM_ATTRS];
288 	int attrcnt;
289 	bool_t vfsstat;
290 };
291 
292 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
293 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
294 		    struct nfs4_svgetit_arg *sargp);
295 
296 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
297 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
298 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
299 
300 static void	hanfsv4_failover(nfs4_srv_t *);
301 
302 fem_t		*deleg_rdops;
303 fem_t		*deleg_wrops;
304 
305 /*
306  * NFS4 op dispatch table
307  */
308 
309 struct rfsv4disp {
310 	void	(*dis_proc)();		/* proc to call */
311 	void	(*dis_resfree)();	/* frees space allocated by proc */
312 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
313 };
314 
315 static struct rfsv4disp rfsv4disptab[] = {
316 	/*
317 	 * NFS VERSION 4
318 	 */
319 
320 	/* RFS_NULL = 0 */
321 	{rfs4_op_illegal, nullfree, 0},
322 
323 	/* UNUSED = 1 */
324 	{rfs4_op_illegal, nullfree, 0},
325 
326 	/* UNUSED = 2 */
327 	{rfs4_op_illegal, nullfree, 0},
328 
329 	/* OP_ACCESS = 3 */
330 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
331 
332 	/* OP_CLOSE = 4 */
333 	{rfs4_op_close, nullfree, 0},
334 
335 	/* OP_COMMIT = 5 */
336 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
337 
338 	/* OP_CREATE = 6 */
339 	{rfs4_op_create, nullfree, 0},
340 
341 	/* OP_DELEGPURGE = 7 */
342 	{rfs4_op_delegpurge, nullfree, 0},
343 
344 	/* OP_DELEGRETURN = 8 */
345 	{rfs4_op_delegreturn, nullfree, 0},
346 
347 	/* OP_GETATTR = 9 */
348 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
349 
350 	/* OP_GETFH = 10 */
351 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
352 
353 	/* OP_LINK = 11 */
354 	{rfs4_op_link, nullfree, 0},
355 
356 	/* OP_LOCK = 12 */
357 	{rfs4_op_lock, lock_denied_free, 0},
358 
359 	/* OP_LOCKT = 13 */
360 	{rfs4_op_lockt, lock_denied_free, 0},
361 
362 	/* OP_LOCKU = 14 */
363 	{rfs4_op_locku, nullfree, 0},
364 
365 	/* OP_LOOKUP = 15 */
366 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
367 
368 	/* OP_LOOKUPP = 16 */
369 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
370 
371 	/* OP_NVERIFY = 17 */
372 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
373 
374 	/* OP_OPEN = 18 */
375 	{rfs4_op_open, rfs4_free_reply, 0},
376 
377 	/* OP_OPENATTR = 19 */
378 	{rfs4_op_openattr, nullfree, 0},
379 
380 	/* OP_OPEN_CONFIRM = 20 */
381 	{rfs4_op_open_confirm, nullfree, 0},
382 
383 	/* OP_OPEN_DOWNGRADE = 21 */
384 	{rfs4_op_open_downgrade, nullfree, 0},
385 
	/* OP_PUTFH = 22 */
387 	{rfs4_op_putfh, nullfree, RPC_ALL},
388 
389 	/* OP_PUTPUBFH = 23 */
390 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
391 
392 	/* OP_PUTROOTFH = 24 */
393 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
394 
395 	/* OP_READ = 25 */
396 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
397 
398 	/* OP_READDIR = 26 */
399 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
400 
401 	/* OP_READLINK = 27 */
402 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
403 
404 	/* OP_REMOVE = 28 */
405 	{rfs4_op_remove, nullfree, 0},
406 
407 	/* OP_RENAME = 29 */
408 	{rfs4_op_rename, nullfree, 0},
409 
410 	/* OP_RENEW = 30 */
411 	{rfs4_op_renew, nullfree, 0},
412 
413 	/* OP_RESTOREFH = 31 */
414 	{rfs4_op_restorefh, nullfree, RPC_ALL},
415 
416 	/* OP_SAVEFH = 32 */
417 	{rfs4_op_savefh, nullfree, RPC_ALL},
418 
419 	/* OP_SECINFO = 33 */
420 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
421 
422 	/* OP_SETATTR = 34 */
423 	{rfs4_op_setattr, nullfree, 0},
424 
425 	/* OP_SETCLIENTID = 35 */
426 	{rfs4_op_setclientid, nullfree, 0},
427 
428 	/* OP_SETCLIENTID_CONFIRM = 36 */
429 	{rfs4_op_setclientid_confirm, nullfree, 0},
430 
431 	/* OP_VERIFY = 37 */
432 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
433 
434 	/* OP_WRITE = 38 */
435 	{rfs4_op_write, nullfree, 0},
436 
437 	/* OP_RELEASE_LOCKOWNER = 39 */
438 	{rfs4_op_release_lockowner, nullfree, 0},
439 
440 	/*
441 	 * NFSv4.1 operations
442 	 */
443 
444 	/* OP_BACKCHANNEL_CTL = 40 */
445 	{rfs4_op_notsup,  nullfree,  0},
446 
447 	/*  OP_BIND_CONN_TO_SESSION = 41 */
448 	{rfs4x_op_bind_conn_to_session,  nullfree,  0},
449 
450 	/* OP_EXCHANGE_ID  = 42 */
451 	{rfs4x_op_exchange_id,  rfs4x_exchange_id_free,  0},
452 
453 	/* OP_CREATE_SESSION = 43 */
454 	{rfs4x_op_create_session,  nullfree,  0},
455 
456 	/* OP_DESTROY_SESSION = 44 */
457 	{rfs4x_op_destroy_session,  nullfree,  0},
458 
459 	/* OP_FREE_STATEID = 45 */
460 	{rfs4_op_notsup,  nullfree,  0},
461 
462 	/* OP_GET_DIR_DELEGATION = 46 */
463 	{rfs4_op_notsup,  nullfree,  0},
464 
465 	/* OP_GETDEVICEINFO = 47 */
466 	{rfs4_op_notsup,  nullfree,  0},
467 
468 	/* OP_GETDEVICELIST = 48 */
469 	{rfs4_op_notsup,  nullfree,  0},
470 
471 	/* OP_LAYOUTCOMMIT = 49 */
472 	{rfs4_op_notsup,  nullfree,  0},
473 
474 	/* OP_LAYOUTGET = 50 */
475 	{rfs4_op_notsup,  nullfree,  0},
476 
477 	/* OP_LAYOUTRETURN = 51 */
478 	{rfs4_op_notsup,  nullfree,  0},
479 
480 	/* OP_SECINFO_NO_NAME = 52 */
481 	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
482 
483 	/* OP_SEQUENCE = 53 */
484 	{rfs4x_op_sequence,  nullfree,  0},
485 
486 	/* OP_SET_SSV = 54 */
487 	{rfs4_op_notsup,  nullfree,  0},
488 
489 	/* OP_TEST_STATEID = 55 */
490 	{rfs4_op_notsup,  nullfree,  0},
491 
492 	/* OP_WANT_DELEGATION = 56 */
493 	{rfs4_op_notsup,  nullfree,  0},
494 
495 	/* OP_DESTROY_CLIENTID = 57 */
496 	{rfs4x_op_destroy_clientid,  nullfree,  0},
497 
498 	/* OP_RECLAIM_COMPLETE = 58 */
499 	{rfs4x_op_reclaim_complete,  nullfree,  0},
500 };
501 
502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
503 
504 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
505 
506 #ifdef DEBUG
507 
508 int		rfs4_fillone_debug = 0;
509 int		rfs4_no_stub_access = 1;
510 int		rfs4_rddir_debug = 0;
511 
/*
 * Printable operation names for debugging.  Entries are listed in the same
 * order as rfsv4disptab[] (i.e. indexed by operation number), followed by
 * one extra trailing entry for the illegal operation.
 */
static char    *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; match the handler's real name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	/* NFSv4.1 */
	"backchannel_ctl",
	"bind_conn_to_session",
	"exchange_id",
	"create_session",
	"destroy_session",
	"free_stateid",
	"get_dir_delegation",
	"getdeviceinfo",
	"getdevicelist",
	"layoutcommit",
	"layoutget",
	"layoutreturn",
	"secinfo_no_name",
	"sequence",
	"set_ssv",
	"test_stateid",
	"want_delegation",
	"destroy_clientid",
	"reclaim_complete",
	"rfs4_op_illegal"
};
575 
576 #endif
577 
578 void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
579 
580 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
581 
582 extern void	rfs4_free_fs_locations4(fs_locations4 *);
583 
584 #ifdef	nextdp
585 #undef nextdp
586 #endif
587 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
588 
589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
590 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
591 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
592 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
593 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
594 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
595 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
596 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
597 	NULL,			NULL
598 };
599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
600 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
601 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
602 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
603 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
604 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
605 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
606 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
607 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
608 	NULL,			NULL
609 };
610 
611 nfs4_srv_t *
612 nfs4_get_srv(void)
613 {
614 	nfs_globals_t *ng = nfs_srv_getzg();
615 	nfs4_srv_t *srv = ng->nfs4_srv;
616 	ASSERT(srv != NULL);
617 	return (srv);
618 }
619 
620 void
621 rfs4_srv_zone_init(nfs_globals_t *ng)
622 {
623 	nfs4_srv_t *nsrv4;
624 	timespec32_t verf;
625 
626 	nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
627 
628 	/*
629 	 * The following algorithm attempts to find a unique verifier
630 	 * to be used as the write verifier returned from the server
631 	 * to the client.  It is important that this verifier change
632 	 * whenever the server reboots.  Of secondary importance, it
633 	 * is important for the verifier to be unique between two
634 	 * different servers.
635 	 *
636 	 * Thus, an attempt is made to use the system hostid and the
637 	 * current time in seconds when the nfssrv kernel module is
638 	 * loaded.  It is assumed that an NFS server will not be able
639 	 * to boot and then to reboot in less than a second.  If the
640 	 * hostid has not been set, then the current high resolution
641 	 * time is used.  This will ensure different verifiers each
642 	 * time the server reboots and minimize the chances that two
643 	 * different servers will have the same verifier.
644 	 * XXX - this is broken on LP64 kernels.
645 	 */
646 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
647 	if (verf.tv_sec != 0) {
648 		verf.tv_nsec = gethrestime_sec();
649 	} else {
650 		timespec_t tverf;
651 
652 		gethrestime(&tverf);
653 		verf.tv_sec = (time_t)tverf.tv_sec;
654 		verf.tv_nsec = tverf.tv_nsec;
655 	}
656 	nsrv4->write4verf = *(uint64_t *)&verf;
657 
658 	/* Used to manage create/destroy of server state */
659 	nsrv4->nfs4_server_state = NULL;
660 	nsrv4->nfs4_cur_servinst = NULL;
661 	nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
662 	mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
663 	mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
664 	mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
665 	rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
666 
667 	ng->nfs4_srv = nsrv4;
668 }
669 
670 void
671 rfs4_srv_zone_fini(nfs_globals_t *ng)
672 {
673 	nfs4_srv_t *nsrv4 = ng->nfs4_srv;
674 
675 	ng->nfs4_srv = NULL;
676 
677 	mutex_destroy(&nsrv4->deleg_lock);
678 	mutex_destroy(&nsrv4->state_lock);
679 	mutex_destroy(&nsrv4->servinst_lock);
680 	rw_destroy(&nsrv4->deleg_policy_lock);
681 
682 	kmem_free(nsrv4, sizeof (*nsrv4));
683 }
684 
685 void
686 rfs4_srvrinit(void)
687 {
688 	extern void rfs4_attr_init();
689 
690 	rfs4_attr_init();
691 
692 	if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
693 		rfs4_disable_delegation();
694 	} else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
695 	    &deleg_wrops) != 0) {
696 		rfs4_disable_delegation();
697 		fem_free(deleg_rdops);
698 	}
699 
700 	nfs4_srv_caller_id = fs_new_caller_id();
701 	lockt_sysid = lm_alloc_sysidt();
702 	vsd_create(&nfs4_srv_vkey, NULL);
703 	rfs4_state_g_init();
704 }
705 
706 void
707 rfs4_srvrfini(void)
708 {
709 	if (lockt_sysid != LM_NOSYSID) {
710 		lm_free_sysidt(lockt_sysid);
711 		lockt_sysid = LM_NOSYSID;
712 	}
713 
714 	rfs4_state_g_fini();
715 
716 	fem_free(deleg_rdops);
717 	fem_free(deleg_wrops);
718 }
719 
720 void
721 rfs4_do_server_start(int server_upordown, int srv_delegation,
722     nfs4_minor_t nfs4_minor_max, int cluster_booted)
723 {
724 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
725 
726 	/* Is this a warm start? */
727 	if (server_upordown == NFS_SERVER_QUIESCED) {
728 		cmn_err(CE_NOTE, "nfs4_srv: "
729 		    "server was previously quiesced; "
730 		    "existing NFSv4 state will be re-used");
731 
732 		/*
733 		 * HA-NFSv4: this is also the signal
734 		 * that a Resource Group failover has
735 		 * occurred.
736 		 */
737 		if (cluster_booted)
738 			hanfsv4_failover(nsrv4);
739 	} else {
740 		/* Cold start */
741 		nsrv4->rfs4_start_time = 0;
742 		rfs4_state_zone_init(nsrv4);
743 		nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
744 		    nfs4_drc_hash);
745 
746 		/*
747 		 * The nfsd service was started with the -s option
748 		 * we need to pull in any state from the paths indicated.
749 		 */
750 		if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
751 			/* read in the stable storage state from these paths */
752 			rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
753 			    rfs4_dss_newpaths);
754 		}
755 	}
756 
757 	nsrv4->nfs4_minor_max = nfs4_minor_max;
758 
759 	/* Check if delegation is to be enabled */
760 	if (srv_delegation != FALSE)
761 		rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
762 }
763 
764 void
765 rfs4_init_compound_state(struct compound_state *cs)
766 {
767 	bzero(cs, sizeof (*cs));
768 	cs->cont = TRUE;
769 	cs->access = CS_ACCESS_DENIED;
770 	cs->deleg = FALSE;
771 	cs->mandlock = FALSE;
772 	cs->fh.nfs_fh4_val = cs->fhbuf;
773 }
774 
775 /* Do cleanup of the compound_state */
776 void
777 rfs4_fini_compound_state(struct compound_state *cs)
778 {
779 	if (cs->vp) {
780 		VN_RELE(cs->vp);
781 	}
782 	if (cs->saved_vp) {
783 		VN_RELE(cs->saved_vp);
784 	}
785 	if (cs->cr) {
786 		crfree(cs->cr);
787 	}
788 	if (cs->saved_fh.nfs_fh4_val) {
789 		kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
790 	}
791 	if (cs->sp) {
792 		rfs4x_session_rele(cs->sp);
793 	}
794 }
795 
796 void
797 rfs4_grace_start(rfs4_servinst_t *sip)
798 {
799 	rw_enter(&sip->rwlock, RW_WRITER);
800 	sip->start_time = nfs_sys_uptime();
801 	sip->grace_period = rfs4_grace_period;
802 	rw_exit(&sip->rwlock);
803 }
804 
805 /*
806  * returns true if the instance's grace period has never been started
807  */
808 int
809 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
810 {
811 	time_t start_time;
812 
813 	rw_enter(&sip->rwlock, RW_READER);
814 	start_time = sip->start_time;
815 	rw_exit(&sip->rwlock);
816 
817 	return (start_time == 0);
818 }
819 
820 /*
821  * Indicates if server instance is within the
822  * grace period.
823  */
824 int
825 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
826 {
827 	time_t grace_expiry;
828 
829 	/* All clients called reclaim-complete */
830 	if (sip->nreclaim == 0 || sip->grace_period == 0)
831 		return (0);
832 
833 	rw_enter(&sip->rwlock, RW_READER);
834 	grace_expiry = sip->start_time + sip->grace_period;
835 	rw_exit(&sip->rwlock);
836 
837 	if (nfs_sys_uptime() < grace_expiry)
838 		return (1);
839 
840 	/* Once grace period ends, optimize next calls */
841 	sip->grace_period = 0;
842 	return (0);
843 }
844 
845 int
846 rfs4_clnt_in_grace(rfs4_client_t *cp)
847 {
848 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849 
850 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
851 }
852 
853 /*
854  * reset all currently active grace periods
855  */
856 void
857 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
858 {
859 	rfs4_servinst_t *sip;
860 
861 	mutex_enter(&nsrv4->servinst_lock);
862 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
863 		if (rfs4_servinst_in_grace(sip))
864 			rfs4_grace_start(sip);
865 	mutex_exit(&nsrv4->servinst_lock);
866 }
867 
868 /*
869  * start any new instances' grace periods
870  */
871 void
872 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
873 {
874 	rfs4_servinst_t *sip;
875 
876 	mutex_enter(&nsrv4->servinst_lock);
877 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
878 		if (rfs4_servinst_grace_new(sip))
879 			rfs4_grace_start(sip);
880 	mutex_exit(&nsrv4->servinst_lock);
881 }
882 
883 static rfs4_dss_path_t *
884 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
885     char *path, unsigned index)
886 {
887 	size_t len;
888 	rfs4_dss_path_t *dss_path;
889 
890 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
891 
892 	/*
893 	 * Take a copy of the string, since the original may be overwritten.
894 	 * Sadly, no strdup() in the kernel.
895 	 */
896 	/* allow for NUL */
897 	len = strlen(path) + 1;
898 	dss_path->path = kmem_alloc(len, KM_SLEEP);
899 	(void) strlcpy(dss_path->path, path, len);
900 
901 	/* associate with servinst */
902 	dss_path->sip = sip;
903 	dss_path->index = index;
904 
905 	/*
906 	 * Add to list of served paths.
907 	 * No locking required, as we're only ever called at startup.
908 	 */
909 	if (nsrv4->dss_pathlist == NULL) {
910 		/* this is the first dss_path_t */
911 
912 		/* needed for insque/remque */
913 		dss_path->next = dss_path->prev = dss_path;
914 
915 		nsrv4->dss_pathlist = dss_path;
916 	} else {
917 		insque(dss_path, nsrv4->dss_pathlist);
918 	}
919 
920 	return (dss_path);
921 }
922 
923 /*
924  * Create a new server instance, and make it the currently active instance.
925  * Note that starting the grace period too early will reduce the clients'
926  * recovery window.
927  */
928 void
929 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
930     int dss_npaths, char **dss_paths)
931 {
932 	unsigned i;
933 	rfs4_servinst_t *sip;
934 	rfs4_oldstate_t *oldstate;
935 
936 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
937 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
938 
939 	sip->nreclaim = 0;
940 	sip->start_time = (time_t)0;
941 	sip->grace_period = (time_t)0;
942 	sip->next = NULL;
943 	sip->prev = NULL;
944 
945 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
946 	/*
947 	 * This initial dummy entry is required to setup for insque/remque.
948 	 * It must be skipped over whenever the list is traversed.
949 	 */
950 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
951 	/* insque/remque require initial list entry to be self-terminated */
952 	oldstate->next = oldstate;
953 	oldstate->prev = oldstate;
954 	sip->oldstate = oldstate;
955 
956 
957 	sip->dss_npaths = dss_npaths;
958 	sip->dss_paths = kmem_alloc(dss_npaths *
959 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
960 
961 	for (i = 0; i < dss_npaths; i++) {
962 		sip->dss_paths[i] =
963 		    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
964 	}
965 
966 	mutex_enter(&nsrv4->servinst_lock);
967 	if (nsrv4->nfs4_cur_servinst != NULL) {
968 		/* add to linked list */
969 		sip->prev = nsrv4->nfs4_cur_servinst;
970 		nsrv4->nfs4_cur_servinst->next = sip;
971 	}
972 	if (start_grace)
973 		rfs4_grace_start(sip);
974 	/* make the new instance "current" */
975 	nsrv4->nfs4_cur_servinst = sip;
976 
977 	mutex_exit(&nsrv4->servinst_lock);
978 }
979 
980 /*
981  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
982  * all instances directly.
983  */
984 void
985 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
986 {
987 	rfs4_servinst_t *sip, *prev, *current;
988 #ifdef DEBUG
989 	int n = 0;
990 #endif
991 
992 	mutex_enter(&nsrv4->servinst_lock);
993 	ASSERT(nsrv4->nfs4_cur_servinst != NULL);
994 	current = nsrv4->nfs4_cur_servinst;
995 	nsrv4->nfs4_cur_servinst = NULL;
996 	for (sip = current; sip != NULL; sip = prev) {
997 		prev = sip->prev;
998 		rw_destroy(&sip->rwlock);
999 		if (sip->oldstate)
1000 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1001 		if (sip->dss_paths) {
1002 			int i = sip->dss_npaths;
1003 
1004 			while (i > 0) {
1005 				i--;
1006 				if (sip->dss_paths[i] != NULL) {
1007 					char *path = sip->dss_paths[i]->path;
1008 
1009 					if (path != NULL) {
1010 						kmem_free(path,
1011 						    strlen(path) + 1);
1012 					}
1013 					kmem_free(sip->dss_paths[i],
1014 					    sizeof (rfs4_dss_path_t));
1015 				}
1016 			}
1017 			kmem_free(sip->dss_paths,
1018 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1019 		}
1020 		kmem_free(sip, sizeof (rfs4_servinst_t));
1021 #ifdef DEBUG
1022 		n++;
1023 #endif
1024 	}
1025 	mutex_exit(&nsrv4->servinst_lock);
1026 }
1027 
1028 /*
1029  * Assign the current server instance to a client_t.
1030  * Should be called with cp->rc_dbe held.
1031  */
1032 void
1033 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1034     rfs4_servinst_t *sip)
1035 {
1036 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1037 
1038 	/*
1039 	 * The lock ensures that if the current instance is in the process
1040 	 * of changing, we will see the new one.
1041 	 */
1042 	mutex_enter(&nsrv4->servinst_lock);
1043 	cp->rc_server_instance = sip;
1044 	mutex_exit(&nsrv4->servinst_lock);
1045 }
1046 
1047 rfs4_servinst_t *
1048 rfs4_servinst(rfs4_client_t *cp)
1049 {
1050 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1051 
1052 	return (cp->rc_server_instance);
1053 }
1054 
1055 /* ARGSUSED */
1056 static void
1057 nullfree(caddr_t resop)
1058 {
1059 }
1060 
1061 /*
1062  * This is a fall-through for invalid or not implemented (yet) ops
1063  */
1064 /* ARGSUSED */
1065 static void
1066 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1067     struct compound_state *cs)
1068 {
1069 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1070 }
1071 
1072 /*
1073  * Check if the security flavor, nfsnum, is in the flavor_list.
1074  */
1075 bool_t
1076 in_flavor_list(int nfsnum, int *flavor_list, int count)
1077 {
1078 	int i;
1079 
1080 	for (i = 0; i < count; i++) {
1081 		if (nfsnum == flavor_list[i])
1082 			return (TRUE);
1083 	}
1084 	return (FALSE);
1085 }
1086 
1087 /*
1088  * Used by rfs4_op_secinfo to get the security information from the
1089  * export structure associated with the component.
1090  */
1091 /* ARGSUSED */
1092 nfsstat4
1093 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
1094 {
1095 	int error, different_export = 0;
1096 	vnode_t *dvp, *vp;
1097 	struct exportinfo *exi;
1098 	fid_t fid;
1099 	uint_t count, i;
1100 	secinfo4 *resok_val;
1101 	struct secinfo *secp;
1102 	seconfig_t *si;
1103 	bool_t did_traverse = FALSE;
1104 	int dotdot, walk;
1105 	nfs_export_t *ne = nfs_get_export();
1106 
1107 	dvp = cs->vp;
1108 	exi = cs->exi;
1109 	ASSERT(exi != NULL);
1110 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
1111 
1112 	/*
1113 	 * If dotdotting, then need to check whether it's above the
1114 	 * root of a filesystem, or above an export point.
1115 	 */
1116 	if (dotdot) {
1117 		vnode_t *zone_rootvp = ne->exi_root->exi_vp;
1118 
1119 		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
1120 		/*
1121 		 * If dotdotting at the root of a filesystem, then
1122 		 * need to traverse back to the mounted-on filesystem
1123 		 * and do the dotdot lookup there.
1124 		 */
1125 		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
1126 
1127 			/*
1128 			 * If at the system root, then can
1129 			 * go up no further.
1130 			 */
1131 			if (VN_CMP(dvp, zone_rootvp))
1132 				return (puterrno4(ENOENT));
1133 
1134 			/*
1135 			 * Traverse back to the mounted-on filesystem
1136 			 */
1137 			dvp = untraverse(dvp, zone_rootvp);
1138 
1139 			/*
1140 			 * Set the different_export flag so we remember
1141 			 * to pick up a new exportinfo entry for
1142 			 * this new filesystem.
1143 			 */
1144 			different_export = 1;
1145 		} else {
1146 
1147 			/*
1148 			 * If dotdotting above an export point then set
1149 			 * the different_export to get new export info.
1150 			 */
1151 			different_export = nfs_exported(exi, dvp);
1152 		}
1153 	}
1154 
1155 	/*
1156 	 * Get the vnode for the component "nm".
1157 	 */
1158 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1159 	    NULL, NULL, NULL);
1160 	if (error)
1161 		return (puterrno4(error));
1162 
1163 	/*
1164 	 * If the vnode is in a pseudo filesystem, or if the security flavor
1165 	 * used in the request is valid but not an explicitly shared flavor,
1166 	 * or the access bit indicates that this is a limited access,
1167 	 * check whether this vnode is visible.
1168 	 */
1169 	if (!different_export &&
1170 	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1171 	    cs->access & CS_ACCESS_LIMITED)) {
1172 		if (! nfs_visible(exi, vp, &different_export)) {
1173 			VN_RELE(vp);
1174 			return (puterrno4(ENOENT));
1175 		}
1176 	}
1177 
1178 	/*
1179 	 * If it's a mountpoint, then traverse it.
1180 	 */
1181 	if (vn_ismntpt(vp)) {
1182 		if ((error = traverse(&vp)) != 0) {
1183 			VN_RELE(vp);
1184 			return (puterrno4(error));
1185 		}
1186 		/* remember that we had to traverse mountpoint */
1187 		did_traverse = TRUE;
1188 		different_export = 1;
1189 	} else if (vp->v_vfsp != dvp->v_vfsp) {
1190 		/*
1191 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1192 		 * then vp is probably an LOFS object.  We don't need the
1193 		 * realvp, we just need to know that we might have crossed
1194 		 * a server fs boundary and need to call checkexport4.
1195 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1196 		 * traverse)
1197 		 */
1198 		different_export = 1;
1199 	}
1200 
1201 	/*
1202 	 * Get the export information for it.
1203 	 */
1204 	if (different_export) {
1205 
1206 		bzero(&fid, sizeof (fid));
1207 		fid.fid_len = MAXFIDSZ;
1208 		error = vop_fid_pseudo(vp, &fid);
1209 		if (error) {
1210 			VN_RELE(vp);
1211 			return (puterrno4(error));
1212 		}
1213 
1214 		/* We'll need to reassign "exi". */
1215 		if (dotdot)
1216 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1217 		else
1218 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1219 
1220 		if (exi == NULL) {
1221 			if (did_traverse == TRUE) {
1222 				/*
1223 				 * If this vnode is a mounted-on vnode,
1224 				 * but the mounted-on file system is not
1225 				 * exported, send back the secinfo for
1226 				 * the exported node that the mounted-on
1227 				 * vnode lives in.
1228 				 */
1229 				exi = cs->exi;
1230 			} else {
1231 				VN_RELE(vp);
1232 				return (puterrno4(EACCES));
1233 			}
1234 		}
1235 	}
1236 	ASSERT(exi != NULL);
1237 
1238 
1239 	/*
1240 	 * Create the secinfo result based on the security information
1241 	 * from the exportinfo structure (exi).
1242 	 *
1243 	 * Return all flavors for a pseudo node.
1244 	 * For a real export node, return the flavor that the client
1245 	 * has access with.
1246 	 */
1247 	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1248 	if (PSEUDO(exi)) {
1249 		count = exi->exi_export.ex_seccnt; /* total sec count */
1250 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1251 		secp = exi->exi_export.ex_secinfo;
1252 
1253 		for (i = 0; i < count; i++) {
1254 			si = &secp[i].s_secinfo;
1255 			resok_val[i].flavor = si->sc_rpcnum;
1256 			if (resok_val[i].flavor == RPCSEC_GSS) {
1257 				rpcsec_gss_info *info;
1258 
1259 				info = &resok_val[i].flavor_info;
1260 				info->qop = si->sc_qop;
1261 				info->service = (rpc_gss_svc_t)si->sc_service;
1262 
1263 				/* get oid opaque data */
1264 				info->oid.sec_oid4_len =
1265 				    si->sc_gss_mech_type->length;
1266 				info->oid.sec_oid4_val = kmem_alloc(
1267 				    si->sc_gss_mech_type->length, KM_SLEEP);
1268 				bcopy(
1269 				    si->sc_gss_mech_type->elements,
1270 				    info->oid.sec_oid4_val,
1271 				    info->oid.sec_oid4_len);
1272 			}
1273 		}
1274 		resp->SECINFO4resok_len = count;
1275 		resp->SECINFO4resok_val = resok_val;
1276 	} else {
1277 		int ret_cnt = 0, k = 0;
1278 		int *flavor_list;
1279 
1280 		count = exi->exi_export.ex_seccnt; /* total sec count */
1281 		secp = exi->exi_export.ex_secinfo;
1282 
1283 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1284 		/* find out which flavors to return */
1285 		for (i = 0; i < count; i ++) {
1286 			int access, flavor, perm;
1287 
1288 			flavor = secp[i].s_secinfo.sc_nfsnum;
1289 			perm = secp[i].s_flags;
1290 
1291 			access = nfsauth4_secinfo_access(exi, cs->req,
1292 			    flavor, perm, cs->basecr);
1293 
1294 			if (! (access & NFSAUTH_DENIED) &&
1295 			    ! (access & NFSAUTH_WRONGSEC)) {
1296 				flavor_list[ret_cnt] = flavor;
1297 				ret_cnt++;
1298 			}
1299 		}
1300 
1301 		/* Create the returning SECINFO value */
1302 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1303 
1304 		for (i = 0; i < count; i++) {
1305 			/*
1306 			 * If the flavor is in the flavor list,
1307 			 * fill in resok_val.
1308 			 */
1309 			si = &secp[i].s_secinfo;
1310 			if (in_flavor_list(si->sc_nfsnum,
1311 			    flavor_list, ret_cnt)) {
1312 				resok_val[k].flavor = si->sc_rpcnum;
1313 				if (resok_val[k].flavor == RPCSEC_GSS) {
1314 					rpcsec_gss_info *info;
1315 
1316 					info = &resok_val[k].flavor_info;
1317 					info->qop = si->sc_qop;
1318 					info->service = (rpc_gss_svc_t)
1319 					    si->sc_service;
1320 
1321 					/* get oid opaque data */
1322 					info->oid.sec_oid4_len =
1323 					    si->sc_gss_mech_type->length;
1324 					info->oid.sec_oid4_val = kmem_alloc(
1325 					    si->sc_gss_mech_type->length,
1326 					    KM_SLEEP);
1327 					bcopy(si->sc_gss_mech_type->elements,
1328 					    info->oid.sec_oid4_val,
1329 					    info->oid.sec_oid4_len);
1330 				}
1331 				k++;
1332 			}
1333 			if (k >= ret_cnt)
1334 				break;
1335 		}
1336 		resp->SECINFO4resok_len = ret_cnt;
1337 		resp->SECINFO4resok_val = resok_val;
1338 		kmem_free(flavor_list, count * sizeof (int));
1339 	}
1340 
1341 	VN_RELE(vp);
1342 	return (NFS4_OK);
1343 }
1344 
1345 /*
1346  * SECINFO (Operation 33): Obtain required security information on
1347  * the component name in the format of (security-mechanism-oid, qop, service)
1348  * triplets.
1349  */
1350 /* ARGSUSED */
1351 static void
1352 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1353     struct compound_state *cs)
1354 {
1355 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1356 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1357 	utf8string *utfnm = &args->name;
1358 	uint_t len;
1359 	char *nm;
1360 	struct sockaddr *ca;
1361 	char *name = NULL;
1362 	nfsstat4 status = NFS4_OK;
1363 
1364 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1365 	    SECINFO4args *, args);
1366 
1367 	/*
1368 	 * Current file handle (cfh) should have been set before getting
1369 	 * into this function. If not, return error.
1370 	 */
1371 	if (cs->vp == NULL) {
1372 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1373 		goto out;
1374 	}
1375 
1376 	if (cs->vp->v_type != VDIR) {
1377 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1378 		goto out;
1379 	}
1380 
1381 	/*
1382 	 * Verify the component name. If failed, error out, but
1383 	 * do not error out if the component name is a "..".
1384 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1385 	 */
1386 	status = utf8_dir_verify(utfnm);
1387 	if (status != NFS4_OK) {
1388 		if (utfnm->utf8string_len != 2 ||
1389 		    utfnm->utf8string_val[0] != '.' ||
1390 		    utfnm->utf8string_val[1] != '.') {
1391 			*cs->statusp = resp->status = status;
1392 			goto out;
1393 		}
1394 	}
1395 
1396 	nm = utf8_to_str(utfnm, &len, NULL);
1397 	if (nm == NULL) {
1398 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1399 		goto out;
1400 	}
1401 
1402 	if (len > MAXNAMELEN) {
1403 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1404 		kmem_free(nm, len);
1405 		goto out;
1406 	}
1407 
1408 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1409 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1410 	    MAXPATHLEN  + 1);
1411 
1412 	if (name == NULL) {
1413 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1414 		kmem_free(nm, len);
1415 		goto out;
1416 	}
1417 
1418 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1419 
1420 	if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1421 		/*
1422 		 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1423 		 *
1424 		 * 2.6.3.1.1.8
1425 		 *	SECINFO and SECINFO_NO_NAME consume the current
1426 		 *	filehandle (note that this is a change from NFSv4.0).
1427 		 *
1428 		 * 18.29.3
1429 		 *	On success, the current filehandle is consumed (see
1430 		 *	Section 2.6.3.1.1.8), and if the next operation after
1431 		 *	SECINFO tries to use the current filehandle, that
1432 		 *	operation will fail with the status
1433 		 *	NFS4ERR_NOFILEHANDLE.
1434 		 */
1435 		VN_RELE(cs->vp);
1436 		cs->vp = NULL;
1437 	}
1438 
1439 	if (name != nm)
1440 		kmem_free(name, MAXPATHLEN + 1);
1441 	kmem_free(nm, len);
1442 
1443 out:
1444 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1445 	    SECINFO4res *, resp);
1446 }
1447 
1448 /*
1449  * Free SECINFO result.
1450  */
1451 /* ARGSUSED */
1452 static void
1453 rfs4_op_secinfo_free(nfs_resop4 *resop)
1454 {
1455 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1456 	int count, i;
1457 	secinfo4 *resok_val;
1458 
1459 	/* If this is not an Ok result, nothing to free. */
1460 	if (resp->status != NFS4_OK) {
1461 		return;
1462 	}
1463 
1464 	count = resp->SECINFO4resok_len;
1465 	resok_val = resp->SECINFO4resok_val;
1466 
1467 	for (i = 0; i < count; i++) {
1468 		if (resok_val[i].flavor == RPCSEC_GSS) {
1469 			rpcsec_gss_info *info;
1470 
1471 			info = &resok_val[i].flavor_info;
1472 			kmem_free(info->oid.sec_oid4_val,
1473 			    info->oid.sec_oid4_len);
1474 		}
1475 	}
1476 	kmem_free(resok_val, count * sizeof (secinfo4));
1477 	resp->SECINFO4resok_len = 0;
1478 	resp->SECINFO4resok_val = NULL;
1479 }
1480 
1481 /* ARGSUSED */
1482 static void
1483 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1484     struct compound_state *cs)
1485 {
1486 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1487 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1488 	int error;
1489 	vnode_t *vp;
1490 	struct vattr va;
1491 	int checkwriteperm;
1492 	cred_t *cr = cs->cr;
1493 	bslabel_t *clabel, *slabel;
1494 	ts_label_t *tslabel;
1495 	boolean_t admin_low_client;
1496 
1497 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1498 	    ACCESS4args *, args);
1499 
1500 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1501 	if (cs->access == CS_ACCESS_DENIED) {
1502 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1503 		goto out;
1504 	}
1505 #endif
1506 	if (cs->vp == NULL) {
1507 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1508 		goto out;
1509 	}
1510 
1511 	ASSERT(cr != NULL);
1512 
1513 	vp = cs->vp;
1514 
1515 	/*
1516 	 * If the file system is exported read only, it is not appropriate
1517 	 * to check write permissions for regular files and directories.
1518 	 * Special files are interpreted by the client, so the underlying
1519 	 * permissions are sent back to the client for interpretation.
1520 	 */
1521 	if (rdonly4(req, cs) &&
1522 	    (vp->v_type == VREG || vp->v_type == VDIR))
1523 		checkwriteperm = 0;
1524 	else
1525 		checkwriteperm = 1;
1526 
1527 	/*
1528 	 * XXX
1529 	 * We need the mode so that we can correctly determine access
1530 	 * permissions relative to a mandatory lock file.  Access to
1531 	 * mandatory lock files is denied on the server, so it might
1532 	 * as well be reflected to the server during the open.
1533 	 */
1534 	va.va_mask = AT_MODE;
1535 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1536 	if (error) {
1537 		*cs->statusp = resp->status = puterrno4(error);
1538 		goto out;
1539 	}
1540 	resp->access = 0;
1541 	resp->supported = 0;
1542 
1543 	if (is_system_labeled()) {
1544 		ASSERT(req->rq_label != NULL);
1545 		clabel = req->rq_label;
1546 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1547 		    "got client label from request(1)",
1548 		    struct svc_req *, req);
1549 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1550 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1551 				*cs->statusp = resp->status = puterrno4(EACCES);
1552 				goto out;
1553 			}
1554 			slabel = label2bslabel(tslabel);
1555 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1556 			    char *, "got server label(1) for vp(2)",
1557 			    bslabel_t *, slabel, vnode_t *, vp);
1558 
1559 			admin_low_client = B_FALSE;
1560 		} else
1561 			admin_low_client = B_TRUE;
1562 	}
1563 
1564 	if (args->access & ACCESS4_READ) {
1565 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1566 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1567 		    (!is_system_labeled() || admin_low_client ||
1568 		    bldominates(clabel, slabel)))
1569 			resp->access |= ACCESS4_READ;
1570 		resp->supported |= ACCESS4_READ;
1571 	}
1572 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1573 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1574 		if (!error && (!is_system_labeled() || admin_low_client ||
1575 		    bldominates(clabel, slabel)))
1576 			resp->access |= ACCESS4_LOOKUP;
1577 		resp->supported |= ACCESS4_LOOKUP;
1578 	}
1579 	if (checkwriteperm &&
1580 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1581 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1582 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1583 		    (!is_system_labeled() || admin_low_client ||
1584 		    blequal(clabel, slabel)))
1585 			resp->access |=
1586 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1587 		resp->supported |=
1588 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1589 	}
1590 
1591 	if (checkwriteperm &&
1592 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1593 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1594 		if (!error && (!is_system_labeled() || admin_low_client ||
1595 		    blequal(clabel, slabel)))
1596 			resp->access |= ACCESS4_DELETE;
1597 		resp->supported |= ACCESS4_DELETE;
1598 	}
1599 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1600 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1601 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1602 		    (!is_system_labeled() || admin_low_client ||
1603 		    bldominates(clabel, slabel)))
1604 			resp->access |= ACCESS4_EXECUTE;
1605 		resp->supported |= ACCESS4_EXECUTE;
1606 	}
1607 
1608 	if (is_system_labeled() && !admin_low_client)
1609 		label_rele(tslabel);
1610 
1611 	*cs->statusp = resp->status = NFS4_OK;
1612 out:
1613 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1614 	    ACCESS4res *, resp);
1615 }
1616 
1617 /* ARGSUSED */
1618 static void
1619 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1620     struct compound_state *cs)
1621 {
1622 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1623 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1624 	int error;
1625 	vnode_t *vp = cs->vp;
1626 	cred_t *cr = cs->cr;
1627 	vattr_t va;
1628 	nfs4_srv_t *nsrv4;
1629 
1630 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1631 	    COMMIT4args *, args);
1632 
1633 	if (vp == NULL) {
1634 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1635 		goto out;
1636 	}
1637 	if (cs->access == CS_ACCESS_DENIED) {
1638 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1639 		goto out;
1640 	}
1641 
1642 	if (args->offset + args->count < args->offset) {
1643 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1644 		goto out;
1645 	}
1646 
1647 	va.va_mask = AT_UID;
1648 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1649 
1650 	/*
1651 	 * If we can't get the attributes, then we can't do the
1652 	 * right access checking.  So, we'll fail the request.
1653 	 */
1654 	if (error) {
1655 		*cs->statusp = resp->status = puterrno4(error);
1656 		goto out;
1657 	}
1658 	if (rdonly4(req, cs)) {
1659 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1660 		goto out;
1661 	}
1662 
1663 	if (vp->v_type != VREG) {
1664 		if (vp->v_type == VDIR)
1665 			resp->status = NFS4ERR_ISDIR;
1666 		else
1667 			resp->status = NFS4ERR_INVAL;
1668 		*cs->statusp = resp->status;
1669 		goto out;
1670 	}
1671 
1672 	if (crgetuid(cr) != va.va_uid &&
1673 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1674 		*cs->statusp = resp->status = puterrno4(error);
1675 		goto out;
1676 	}
1677 
1678 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1679 
1680 	if (error) {
1681 		*cs->statusp = resp->status = puterrno4(error);
1682 		goto out;
1683 	}
1684 
1685 	nsrv4 = nfs4_get_srv();
1686 	*cs->statusp = resp->status = NFS4_OK;
1687 	resp->writeverf = nsrv4->write4verf;
1688 out:
1689 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1690 	    COMMIT4res *, resp);
1691 }
1692 
1693 /*
1694  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1695  * was completed. It does the nfsv4 create for special files.
1696  */
1697 /* ARGSUSED */
1698 static vnode_t *
1699 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1700     struct compound_state *cs, vattr_t *vap, char *nm)
1701 {
1702 	int error;
1703 	cred_t *cr = cs->cr;
1704 	vnode_t *dvp = cs->vp;
1705 	vnode_t *vp = NULL;
1706 	int mode;
1707 	enum vcexcl excl;
1708 
1709 	switch (args->type) {
1710 	case NF4CHR:
1711 	case NF4BLK:
1712 		if (secpolicy_sys_devices(cr) != 0) {
1713 			*cs->statusp = resp->status = NFS4ERR_PERM;
1714 			return (NULL);
1715 		}
1716 		if (args->type == NF4CHR)
1717 			vap->va_type = VCHR;
1718 		else
1719 			vap->va_type = VBLK;
1720 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1721 		    args->ftype4_u.devdata.specdata2);
1722 		vap->va_mask |= AT_RDEV;
1723 		break;
1724 	case NF4SOCK:
1725 		vap->va_type = VSOCK;
1726 		break;
1727 	case NF4FIFO:
1728 		vap->va_type = VFIFO;
1729 		break;
1730 	default:
1731 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1732 		return (NULL);
1733 	}
1734 
1735 	/*
1736 	 * Must specify the mode.
1737 	 */
1738 	if (!(vap->va_mask & AT_MODE)) {
1739 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1740 		return (NULL);
1741 	}
1742 
1743 	excl = EXCL;
1744 
1745 	mode = 0;
1746 
1747 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1748 	if (error) {
1749 		*cs->statusp = resp->status = puterrno4(error);
1750 		return (NULL);
1751 	}
1752 	return (vp);
1753 }
1754 
1755 /*
1756  * nfsv4 create is used to create non-regular files. For regular files,
1757  * use nfsv4 open.
1758  */
1759 /* ARGSUSED */
1760 static void
1761 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1762     struct compound_state *cs)
1763 {
1764 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1765 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1766 	int error;
1767 	struct vattr bva, iva, iva2, ava, *vap;
1768 	cred_t *cr = cs->cr;
1769 	vnode_t *dvp = cs->vp;
1770 	vnode_t *vp = NULL;
1771 	vnode_t *realvp;
1772 	char *nm, *lnm;
1773 	uint_t len, llen;
1774 	int syncval = 0;
1775 	struct nfs4_svgetit_arg sarg;
1776 	struct nfs4_ntov_table ntov;
1777 	struct statvfs64 sb;
1778 	nfsstat4 status;
1779 	struct sockaddr *ca;
1780 	char *name = NULL;
1781 	char *lname = NULL;
1782 
1783 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1784 	    CREATE4args *, args);
1785 
1786 	resp->attrset = 0;
1787 
1788 	if (dvp == NULL) {
1789 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1790 		goto out;
1791 	}
1792 
1793 	/*
1794 	 * If there is an unshared filesystem mounted on this vnode,
1795 	 * do not allow to create an object in this directory.
1796 	 */
1797 	if (vn_ismntpt(dvp)) {
1798 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1799 		goto out;
1800 	}
1801 
1802 	/* Verify that type is correct */
1803 	switch (args->type) {
1804 	case NF4LNK:
1805 	case NF4BLK:
1806 	case NF4CHR:
1807 	case NF4SOCK:
1808 	case NF4FIFO:
1809 	case NF4DIR:
1810 		break;
1811 	default:
1812 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1813 		goto out;
1814 	};
1815 
1816 	if (cs->access == CS_ACCESS_DENIED) {
1817 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1818 		goto out;
1819 	}
1820 	if (dvp->v_type != VDIR) {
1821 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1822 		goto out;
1823 	}
1824 	status = utf8_dir_verify(&args->objname);
1825 	if (status != NFS4_OK) {
1826 		*cs->statusp = resp->status = status;
1827 		goto out;
1828 	}
1829 
1830 	if (rdonly4(req, cs)) {
1831 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1832 		goto out;
1833 	}
1834 
1835 	/*
1836 	 * Name of newly created object
1837 	 */
1838 	nm = utf8_to_fn(&args->objname, &len, NULL);
1839 	if (nm == NULL) {
1840 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1841 		goto out;
1842 	}
1843 
1844 	if (len > MAXNAMELEN) {
1845 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1846 		kmem_free(nm, len);
1847 		goto out;
1848 	}
1849 
1850 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1851 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1852 	    MAXPATHLEN  + 1);
1853 
1854 	if (name == NULL) {
1855 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1856 		kmem_free(nm, len);
1857 		goto out;
1858 	}
1859 
1860 	resp->attrset = 0;
1861 
1862 	sarg.sbp = &sb;
1863 	sarg.is_referral = B_FALSE;
1864 	nfs4_ntov_table_init(&ntov);
1865 
1866 	status = do_rfs4_set_attrs(&resp->attrset,
1867 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1868 
1869 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1870 		status = NFS4ERR_INVAL;
1871 
1872 	if (status != NFS4_OK) {
1873 		*cs->statusp = resp->status = status;
1874 		if (name != nm)
1875 			kmem_free(name, MAXPATHLEN + 1);
1876 		kmem_free(nm, len);
1877 		nfs4_ntov_table_free(&ntov, &sarg);
1878 		resp->attrset = 0;
1879 		goto out;
1880 	}
1881 
1882 	/* Get "before" change value */
1883 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1884 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1885 	if (error) {
1886 		*cs->statusp = resp->status = puterrno4(error);
1887 		if (name != nm)
1888 			kmem_free(name, MAXPATHLEN + 1);
1889 		kmem_free(nm, len);
1890 		nfs4_ntov_table_free(&ntov, &sarg);
1891 		resp->attrset = 0;
1892 		goto out;
1893 	}
1894 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1895 
1896 	vap = sarg.vap;
1897 
1898 	/*
1899 	 * Set the default initial values for attributes when the parent
1900 	 * directory does not have the VSUID/VSGID bit set and they have
1901 	 * not been specified in createattrs.
1902 	 */
1903 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1904 		vap->va_uid = crgetuid(cr);
1905 		vap->va_mask |= AT_UID;
1906 	}
1907 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1908 		vap->va_gid = crgetgid(cr);
1909 		vap->va_mask |= AT_GID;
1910 	}
1911 
1912 	vap->va_mask |= AT_TYPE;
1913 	switch (args->type) {
1914 	case NF4DIR:
1915 		vap->va_type = VDIR;
1916 		if ((vap->va_mask & AT_MODE) == 0) {
1917 			vap->va_mode = 0700;	/* default: owner rwx only */
1918 			vap->va_mask |= AT_MODE;
1919 		}
1920 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1921 		if (error)
1922 			break;
1923 
1924 		/*
1925 		 * Get the initial "after" sequence number, if it fails,
1926 		 * set to zero
1927 		 */
1928 		iva.va_mask = AT_SEQ;
1929 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1930 			iva.va_seq = 0;
1931 		break;
1932 	case NF4LNK:
1933 		vap->va_type = VLNK;
1934 		if ((vap->va_mask & AT_MODE) == 0) {
1935 			vap->va_mode = 0700;	/* default: owner rwx only */
1936 			vap->va_mask |= AT_MODE;
1937 		}
1938 
1939 		/*
1940 		 * symlink names must be treated as data
1941 		 */
1942 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1943 		    &llen, NULL);
1944 
1945 		if (lnm == NULL) {
1946 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1947 			if (name != nm)
1948 				kmem_free(name, MAXPATHLEN + 1);
1949 			kmem_free(nm, len);
1950 			nfs4_ntov_table_free(&ntov, &sarg);
1951 			resp->attrset = 0;
1952 			goto out;
1953 		}
1954 
1955 		if (llen > MAXPATHLEN) {
1956 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1957 			if (name != nm)
1958 				kmem_free(name, MAXPATHLEN + 1);
1959 			kmem_free(nm, len);
1960 			kmem_free(lnm, llen);
1961 			nfs4_ntov_table_free(&ntov, &sarg);
1962 			resp->attrset = 0;
1963 			goto out;
1964 		}
1965 
1966 		lname = nfscmd_convname(ca, cs->exi, lnm,
1967 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1968 
1969 		if (lname == NULL) {
1970 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1971 			if (name != nm)
1972 				kmem_free(name, MAXPATHLEN + 1);
1973 			kmem_free(nm, len);
1974 			kmem_free(lnm, llen);
1975 			nfs4_ntov_table_free(&ntov, &sarg);
1976 			resp->attrset = 0;
1977 			goto out;
1978 		}
1979 
1980 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1981 		if (lname != lnm)
1982 			kmem_free(lname, MAXPATHLEN + 1);
1983 		kmem_free(lnm, llen);
1984 		if (error)
1985 			break;
1986 
1987 		/*
1988 		 * Get the initial "after" sequence number, if it fails,
1989 		 * set to zero
1990 		 */
1991 		iva.va_mask = AT_SEQ;
1992 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1993 			iva.va_seq = 0;
1994 
1995 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1996 		    NULL, NULL, NULL);
1997 		if (error)
1998 			break;
1999 
2000 		/*
2001 		 * va_seq is not safe over VOP calls, check it again
2002 		 * if it has changed zero out iva to force atomic = FALSE.
2003 		 */
2004 		iva2.va_mask = AT_SEQ;
2005 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
2006 		    iva2.va_seq != iva.va_seq)
2007 			iva.va_seq = 0;
2008 		break;
2009 	default:
2010 		/*
2011 		 * probably a special file.
2012 		 */
2013 		if ((vap->va_mask & AT_MODE) == 0) {
2014 			vap->va_mode = 0600;	/* default: owner rw only */
2015 			vap->va_mask |= AT_MODE;
2016 		}
2017 		syncval = FNODSYNC;
2018 		/*
2019 		 * We know this will only generate one VOP call
2020 		 */
2021 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
2022 
2023 		if (vp == NULL) {
2024 			if (name != nm)
2025 				kmem_free(name, MAXPATHLEN + 1);
2026 			kmem_free(nm, len);
2027 			nfs4_ntov_table_free(&ntov, &sarg);
2028 			resp->attrset = 0;
2029 			goto out;
2030 		}
2031 
2032 		/*
2033 		 * Get the initial "after" sequence number, if it fails,
2034 		 * set to zero
2035 		 */
2036 		iva.va_mask = AT_SEQ;
2037 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2038 			iva.va_seq = 0;
2039 
2040 		break;
2041 	}
2042 	if (name != nm)
2043 		kmem_free(name, MAXPATHLEN + 1);
2044 	kmem_free(nm, len);
2045 
2046 	if (error) {
2047 		*cs->statusp = resp->status = puterrno4(error);
2048 	}
2049 
2050 	/*
2051 	 * Force modified data and metadata out to stable storage.
2052 	 */
2053 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2054 
2055 	if (resp->status != NFS4_OK) {
2056 		if (vp != NULL)
2057 			VN_RELE(vp);
2058 		nfs4_ntov_table_free(&ntov, &sarg);
2059 		resp->attrset = 0;
2060 		goto out;
2061 	}
2062 
2063 	/*
2064 	 * Finish setup of cinfo response, "before" value already set.
2065 	 * Get "after" change value, if it fails, simply return the
2066 	 * before value.
2067 	 */
2068 	ava.va_mask = AT_CTIME|AT_SEQ;
2069 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
2070 		ava.va_ctime = bva.va_ctime;
2071 		ava.va_seq = 0;
2072 	}
2073 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
2074 
2075 	/*
2076 	 * True verification that object was created with correct
2077 	 * attrs is impossible.  The attrs could have been changed
2078 	 * immediately after object creation.  If attributes did
2079 	 * not verify, the only recourse for the server is to
2080 	 * destroy the object.  Maybe if some attrs (like gid)
2081 	 * are set incorrectly, the object should be destroyed;
2082 	 * however, seems bad as a default policy.  Do we really
2083 	 * want to destroy an object over one of the times not
2084 	 * verifying correctly?  For these reasons, the server
2085 	 * currently sets bits in attrset for createattrs
2086 	 * that were set; however, no verification is done.
2087 	 *
2088 	 * vmask_to_nmask accounts for vattr bits set on create
2089 	 *	[do_rfs4_set_attrs() only sets resp bits for
2090 	 *	 non-vattr/vfs bits.]
2091 	 * Mask off any bits set by default so as not to return
2092 	 * more attrset bits than were requested in createattrs
2093 	 */
2094 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
2095 	resp->attrset &= args->createattrs.attrmask;
2096 	nfs4_ntov_table_free(&ntov, &sarg);
2097 
2098 	error = makefh4(&cs->fh, vp, cs->exi);
2099 	if (error) {
2100 		*cs->statusp = resp->status = puterrno4(error);
2101 	}
2102 
2103 	/*
2104 	 * The cinfo.atomic = TRUE only if we got no errors, we have
2105 	 * non-zero va_seq's, and it has incremented by exactly one
2106 	 * during the creation and it didn't change during the VOP_LOOKUP
2107 	 * or VOP_FSYNC.
2108 	 */
2109 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
2110 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
2111 		resp->cinfo.atomic = TRUE;
2112 	else
2113 		resp->cinfo.atomic = FALSE;
2114 
2115 	/*
2116 	 * Force modified metadata out to stable storage.
2117 	 *
2118 	 * if a underlying vp exists, pass it to VOP_FSYNC
2119 	 */
2120 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2121 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
2122 	else
2123 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
2124 
2125 	if (resp->status != NFS4_OK) {
2126 		VN_RELE(vp);
2127 		goto out;
2128 	}
2129 	if (cs->vp)
2130 		VN_RELE(cs->vp);
2131 
2132 	cs->vp = vp;
2133 	*cs->statusp = resp->status = NFS4_OK;
2134 out:
2135 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
2136 	    CREATE4res *, resp);
2137 }
2138 
2139 /*ARGSUSED*/
2140 static void
2141 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2142     struct compound_state *cs)
2143 {
2144 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2145 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2146 
2147 	rfs4_op_inval(argop, resop, req, cs);
2148 
2149 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2150 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2151 }
2152 
2153 /*ARGSUSED*/
2154 static void
2155 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156     struct compound_state *cs)
2157 {
2158 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2159 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2160 	rfs4_deleg_state_t *dsp;
2161 	nfsstat4 status;
2162 
2163 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2164 	    DELEGRETURN4args *, args);
2165 
2166 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2167 	resp->status = *cs->statusp = status;
2168 	if (status != NFS4_OK)
2169 		goto out;
2170 
2171 	/* Ensure specified filehandle matches */
2172 	if (cs->vp != dsp->rds_finfo->rf_vp) {
2173 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2174 	} else
2175 		rfs4_return_deleg(dsp, FALSE);
2176 
2177 	rfs4_update_lease(dsp->rds_client);
2178 
2179 	rfs4_deleg_state_rele(dsp);
2180 out:
2181 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2182 	    DELEGRETURN4res *, resp);
2183 }
2184 
2185 /*
2186  * Check to see if a given "flavor" is an explicitly shared flavor.
2187  * The assumption of this routine is the "flavor" is already a valid
2188  * flavor in the secinfo list of "exi".
2189  *
2190  *	e.g.
2191  *		# share -o sec=flavor1 /export
2192  *		# share -o sec=flavor2 /export/home
2193  *
2194  *		flavor2 is not an explicitly shared flavor for /export,
2195  *		however it is in the secinfo list for /export thru the
2196  *		server namespace setup.
2197  */
2198 int
2199 is_exported_sec(int flavor, struct exportinfo *exi)
2200 {
2201 	int	i;
2202 	struct secinfo *sp;
2203 
2204 	sp = exi->exi_export.ex_secinfo;
2205 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2206 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2207 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2208 			return (SEC_REF_EXPORTED(&sp[i]));
2209 		}
2210 	}
2211 
2212 	/* Should not reach this point based on the assumption */
2213 	return (0);
2214 }
2215 
2216 /*
2217  * Check if the security flavor used in the request matches what is
2218  * required at the export point or at the root pseudo node (exi_root).
2219  *
2220  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2221  *
2222  */
2223 static int
2224 secinfo_match_or_authnone(struct compound_state *cs)
2225 {
2226 	int	i;
2227 	struct secinfo *sp;
2228 
2229 	/*
2230 	 * Check cs->nfsflavor (from the request) against
2231 	 * the current export data in cs->exi.
2232 	 */
2233 	sp = cs->exi->exi_export.ex_secinfo;
2234 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2235 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2236 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2237 			return (1);
2238 	}
2239 
2240 	return (0);
2241 }
2242 
2243 /*
2244  * Check the access authority for the client and return the correct error.
2245  */
2246 nfsstat4
2247 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2248 {
2249 	int	authres;
2250 
2251 	/*
2252 	 * First, check if the security flavor used in the request
2253 	 * are among the flavors set in the server namespace.
2254 	 */
2255 	if (!secinfo_match_or_authnone(cs)) {
2256 		*cs->statusp = NFS4ERR_WRONGSEC;
2257 		return (*cs->statusp);
2258 	}
2259 
2260 	authres = checkauth4(cs, req);
2261 
2262 	if (authres > 0) {
2263 		*cs->statusp = NFS4_OK;
2264 		if (! (cs->access & CS_ACCESS_LIMITED))
2265 			cs->access = CS_ACCESS_OK;
2266 	} else if (authres == 0) {
2267 		*cs->statusp = NFS4ERR_ACCESS;
2268 	} else if (authres == -2) {
2269 		*cs->statusp = NFS4ERR_WRONGSEC;
2270 	} else {
2271 		*cs->statusp = NFS4ERR_DELAY;
2272 	}
2273 	return (*cs->statusp);
2274 }
2275 
2276 /*
2277  * bitmap4_to_attrmask is called by getattr and readdir.
2278  * It sets up the vattr mask and determines whether vfsstat call is needed
2279  * based on the input bitmap.
2280  * Returns nfsv4 status.
2281  */
2282 static nfsstat4
2283 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2284 {
2285 	int i;
2286 	uint_t	va_mask;
2287 	struct statvfs64 *sbp = sargp->sbp;
2288 
2289 	sargp->sbp = NULL;
2290 	sargp->flag = 0;
2291 	sargp->rdattr_error = NFS4_OK;
2292 	sargp->mntdfid_set = FALSE;
2293 	if (sargp->cs->vp)
2294 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2295 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2296 	else
2297 		sargp->xattr = 0;
2298 
2299 	/*
2300 	 * Set rdattr_error_req to true if return error per
2301 	 * failed entry rather than fail the readdir.
2302 	 */
2303 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2304 		sargp->rdattr_error_req = 1;
2305 	else
2306 		sargp->rdattr_error_req = 0;
2307 
2308 	/*
2309 	 * generate the va_mask
2310 	 * Handle the easy cases first
2311 	 */
2312 	switch (breq) {
2313 	case NFS4_NTOV_ATTR_MASK:
2314 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2315 		return (NFS4_OK);
2316 
2317 	case NFS4_FS_ATTR_MASK:
2318 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2319 		sargp->sbp = sbp;
2320 		return (NFS4_OK);
2321 
2322 	case NFS4_NTOV_ATTR_CACHE_MASK:
2323 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2324 		return (NFS4_OK);
2325 
2326 	case FATTR4_LEASE_TIME_MASK:
2327 		sargp->vap->va_mask = 0;
2328 		return (NFS4_OK);
2329 
2330 	default:
2331 		va_mask = 0;
2332 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2333 			if ((breq & nfs4_ntov_map[i].fbit) &&
2334 			    nfs4_ntov_map[i].vbit)
2335 				va_mask |= nfs4_ntov_map[i].vbit;
2336 		}
2337 
2338 		/*
2339 		 * Check is vfsstat is needed
2340 		 */
2341 		if (breq & NFS4_FS_ATTR_MASK)
2342 			sargp->sbp = sbp;
2343 
2344 		sargp->vap->va_mask = va_mask;
2345 		return (NFS4_OK);
2346 	}
2347 	/* NOTREACHED */
2348 }
2349 
2350 /*
2351  * bitmap4_get_sysattrs is called by getattr and readdir.
2352  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2353  * Returns nfsv4 status.
2354  */
2355 static nfsstat4
2356 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2357 {
2358 	int error;
2359 	struct compound_state *cs = sargp->cs;
2360 	vnode_t *vp = cs->vp;
2361 
2362 	if (sargp->sbp != NULL) {
2363 		error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2364 		if (error != 0) {
2365 			sargp->sbp = NULL;	/* to identify error */
2366 			return (puterrno4(error));
2367 		}
2368 	}
2369 
2370 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2371 }
2372 
2373 static void
2374 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2375 {
2376 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2377 	    KM_SLEEP);
2378 	ntovp->attrcnt = 0;
2379 	ntovp->vfsstat = FALSE;
2380 }
2381 
2382 static void
2383 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2384     struct nfs4_svgetit_arg *sargp)
2385 {
2386 	int i;
2387 	union nfs4_attr_u *na;
2388 	uint8_t *amap;
2389 
2390 	/*
2391 	 * XXX Should do the same checks for whether the bit is set
2392 	 */
2393 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2394 	    i < ntovp->attrcnt; i++, na++, amap++) {
2395 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2396 		    NFS4ATTR_FREEIT, sargp, na);
2397 	}
2398 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2399 		/*
2400 		 * xdr_free for getattr will be done later
2401 		 */
2402 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2403 		    i < ntovp->attrcnt; i++, na++, amap++) {
2404 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2405 		}
2406 	}
2407 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2408 }
2409 
2410 /*
2411  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2412  */
2413 static nfsstat4
2414 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2415     struct nfs4_svgetit_arg *sargp)
2416 {
2417 	int error = 0;
2418 	int i, k;
2419 	struct nfs4_ntov_table ntov;
2420 	XDR xdr;
2421 	ulong_t xdr_size;
2422 	char *xdr_attrs;
2423 	nfsstat4 status = NFS4_OK;
2424 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2425 	union nfs4_attr_u *na;
2426 	uint8_t *amap;
2427 
2428 	sargp->op = NFS4ATTR_GETIT;
2429 	sargp->flag = 0;
2430 
2431 	fattrp->attrmask = 0;
2432 	/* if no bits requested, then return empty fattr4 */
2433 	if (breq == 0) {
2434 		fattrp->attrlist4_len = 0;
2435 		fattrp->attrlist4 = NULL;
2436 		return (NFS4_OK);
2437 	}
2438 
2439 	/*
2440 	 * return NFS4ERR_INVAL when client requests write-only attrs
2441 	 */
2442 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2443 		return (NFS4ERR_INVAL);
2444 
2445 	nfs4_ntov_table_init(&ntov);
2446 	na = ntov.na;
2447 	amap = ntov.amap;
2448 
2449 	/*
2450 	 * Now loop to get or verify the attrs
2451 	 */
2452 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2453 		if (breq & nfs4_ntov_map[i].fbit) {
2454 			if ((*nfs4_ntov_map[i].sv_getit)(
2455 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2456 
2457 				error = (*nfs4_ntov_map[i].sv_getit)(
2458 				    NFS4ATTR_GETIT, sargp, na);
2459 
2460 				/*
2461 				 * Possible error values:
2462 				 * >0 if sv_getit failed to
2463 				 * get the attr; 0 if succeeded;
2464 				 * <0 if rdattr_error and the
2465 				 * attribute cannot be returned.
2466 				 */
2467 				if (error && !(sargp->rdattr_error_req))
2468 					goto done;
2469 				/*
2470 				 * If error then just for entry
2471 				 */
2472 				if (error == 0) {
2473 					fattrp->attrmask |=
2474 					    nfs4_ntov_map[i].fbit;
2475 					*amap++ =
2476 					    (uint8_t)nfs4_ntov_map[i].nval;
2477 					na++;
2478 					(ntov.attrcnt)++;
2479 				} else if ((error > 0) &&
2480 				    (sargp->rdattr_error == NFS4_OK)) {
2481 					sargp->rdattr_error = puterrno4(error);
2482 				}
2483 				error = 0;
2484 			}
2485 		}
2486 	}
2487 
2488 	/*
2489 	 * If rdattr_error was set after the return value for it was assigned,
2490 	 * update it.
2491 	 */
2492 	if (prev_rdattr_error != sargp->rdattr_error) {
2493 		na = ntov.na;
2494 		amap = ntov.amap;
2495 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2496 			k = *amap;
2497 			if (k < FATTR4_RDATTR_ERROR) {
2498 				continue;
2499 			}
2500 			if ((k == FATTR4_RDATTR_ERROR) &&
2501 			    ((*nfs4_ntov_map[k].sv_getit)(
2502 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2503 
2504 				(void) (*nfs4_ntov_map[k].sv_getit)(
2505 				    NFS4ATTR_GETIT, sargp, na);
2506 			}
2507 			break;
2508 		}
2509 	}
2510 
2511 	xdr_size = 0;
2512 	na = ntov.na;
2513 	amap = ntov.amap;
2514 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2515 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2516 	}
2517 
2518 	fattrp->attrlist4_len = xdr_size;
2519 	if (xdr_size) {
2520 		/* freed by rfs4_op_getattr_free() */
2521 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2522 
2523 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2524 
2525 		na = ntov.na;
2526 		amap = ntov.amap;
2527 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2528 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2529 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
2530 				    int, *amap);
2531 				status = NFS4ERR_SERVERFAULT;
2532 				break;
2533 			}
2534 		}
2535 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2536 	} else {
2537 		fattrp->attrlist4 = NULL;
2538 	}
2539 done:
2540 
2541 	nfs4_ntov_table_free(&ntov, sargp);
2542 
2543 	if (error != 0)
2544 		status = puterrno4(error);
2545 
2546 	return (status);
2547 }
2548 
2549 /* ARGSUSED */
2550 static void
2551 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2552     struct compound_state *cs)
2553 {
2554 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2555 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2556 	struct nfs4_svgetit_arg sarg;
2557 	struct statvfs64 sb;
2558 	nfsstat4 status;
2559 
2560 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2561 	    GETATTR4args *, args);
2562 
2563 	if (cs->vp == NULL) {
2564 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2565 		goto out;
2566 	}
2567 
2568 	if (cs->access == CS_ACCESS_DENIED) {
2569 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2570 		goto out;
2571 	}
2572 
2573 	sarg.sbp = &sb;
2574 	sarg.cs = cs;
2575 	sarg.is_referral = B_FALSE;
2576 
2577 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2578 	if (status == NFS4_OK) {
2579 
2580 		status = bitmap4_get_sysattrs(&sarg);
2581 		if (status == NFS4_OK) {
2582 
2583 			/* Is this a referral? */
2584 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2585 				/* Older V4 Solaris client sees a link */
2586 				if (client_is_downrev(req))
2587 					sarg.vap->va_type = VLNK;
2588 				else
2589 					sarg.is_referral = B_TRUE;
2590 			}
2591 
2592 			status = do_rfs4_op_getattr(args->attr_request,
2593 			    &resp->obj_attributes, &sarg);
2594 		}
2595 	}
2596 	*cs->statusp = resp->status = status;
2597 out:
2598 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2599 	    GETATTR4res *, resp);
2600 }
2601 
2602 static void
2603 rfs4_op_getattr_free(nfs_resop4 *resop)
2604 {
2605 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2606 
2607 	nfs4_fattr4_free(&resp->obj_attributes);
2608 }
2609 
2610 /* ARGSUSED */
2611 static void
2612 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2613     struct compound_state *cs)
2614 {
2615 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2616 
2617 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2618 
2619 	if (cs->vp == NULL) {
2620 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2621 		goto out;
2622 	}
2623 	if (cs->access == CS_ACCESS_DENIED) {
2624 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2625 		goto out;
2626 	}
2627 
2628 	/* check for reparse point at the share point */
2629 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2630 		/* it's all bad */
2631 		cs->exi->exi_moved = 1;
2632 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2633 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2634 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2635 		return;
2636 	}
2637 
2638 	/* check for reparse point at vp */
2639 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2640 		/* it's not all bad */
2641 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2642 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2643 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2644 		return;
2645 	}
2646 
2647 	resp->object.nfs_fh4_val =
2648 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2649 	nfs_fh4_copy(&cs->fh, &resp->object);
2650 	*cs->statusp = resp->status = NFS4_OK;
2651 out:
2652 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2653 	    GETFH4res *, resp);
2654 }
2655 
2656 static void
2657 rfs4_op_getfh_free(nfs_resop4 *resop)
2658 {
2659 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2660 
2661 	if (resp->status == NFS4_OK &&
2662 	    resp->object.nfs_fh4_val != NULL) {
2663 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2664 		resp->object.nfs_fh4_val = NULL;
2665 		resp->object.nfs_fh4_len = 0;
2666 	}
2667 }
2668 
2669 /*
2670  * illegal: args: void
2671  *	    res : status (NFS4ERR_OP_ILLEGAL)
2672  */
2673 /* ARGSUSED */
2674 static void
2675 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2676     struct svc_req *req, struct compound_state *cs)
2677 {
2678 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2679 
2680 	resop->resop = OP_ILLEGAL;
2681 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2682 }
2683 
2684 /* ARGSUSED */
2685 static void
2686 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2687     struct compound_state *cs)
2688 {
2689 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2690 }
2691 
2692 /*
2693  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2694  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2695  */
2696 /* ARGSUSED */
2697 static void
2698 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2699     struct compound_state *cs)
2700 {
2701 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2702 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2703 	int error;
2704 	vnode_t *vp;
2705 	vnode_t *dvp;
2706 	struct vattr bdva, idva, adva;
2707 	char *nm;
2708 	uint_t  len;
2709 	struct sockaddr *ca;
2710 	char *name = NULL;
2711 	nfsstat4 status;
2712 
2713 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2714 	    LINK4args *, args);
2715 
2716 	/* SAVED_FH: source object */
2717 	vp = cs->saved_vp;
2718 	if (vp == NULL) {
2719 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2720 		goto out;
2721 	}
2722 
2723 	/* CURRENT_FH: target directory */
2724 	dvp = cs->vp;
2725 	if (dvp == NULL) {
2726 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2727 		goto out;
2728 	}
2729 
2730 	/*
2731 	 * If there is a non-shared filesystem mounted on this vnode,
2732 	 * do not allow to link any file in this directory.
2733 	 */
2734 	if (vn_ismntpt(dvp)) {
2735 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2736 		goto out;
2737 	}
2738 
2739 	if (cs->access == CS_ACCESS_DENIED) {
2740 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2741 		goto out;
2742 	}
2743 
2744 	/* Check source object's type validity */
2745 	if (vp->v_type == VDIR) {
2746 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2747 		goto out;
2748 	}
2749 
2750 	/* Check target directory's type */
2751 	if (dvp->v_type != VDIR) {
2752 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2753 		goto out;
2754 	}
2755 
2756 	if (cs->saved_exi != cs->exi) {
2757 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2758 		goto out;
2759 	}
2760 
2761 	status = utf8_dir_verify(&args->newname);
2762 	if (status != NFS4_OK) {
2763 		*cs->statusp = resp->status = status;
2764 		goto out;
2765 	}
2766 
2767 	nm = utf8_to_fn(&args->newname, &len, NULL);
2768 	if (nm == NULL) {
2769 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2770 		goto out;
2771 	}
2772 
2773 	if (len > MAXNAMELEN) {
2774 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2775 		kmem_free(nm, len);
2776 		goto out;
2777 	}
2778 
2779 	if (rdonly4(req, cs)) {
2780 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2781 		kmem_free(nm, len);
2782 		goto out;
2783 	}
2784 
2785 	/* Get "before" change value */
2786 	bdva.va_mask = AT_CTIME|AT_SEQ;
2787 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2788 	if (error) {
2789 		*cs->statusp = resp->status = puterrno4(error);
2790 		kmem_free(nm, len);
2791 		goto out;
2792 	}
2793 
2794 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2795 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2796 	    MAXPATHLEN  + 1);
2797 
2798 	if (name == NULL) {
2799 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2800 		kmem_free(nm, len);
2801 		goto out;
2802 	}
2803 
2804 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2805 
2806 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2807 
2808 	if (nm != name)
2809 		kmem_free(name, MAXPATHLEN + 1);
2810 	kmem_free(nm, len);
2811 
2812 	/*
2813 	 * Get the initial "after" sequence number, if it fails, set to zero
2814 	 */
2815 	idva.va_mask = AT_SEQ;
2816 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2817 		idva.va_seq = 0;
2818 
2819 	/*
2820 	 * Force modified data and metadata out to stable storage.
2821 	 */
2822 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2823 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2824 
2825 	if (error) {
2826 		*cs->statusp = resp->status = puterrno4(error);
2827 		goto out;
2828 	}
2829 
2830 	/*
2831 	 * Get "after" change value, if it fails, simply return the
2832 	 * before value.
2833 	 */
2834 	adva.va_mask = AT_CTIME|AT_SEQ;
2835 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2836 		adva.va_ctime = bdva.va_ctime;
2837 		adva.va_seq = 0;
2838 	}
2839 
2840 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2841 
2842 	/*
2843 	 * The cinfo.atomic = TRUE only if we have
2844 	 * non-zero va_seq's, and it has incremented by exactly one
2845 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2846 	 */
2847 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2848 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2849 		resp->cinfo.atomic = TRUE;
2850 	else
2851 		resp->cinfo.atomic = FALSE;
2852 
2853 	*cs->statusp = resp->status = NFS4_OK;
2854 out:
2855 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2856 	    LINK4res *, resp);
2857 }
2858 
2859 /*
2860  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2861  */
2862 
2863 /* ARGSUSED */
2864 static nfsstat4
2865 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2866 {
2867 	int error;
2868 	int different_export = 0;
2869 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2870 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2871 	nfsstat4 stat;
2872 	fid_t fid;
2873 	int attrdir, dotdot, walk;
2874 	bool_t is_newvp = FALSE;
2875 
2876 	if (cs->vp->v_flag & V_XATTRDIR) {
2877 		attrdir = 1;
2878 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2879 	} else {
2880 		attrdir = 0;
2881 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2882 	}
2883 
2884 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2885 
2886 	/*
2887 	 * If dotdotting, then need to check whether it's
2888 	 * above the root of a filesystem, or above an
2889 	 * export point.
2890 	 */
2891 	if (dotdot) {
2892 		vnode_t *zone_rootvp;
2893 
2894 		ASSERT(cs->exi != NULL);
2895 		zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2896 		/*
2897 		 * If dotdotting at the root of a filesystem, then
2898 		 * need to traverse back to the mounted-on filesystem
2899 		 * and do the dotdot lookup there.
2900 		 */
2901 		if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2902 
2903 			/*
2904 			 * If at the system root, then can
2905 			 * go up no further.
2906 			 */
2907 			if (VN_CMP(cs->vp, zone_rootvp))
2908 				return (puterrno4(ENOENT));
2909 
2910 			/*
2911 			 * Traverse back to the mounted-on filesystem
2912 			 */
2913 			cs->vp = untraverse(cs->vp, zone_rootvp);
2914 
2915 			/*
2916 			 * Set the different_export flag so we remember
2917 			 * to pick up a new exportinfo entry for
2918 			 * this new filesystem.
2919 			 */
2920 			different_export = 1;
2921 		} else {
2922 
2923 			/*
2924 			 * If dotdotting above an export point then set
2925 			 * the different_export to get new export info.
2926 			 */
2927 			different_export = nfs_exported(cs->exi, cs->vp);
2928 		}
2929 	}
2930 
2931 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2932 	    NULL, NULL, NULL);
2933 	if (error)
2934 		return (puterrno4(error));
2935 
2936 	/*
2937 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2938 	 *
2939 	 * XXX if the vnode is a symlink and it is not visible in
2940 	 * a pseudo filesystem, return ENOENT (not following symlink).
2941 	 * V4 client can not mount such symlink. This is a regression
2942 	 * from V2/V3.
2943 	 *
2944 	 * In the same exported filesystem, if the security flavor used
2945 	 * is not an explicitly shared flavor, limit the view to the visible
2946 	 * list entries only. This is not a WRONGSEC case because it's already
2947 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2948 	 */
2949 	if (!different_export &&
2950 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2951 	    cs->access & CS_ACCESS_LIMITED)) {
2952 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2953 			VN_RELE(vp);
2954 			return (puterrno4(ENOENT));
2955 		}
2956 	}
2957 
2958 	/*
2959 	 * If it's a mountpoint, then traverse it.
2960 	 */
2961 	if (vn_ismntpt(vp)) {
2962 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2963 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2964 
2965 		/*
2966 		 * hold pre_tvp to counteract rele by traverse.  We will
2967 		 * need pre_tvp below if checkexport4 fails
2968 		 */
2969 		VN_HOLD(pre_tvp);
2970 		if ((error = traverse(&vp)) != 0) {
2971 			VN_RELE(vp);
2972 			VN_RELE(pre_tvp);
2973 			return (puterrno4(error));
2974 		}
2975 		different_export = 1;
2976 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2977 		/*
2978 		 * The vfsp comparison is to handle the case where
2979 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2980 		 * and NFS is unaware of local fs transistions because
2981 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2982 		 * the dir and the obj returned by lookup will have different
2983 		 * vfs ptrs.
2984 		 */
2985 		different_export = 1;
2986 	}
2987 
2988 	if (different_export) {
2989 
2990 		bzero(&fid, sizeof (fid));
2991 		fid.fid_len = MAXFIDSZ;
2992 		error = vop_fid_pseudo(vp, &fid);
2993 		if (error) {
2994 			VN_RELE(vp);
2995 			if (pre_tvp)
2996 				VN_RELE(pre_tvp);
2997 			return (puterrno4(error));
2998 		}
2999 
3000 		if (dotdot)
3001 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3002 		else
3003 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3004 
3005 		if (exi == NULL) {
3006 			if (pre_tvp) {
3007 				/*
3008 				 * If this vnode is a mounted-on vnode,
3009 				 * but the mounted-on file system is not
3010 				 * exported, send back the filehandle for
3011 				 * the mounted-on vnode, not the root of
3012 				 * the mounted-on file system.
3013 				 */
3014 				VN_RELE(vp);
3015 				vp = pre_tvp;
3016 				exi = pre_exi;
3017 			} else {
3018 				VN_RELE(vp);
3019 				return (puterrno4(EACCES));
3020 			}
3021 		} else if (pre_tvp) {
3022 			/* we're done with pre_tvp now. release extra hold */
3023 			VN_RELE(pre_tvp);
3024 		}
3025 
3026 		cs->exi = exi;
3027 
3028 		/*
3029 		 * Now we do a checkauth4. The reason is that
3030 		 * this client/user may not have access to the new
3031 		 * exported file system, and if they do,
3032 		 * the client/user may be mapped to a different uid.
3033 		 *
3034 		 * We start with a new cr, because the checkauth4 done
3035 		 * in the PUT*FH operation over wrote the cred's uid,
3036 		 * gid, etc, and we want the real thing before calling
3037 		 * checkauth4()
3038 		 */
3039 		crfree(cs->cr);
3040 		cs->cr = crdup(cs->basecr);
3041 
3042 		oldvp = cs->vp;
3043 		cs->vp = vp;
3044 		is_newvp = TRUE;
3045 
3046 		stat = call_checkauth4(cs, req);
3047 		if (stat != NFS4_OK) {
3048 			VN_RELE(cs->vp);
3049 			cs->vp = oldvp;
3050 			return (stat);
3051 		}
3052 	}
3053 
3054 	/*
3055 	 * After various NFS checks, do a label check on the path
3056 	 * component. The label on this path should either be the
3057 	 * global zone's label or a zone's label. We are only
3058 	 * interested in the zone's label because exported files
3059 	 * in global zone is accessible (though read-only) to
3060 	 * clients. The exportability/visibility check is already
3061 	 * done before reaching this code.
3062 	 */
3063 	if (is_system_labeled()) {
3064 		bslabel_t *clabel;
3065 
3066 		ASSERT(req->rq_label != NULL);
3067 		clabel = req->rq_label;
3068 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3069 		    "got client label from request(1)", struct svc_req *, req);
3070 
3071 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3072 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3073 			    cs->exi)) {
3074 				error = EACCES;
3075 				goto err_out;
3076 			}
3077 		} else {
3078 			/*
3079 			 * We grant access to admin_low label clients
3080 			 * only if the client is trusted, i.e. also
3081 			 * running Solaris Trusted Extension.
3082 			 */
3083 			struct sockaddr	*ca;
3084 			int		addr_type;
3085 			void		*ipaddr;
3086 			tsol_tpc_t	*tp;
3087 
3088 			ca = (struct sockaddr *)svc_getrpccaller(
3089 			    req->rq_xprt)->buf;
3090 			if (ca->sa_family == AF_INET) {
3091 				addr_type = IPV4_VERSION;
3092 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3093 			} else if (ca->sa_family == AF_INET6) {
3094 				addr_type = IPV6_VERSION;
3095 				ipaddr = &((struct sockaddr_in6 *)
3096 				    ca)->sin6_addr;
3097 			}
3098 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
3099 			if (tp == NULL || tp->tpc_tp.tp_doi !=
3100 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3101 			    SUN_CIPSO) {
3102 				if (tp != NULL)
3103 					TPC_RELE(tp);
3104 				error = EACCES;
3105 				goto err_out;
3106 			}
3107 			TPC_RELE(tp);
3108 		}
3109 	}
3110 
3111 	error = makefh4(&cs->fh, vp, cs->exi);
3112 
3113 err_out:
3114 	if (error) {
3115 		if (is_newvp) {
3116 			VN_RELE(cs->vp);
3117 			cs->vp = oldvp;
3118 		} else
3119 			VN_RELE(vp);
3120 		return (puterrno4(error));
3121 	}
3122 
3123 	if (!is_newvp) {
3124 		if (cs->vp)
3125 			VN_RELE(cs->vp);
3126 		cs->vp = vp;
3127 	} else if (oldvp)
3128 		VN_RELE(oldvp);
3129 
3130 	/*
3131 	 * if did lookup on attrdir and didn't lookup .., set named
3132 	 * attr fh flag
3133 	 */
3134 	if (attrdir && ! dotdot)
3135 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3136 
3137 	/* Assume false for now, open proc will set this */
3138 	cs->mandlock = FALSE;
3139 
3140 	return (NFS4_OK);
3141 }
3142 
3143 /* ARGSUSED */
3144 static void
3145 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3146     struct compound_state *cs)
3147 {
3148 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3149 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3150 	char *nm;
3151 	uint_t len;
3152 	struct sockaddr *ca;
3153 	char *name = NULL;
3154 	nfsstat4 status;
3155 
3156 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3157 	    LOOKUP4args *, args);
3158 
3159 	if (cs->vp == NULL) {
3160 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 		goto out;
3162 	}
3163 
3164 	if (cs->vp->v_type == VLNK) {
3165 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3166 		goto out;
3167 	}
3168 
3169 	if (cs->vp->v_type != VDIR) {
3170 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3171 		goto out;
3172 	}
3173 
3174 	status = utf8_dir_verify(&args->objname);
3175 	if (status != NFS4_OK) {
3176 		*cs->statusp = resp->status = status;
3177 		goto out;
3178 	}
3179 
3180 	nm = utf8_to_str(&args->objname, &len, NULL);
3181 	if (nm == NULL) {
3182 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3183 		goto out;
3184 	}
3185 
3186 	if (len > MAXNAMELEN) {
3187 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3188 		kmem_free(nm, len);
3189 		goto out;
3190 	}
3191 
3192 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3193 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3194 	    MAXPATHLEN  + 1);
3195 
3196 	if (name == NULL) {
3197 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3198 		kmem_free(nm, len);
3199 		goto out;
3200 	}
3201 
3202 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3203 
3204 	if (name != nm)
3205 		kmem_free(name, MAXPATHLEN + 1);
3206 	kmem_free(nm, len);
3207 
3208 out:
3209 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3210 	    LOOKUP4res *, resp);
3211 }
3212 
3213 /* ARGSUSED */
3214 static void
3215 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3216     struct compound_state *cs)
3217 {
3218 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3219 
3220 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3221 
3222 	if (cs->vp == NULL) {
3223 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3224 		goto out;
3225 	}
3226 
3227 	if (cs->vp->v_type == VLNK) {
3228 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3229 		goto out;
3230 	}
3231 
3232 	if (cs->vp->v_type != VDIR) {
3233 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3234 		goto out;
3235 	}
3236 
3237 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3238 
3239 	/*
3240 	 * From NFSV4 Specification, LOOKUPP should not check for
3241 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3242 	 */
3243 	if (resp->status == NFS4ERR_WRONGSEC) {
3244 		*cs->statusp = resp->status = NFS4_OK;
3245 	}
3246 
3247 out:
3248 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3249 	    LOOKUPP4res *, resp);
3250 }
3251 
3252 
3253 /*ARGSUSED2*/
3254 static void
3255 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3256     struct compound_state *cs)
3257 {
3258 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3259 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3260 	vnode_t		*avp = NULL;
3261 	int		lookup_flags = LOOKUP_XATTR, error;
3262 	int		exp_ro = 0;
3263 
3264 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3265 	    OPENATTR4args *, args);
3266 
3267 	if (cs->vp == NULL) {
3268 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3269 		goto out;
3270 	}
3271 
3272 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3273 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3274 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3275 		goto out;
3276 	}
3277 
3278 	/*
3279 	 * If file system supports passing ACE mask to VOP_ACCESS then
3280 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3281 	 */
3282 
3283 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3284 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3285 		    V_ACE_MASK, cs->cr, NULL);
3286 	else
3287 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3288 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3289 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3290 
3291 	if (error) {
3292 		*cs->statusp = resp->status = puterrno4(EACCES);
3293 		goto out;
3294 	}
3295 
3296 	/*
3297 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3298 	 * the file system is exported read-only -- regardless of
3299 	 * createdir flag.  Otherwise the attrdir would be created
3300 	 * (assuming server fs isn't mounted readonly locally).  If
3301 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3302 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3303 	 * because specfs has no VOP_LOOKUP op, so the macro would
3304 	 * return ENOSYS.  EINVAL is returned by all (current)
3305 	 * Solaris file system implementations when any of their
3306 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3307 	 * Returning NOTSUPP is more appropriate in this case
3308 	 * because the object will never be able to have an attrdir.
3309 	 */
3310 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3311 		lookup_flags |= CREATE_XATTR_DIR;
3312 
3313 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3314 	    NULL, NULL, NULL);
3315 
3316 	if (error) {
3317 		if (error == ENOENT && args->createdir && exp_ro)
3318 			*cs->statusp = resp->status = puterrno4(EROFS);
3319 		else if (error == EINVAL || error == ENOSYS)
3320 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3321 		else
3322 			*cs->statusp = resp->status = puterrno4(error);
3323 		goto out;
3324 	}
3325 
3326 	ASSERT(avp->v_flag & V_XATTRDIR);
3327 
3328 	error = makefh4(&cs->fh, avp, cs->exi);
3329 
3330 	if (error) {
3331 		VN_RELE(avp);
3332 		*cs->statusp = resp->status = puterrno4(error);
3333 		goto out;
3334 	}
3335 
3336 	VN_RELE(cs->vp);
3337 	cs->vp = avp;
3338 
3339 	/*
3340 	 * There is no requirement for an attrdir fh flag
3341 	 * because the attrdir has a vnode flag to distinguish
3342 	 * it from regular (non-xattr) directories.  The
3343 	 * FH4_ATTRDIR flag is set for future sanity checks.
3344 	 */
3345 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3346 	*cs->statusp = resp->status = NFS4_OK;
3347 
3348 out:
3349 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3350 	    OPENATTR4res *, resp);
3351 }
3352 
3353 static int
3354 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3355     caller_context_t *ct)
3356 {
3357 	int error;
3358 	int i;
3359 	clock_t delaytime;
3360 
3361 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3362 
3363 	/*
3364 	 * Don't block on mandatory locks. If this routine returns
3365 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3366 	 */
3367 	uio->uio_fmode = FNONBLOCK;
3368 
3369 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3370 
3371 
3372 		if (direction == FREAD) {
3373 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3374 			error = VOP_READ(vp, uio, ioflag, cred, ct);
3375 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3376 		} else {
3377 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3378 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3379 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3380 		}
3381 
3382 		if (error != EAGAIN)
3383 			break;
3384 
3385 		if (i < rfs4_maxlock_tries - 1) {
3386 			delay(delaytime);
3387 			delaytime *= 2;
3388 		}
3389 	}
3390 
3391 	return (error);
3392 }
3393 
3394 /* ARGSUSED */
3395 static void
3396 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3397     struct compound_state *cs)
3398 {
3399 	READ4args *args = &argop->nfs_argop4_u.opread;
3400 	READ4res *resp = &resop->nfs_resop4_u.opread;
3401 	int error;
3402 	int verror;
3403 	vnode_t *vp;
3404 	struct vattr va;
3405 	struct iovec iov, *iovp = NULL;
3406 	int iovcnt;
3407 	struct uio uio;
3408 	u_offset_t offset;
3409 	bool_t *deleg = &cs->deleg;
3410 	nfsstat4 stat;
3411 	int in_crit = 0;
3412 	mblk_t *mp = NULL;
3413 	int alloc_err = 0;
3414 	int rdma_used = 0;
3415 	int loaned_buffers;
3416 	caller_context_t ct;
3417 	struct uio *uiop;
3418 
3419 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3420 	    READ4args, args);
3421 
3422 	vp = cs->vp;
3423 	if (vp == NULL) {
3424 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3425 		goto out;
3426 	}
3427 	if (cs->access == CS_ACCESS_DENIED) {
3428 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3429 		goto out;
3430 	}
3431 
3432 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3433 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
3434 		*cs->statusp = resp->status = stat;
3435 		goto out;
3436 	}
3437 
3438 	/*
3439 	 * Enter the critical region before calling VOP_RWLOCK
3440 	 * to avoid a deadlock with write requests.
3441 	 */
3442 	if (nbl_need_check(vp)) {
3443 		nbl_start_crit(vp, RW_READER);
3444 		in_crit = 1;
3445 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3446 		    &ct)) {
3447 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
3448 			goto out;
3449 		}
3450 	}
3451 
3452 	if (args->wlist) {
3453 		if (args->count > clist_len(args->wlist)) {
3454 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3455 			goto out;
3456 		}
3457 		rdma_used = 1;
3458 	}
3459 
3460 	/* use loaned buffers for TCP */
3461 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3462 
3463 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3464 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3465 
3466 	/*
3467 	 * If we can't get the attributes, then we can't do the
3468 	 * right access checking.  So, we'll fail the request.
3469 	 */
3470 	if (verror) {
3471 		*cs->statusp = resp->status = puterrno4(verror);
3472 		goto out;
3473 	}
3474 
3475 	if (vp->v_type != VREG) {
3476 		*cs->statusp = resp->status =
3477 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3478 		goto out;
3479 	}
3480 
3481 	if (crgetuid(cs->cr) != va.va_uid &&
3482 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3483 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3484 		*cs->statusp = resp->status = puterrno4(error);
3485 		goto out;
3486 	}
3487 
3488 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3489 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3490 		goto out;
3491 	}
3492 
3493 	offset = args->offset;
3494 	if (offset >= va.va_size) {
3495 		*cs->statusp = resp->status = NFS4_OK;
3496 		resp->eof = TRUE;
3497 		resp->data_len = 0;
3498 		resp->data_val = NULL;
3499 		resp->mblk = NULL;
3500 		/* RDMA */
3501 		resp->wlist = args->wlist;
3502 		resp->wlist_len = resp->data_len;
3503 		*cs->statusp = resp->status = NFS4_OK;
3504 		if (resp->wlist)
3505 			clist_zero_len(resp->wlist);
3506 		goto out;
3507 	}
3508 
3509 	if (args->count == 0) {
3510 		*cs->statusp = resp->status = NFS4_OK;
3511 		resp->eof = FALSE;
3512 		resp->data_len = 0;
3513 		resp->data_val = NULL;
3514 		resp->mblk = NULL;
3515 		/* RDMA */
3516 		resp->wlist = args->wlist;
3517 		resp->wlist_len = resp->data_len;
3518 		if (resp->wlist)
3519 			clist_zero_len(resp->wlist);
3520 		goto out;
3521 	}
3522 
3523 	/*
3524 	 * Do not allocate memory more than maximum allowed
3525 	 * transfer size
3526 	 */
3527 	if (args->count > rfs4_tsize(req))
3528 		args->count = rfs4_tsize(req);
3529 
3530 	if (loaned_buffers) {
3531 		uiop = (uio_t *)rfs_setup_xuio(vp);
3532 		ASSERT(uiop != NULL);
3533 		uiop->uio_segflg = UIO_SYSSPACE;
3534 		uiop->uio_loffset = args->offset;
3535 		uiop->uio_resid = args->count;
3536 
3537 		/* Jump to do the read if successful */
3538 		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3539 			/*
3540 			 * Need to hold the vnode until after VOP_RETZCBUF()
3541 			 * is called.
3542 			 */
3543 			VN_HOLD(vp);
3544 			goto doio_read;
3545 		}
3546 
3547 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3548 		    uiop->uio_loffset, int, uiop->uio_resid);
3549 
3550 		uiop->uio_extflg = 0;
3551 
3552 		/* failure to setup for zero copy */
3553 		rfs_free_xuio((void *)uiop);
3554 		loaned_buffers = 0;
3555 	}
3556 
3557 	/*
3558 	 * If returning data via RDMA Write, then grab the chunk list. If we
3559 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3560 	 */
3561 	if (rdma_used) {
3562 		mp = NULL;
3563 		(void) rdma_get_wchunk(req, &iov, args->wlist);
3564 		uio.uio_iov = &iov;
3565 		uio.uio_iovcnt = 1;
3566 	} else {
3567 		/*
3568 		 * mp will contain the data to be sent out in the read reply.
3569 		 * It will be freed after the reply has been sent.
3570 		 */
3571 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3572 		ASSERT(mp != NULL);
3573 		ASSERT(alloc_err == 0);
3574 		uio.uio_iov = iovp;
3575 		uio.uio_iovcnt = iovcnt;
3576 	}
3577 
3578 	uio.uio_segflg = UIO_SYSSPACE;
3579 	uio.uio_extflg = UIO_COPY_CACHED;
3580 	uio.uio_loffset = args->offset;
3581 	uio.uio_resid = args->count;
3582 	uiop = &uio;
3583 
3584 doio_read:
3585 	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3586 
3587 	va.va_mask = AT_SIZE;
3588 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3589 
3590 	if (error) {
3591 		if (mp)
3592 			freemsg(mp);
3593 		*cs->statusp = resp->status = puterrno4(error);
3594 		goto out;
3595 	}
3596 
3597 	/* make mblk using zc buffers */
3598 	if (loaned_buffers) {
3599 		mp = uio_to_mblk(uiop);
3600 		ASSERT(mp != NULL);
3601 	}
3602 
3603 	*cs->statusp = resp->status = NFS4_OK;
3604 
3605 	ASSERT(uiop->uio_resid >= 0);
3606 	resp->data_len = args->count - uiop->uio_resid;
3607 	if (mp) {
3608 		resp->data_val = (char *)mp->b_datap->db_base;
3609 		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3610 	} else {
3611 		resp->data_val = (caddr_t)iov.iov_base;
3612 	}
3613 
3614 	resp->mblk = mp;
3615 
3616 	if (!verror && offset + resp->data_len == va.va_size)
3617 		resp->eof = TRUE;
3618 	else
3619 		resp->eof = FALSE;
3620 
3621 	if (rdma_used) {
3622 		if (!rdma_setup_read_data4(args, resp)) {
3623 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3624 		}
3625 	} else {
3626 		resp->wlist = NULL;
3627 	}
3628 
3629 out:
3630 	if (in_crit)
3631 		nbl_end_crit(vp);
3632 
3633 	if (iovp != NULL)
3634 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
3635 
3636 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3637 	    READ4res *, resp);
3638 }
3639 
3640 static void
3641 rfs4_op_read_free(nfs_resop4 *resop)
3642 {
3643 	READ4res	*resp = &resop->nfs_resop4_u.opread;
3644 
3645 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3646 		freemsg(resp->mblk);
3647 		resp->mblk = NULL;
3648 		resp->data_val = NULL;
3649 		resp->data_len = 0;
3650 	}
3651 }
3652 
3653 static void
3654 rfs4_op_readdir_free(nfs_resop4 * resop)
3655 {
3656 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3657 
3658 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3659 		freeb(resp->mblk);
3660 		resp->mblk = NULL;
3661 		resp->data_len = 0;
3662 	}
3663 }
3664 
3665 
3666 /* ARGSUSED */
3667 static void
3668 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3669     struct compound_state *cs)
3670 {
3671 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
3672 	int		error;
3673 	vnode_t		*vp;
3674 	struct exportinfo *exi, *sav_exi;
3675 	nfs_fh4_fmt_t	*fh_fmtp;
3676 	nfs_export_t *ne = nfs_get_export();
3677 
3678 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3679 
3680 	if (cs->vp) {
3681 		VN_RELE(cs->vp);
3682 		cs->vp = NULL;
3683 	}
3684 
3685 	if (cs->cr)
3686 		crfree(cs->cr);
3687 
3688 	cs->cr = crdup(cs->basecr);
3689 
3690 	vp = ne->exi_public->exi_vp;
3691 	if (vp == NULL) {
3692 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3693 		goto out;
3694 	}
3695 
3696 	error = makefh4(&cs->fh, vp, ne->exi_public);
3697 	if (error != 0) {
3698 		*cs->statusp = resp->status = puterrno4(error);
3699 		goto out;
3700 	}
3701 	sav_exi = cs->exi;
3702 	if (ne->exi_public == ne->exi_root) {
3703 		/*
3704 		 * No filesystem is actually shared public, so we default
3705 		 * to exi_root. In this case, we must check whether root
3706 		 * is exported.
3707 		 */
3708 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3709 
3710 		/*
3711 		 * if root filesystem is exported, the exportinfo struct that we
3712 		 * should use is what checkexport4 returns, because root_exi is
3713 		 * actually a mostly empty struct.
3714 		 */
3715 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3716 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3717 		cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3718 	} else {
3719 		/*
3720 		 * it's a properly shared filesystem
3721 		 */
3722 		cs->exi = ne->exi_public;
3723 	}
3724 
3725 	if (is_system_labeled()) {
3726 		bslabel_t *clabel;
3727 
3728 		ASSERT(req->rq_label != NULL);
3729 		clabel = req->rq_label;
3730 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3731 		    "got client label from request(1)",
3732 		    struct svc_req *, req);
3733 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3734 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3735 			    cs->exi)) {
3736 				*cs->statusp = resp->status =
3737 				    NFS4ERR_SERVERFAULT;
3738 				goto out;
3739 			}
3740 		}
3741 	}
3742 
3743 	VN_HOLD(vp);
3744 	cs->vp = vp;
3745 
3746 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3747 		VN_RELE(cs->vp);
3748 		cs->vp = NULL;
3749 		cs->exi = sav_exi;
3750 		goto out;
3751 	}
3752 
3753 	*cs->statusp = resp->status = NFS4_OK;
3754 out:
3755 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3756 	    PUTPUBFH4res *, resp);
3757 }
3758 
3759 /*
3760  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3761  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3762  * or joe have restrictive search permissions, then we shouldn't let
3763  * the client get a file handle. This is easy to enforce. However, we
3764  * don't know what security flavor should be used until we resolve the
3765  * path name. Another complication is uid mapping. If root is
3766  * the user, then it will be mapped to the anonymous user by default,
3767  * but we won't know that till we've resolved the path name. And we won't
3768  * know what the anonymous user is.
3769  * Luckily, SECINFO is specified to take a full filename.
3770  * So what we will have to in rfs4_op_lookup is check that flavor of
3771  * the target object matches that of the request, and if root was the
3772  * caller, check for the root= and anon= options, and if necessary,
3773  * repeat the lookup using the right cred_t. But that's not done yet.
3774  */
3775 /* ARGSUSED */
3776 static void
3777 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3778     struct compound_state *cs)
3779 {
3780 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3781 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3782 	nfs_fh4_fmt_t *fh_fmtp;
3783 
3784 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3785 	    PUTFH4args *, args);
3786 
3787 	if (cs->vp) {
3788 		VN_RELE(cs->vp);
3789 		cs->vp = NULL;
3790 	}
3791 
3792 	if (cs->cr) {
3793 		crfree(cs->cr);
3794 		cs->cr = NULL;
3795 	}
3796 
3797 
3798 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3799 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3800 		goto out;
3801 	}
3802 
3803 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3804 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3805 	    NULL);
3806 
3807 	if (cs->exi == NULL) {
3808 		*cs->statusp = resp->status = NFS4ERR_STALE;
3809 		goto out;
3810 	}
3811 
3812 	cs->cr = crdup(cs->basecr);
3813 
3814 	ASSERT(cs->cr != NULL);
3815 
3816 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3817 		*cs->statusp = resp->status;
3818 		goto out;
3819 	}
3820 
3821 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3822 		VN_RELE(cs->vp);
3823 		cs->vp = NULL;
3824 		goto out;
3825 	}
3826 
3827 	nfs_fh4_copy(&args->object, &cs->fh);
3828 	*cs->statusp = resp->status = NFS4_OK;
3829 	cs->deleg = FALSE;
3830 
3831 out:
3832 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3833 	    PUTFH4res *, resp);
3834 }
3835 
3836 /* ARGSUSED */
3837 static void
3838 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3839     struct compound_state *cs)
3840 {
3841 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3842 	int error;
3843 	fid_t fid;
3844 	struct exportinfo *exi, *sav_exi;
3845 
3846 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3847 
3848 	if (cs->vp) {
3849 		VN_RELE(cs->vp);
3850 		cs->vp = NULL;
3851 	}
3852 
3853 	if (cs->cr)
3854 		crfree(cs->cr);
3855 
3856 	cs->cr = crdup(cs->basecr);
3857 
3858 	/*
3859 	 * Using rootdir, the system root vnode,
3860 	 * get its fid.
3861 	 */
3862 	bzero(&fid, sizeof (fid));
3863 	fid.fid_len = MAXFIDSZ;
3864 	error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3865 	if (error != 0) {
3866 		*cs->statusp = resp->status = puterrno4(error);
3867 		goto out;
3868 	}
3869 
3870 	/*
3871 	 * Then use the root fsid & fid it to find out if it's exported
3872 	 *
3873 	 * If the server root isn't exported directly, then
3874 	 * it should at least be a pseudo export based on
3875 	 * one or more exports further down in the server's
3876 	 * file tree.
3877 	 */
3878 	exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3879 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3880 		NFS4_DEBUG(rfs4_debug,
3881 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3882 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3883 		goto out;
3884 	}
3885 
3886 	/*
3887 	 * Now make a filehandle based on the root
3888 	 * export and root vnode.
3889 	 */
3890 	error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3891 	if (error != 0) {
3892 		*cs->statusp = resp->status = puterrno4(error);
3893 		goto out;
3894 	}
3895 
3896 	sav_exi = cs->exi;
3897 	cs->exi = exi;
3898 
3899 	VN_HOLD(ZONE_ROOTVP());
3900 	cs->vp = ZONE_ROOTVP();
3901 
3902 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3903 		VN_RELE(cs->vp);
3904 		cs->vp = NULL;
3905 		cs->exi = sav_exi;
3906 		goto out;
3907 	}
3908 
3909 	*cs->statusp = resp->status = NFS4_OK;
3910 	cs->deleg = FALSE;
3911 out:
3912 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3913 	    PUTROOTFH4res *, resp);
3914 }
3915 
3916 /*
3917  * readlink: args: CURRENT_FH.
3918  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3919  */
3920 
3921 /* ARGSUSED */
3922 static void
3923 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3924     struct compound_state *cs)
3925 {
3926 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3927 	int error;
3928 	vnode_t *vp;
3929 	struct iovec iov;
3930 	struct vattr va;
3931 	struct uio uio;
3932 	char *data;
3933 	struct sockaddr *ca;
3934 	char *name = NULL;
3935 	int is_referral;
3936 
3937 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3938 
3939 	/* CURRENT_FH: directory */
3940 	vp = cs->vp;
3941 	if (vp == NULL) {
3942 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3943 		goto out;
3944 	}
3945 
3946 	if (cs->access == CS_ACCESS_DENIED) {
3947 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3948 		goto out;
3949 	}
3950 
3951 	/* Is it a referral? */
3952 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3953 
3954 		is_referral = 1;
3955 
3956 	} else {
3957 
3958 		is_referral = 0;
3959 
3960 		if (vp->v_type == VDIR) {
3961 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
3962 			goto out;
3963 		}
3964 
3965 		if (vp->v_type != VLNK) {
3966 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3967 			goto out;
3968 		}
3969 
3970 	}
3971 
3972 	va.va_mask = AT_MODE;
3973 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3974 	if (error) {
3975 		*cs->statusp = resp->status = puterrno4(error);
3976 		goto out;
3977 	}
3978 
3979 	if (MANDLOCK(vp, va.va_mode)) {
3980 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3981 		goto out;
3982 	}
3983 
3984 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3985 
3986 	if (is_referral) {
3987 		char *s;
3988 		size_t strsz;
3989 		kstat_named_t *stat =
3990 		    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3991 
3992 		/* Get an artificial symlink based on a referral */
3993 		s = build_symlink(vp, cs->cr, &strsz);
3994 		stat[NFS_REFERLINKS].value.ui64++;
3995 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3996 		    vnode_t *, vp, char *, s);
3997 		if (s == NULL)
3998 			error = EINVAL;
3999 		else {
4000 			error = 0;
4001 			(void) strlcpy(data, s, MAXPATHLEN + 1);
4002 			kmem_free(s, strsz);
4003 		}
4004 
4005 	} else {
4006 
4007 		iov.iov_base = data;
4008 		iov.iov_len = MAXPATHLEN;
4009 		uio.uio_iov = &iov;
4010 		uio.uio_iovcnt = 1;
4011 		uio.uio_segflg = UIO_SYSSPACE;
4012 		uio.uio_extflg = UIO_COPY_CACHED;
4013 		uio.uio_loffset = 0;
4014 		uio.uio_resid = MAXPATHLEN;
4015 
4016 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4017 
4018 		if (!error)
4019 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
4020 	}
4021 
4022 	if (error) {
4023 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4024 		*cs->statusp = resp->status = puterrno4(error);
4025 		goto out;
4026 	}
4027 
4028 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4029 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4030 	    MAXPATHLEN  + 1);
4031 
4032 	if (name == NULL) {
4033 		/*
4034 		 * Even though the conversion failed, we return
4035 		 * something. We just don't translate it.
4036 		 */
4037 		name = data;
4038 	}
4039 
4040 	/*
4041 	 * treat link name as data
4042 	 */
4043 	(void) str_to_utf8(name, (utf8string *)&resp->link);
4044 
4045 	if (name != data)
4046 		kmem_free(name, MAXPATHLEN + 1);
4047 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4048 	*cs->statusp = resp->status = NFS4_OK;
4049 
4050 out:
4051 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4052 	    READLINK4res *, resp);
4053 }
4054 
4055 static void
4056 rfs4_op_readlink_free(nfs_resop4 *resop)
4057 {
4058 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4059 	utf8string *symlink = (utf8string *)&resp->link;
4060 
4061 	if (symlink->utf8string_val) {
4062 		UTF8STRING_FREE(*symlink)
4063 	}
4064 }
4065 
4066 /*
4067  * release_lockowner:
4068  *	Release any state associated with the supplied
4069  *	lockowner. Note if any lo_state is holding locks we will not
4070  *	rele that lo_state and thus the lockowner will not be destroyed.
4071  *	A client using lock after the lock owner stateid has been released
4072  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4073  *	to reissue the lock with new_lock_owner set to TRUE.
4074  *	args: lock_owner
4075  *	res:  status
4076  */
4077 /* ARGSUSED */
4078 static void
4079 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4080     struct svc_req *req, struct compound_state *cs)
4081 {
4082 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4083 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4084 	rfs4_lockowner_t *lo;
4085 	rfs4_openowner_t *oo;
4086 	rfs4_state_t *sp;
4087 	rfs4_lo_state_t *lsp;
4088 	rfs4_client_t *cp;
4089 	bool_t create = FALSE;
4090 	locklist_t *llist;
4091 	sysid_t sysid;
4092 
4093 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4094 	    cs, RELEASE_LOCKOWNER4args *, ap);
4095 
4096 	/* Make sure there is a clientid around for this request */
4097 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4098 
4099 	if (cp == NULL) {
4100 		*cs->statusp = resp->status =
4101 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4102 		goto out;
4103 	}
4104 	rfs4_client_rele(cp);
4105 
4106 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
4107 	if (lo == NULL) {
4108 		*cs->statusp = resp->status = NFS4_OK;
4109 		goto out;
4110 	}
4111 	ASSERT(lo->rl_client != NULL);
4112 
4113 	/*
4114 	 * Check for EXPIRED client. If so will reap state with in a lease
4115 	 * period or on next set_clientid_confirm step
4116 	 */
4117 	if (rfs4_lease_expired(lo->rl_client)) {
4118 		rfs4_lockowner_rele(lo);
4119 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4120 		goto out;
4121 	}
4122 
4123 	/*
4124 	 * If no sysid has been assigned, then no locks exist; just return.
4125 	 */
4126 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4127 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4128 		rfs4_lockowner_rele(lo);
4129 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4130 		goto out;
4131 	}
4132 
4133 	sysid = lo->rl_client->rc_sysidt;
4134 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4135 
4136 	/*
4137 	 * Mark the lockowner invalid.
4138 	 */
4139 	rfs4_dbe_hide(lo->rl_dbe);
4140 
4141 	/*
4142 	 * sysid-pid pair should now not be used since the lockowner is
4143 	 * invalid. If the client were to instantiate the lockowner again
4144 	 * it would be assigned a new pid. Thus we can get the list of
4145 	 * current locks.
4146 	 */
4147 
4148 	llist = flk_get_active_locks(sysid, lo->rl_pid);
4149 	/* If we are still holding locks fail */
4150 	if (llist != NULL) {
4151 
4152 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4153 
4154 		flk_free_locklist(llist);
4155 		/*
4156 		 * We need to unhide the lockowner so the client can
4157 		 * try it again. The bad thing here is if the client
4158 		 * has a logic error that took it here in the first place
4159 		 * they probably have lost accounting of the locks that it
4160 		 * is holding. So we may have dangling state until the
4161 		 * open owner state is reaped via close. One scenario
4162 		 * that could possibly occur is that the client has
4163 		 * sent the unlock request(s) in separate threads
4164 		 * and has not waited for the replies before sending the
4165 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
4166 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4167 		 * reissuing the request.
4168 		 */
4169 		rfs4_dbe_unhide(lo->rl_dbe);
4170 		rfs4_lockowner_rele(lo);
4171 		goto out;
4172 	}
4173 
4174 	/*
4175 	 * For the corresponding client we need to check each open
4176 	 * owner for any opens that have lockowner state associated
4177 	 * with this lockowner.
4178 	 */
4179 
4180 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4181 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4182 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4183 
4184 		rfs4_dbe_lock(oo->ro_dbe);
4185 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
4186 		    sp = list_next(&oo->ro_statelist, sp)) {
4187 
4188 			rfs4_dbe_lock(sp->rs_dbe);
4189 			for (lsp = list_head(&sp->rs_lostatelist);
4190 			    lsp != NULL;
4191 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
4192 				if (lsp->rls_locker == lo) {
4193 					rfs4_dbe_lock(lsp->rls_dbe);
4194 					rfs4_dbe_invalidate(lsp->rls_dbe);
4195 					rfs4_dbe_unlock(lsp->rls_dbe);
4196 				}
4197 			}
4198 			rfs4_dbe_unlock(sp->rs_dbe);
4199 		}
4200 		rfs4_dbe_unlock(oo->ro_dbe);
4201 	}
4202 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4203 
4204 	rfs4_lockowner_rele(lo);
4205 
4206 	*cs->statusp = resp->status = NFS4_OK;
4207 
4208 out:
4209 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4210 	    cs, RELEASE_LOCKOWNER4res *, resp);
4211 }
4212 
4213 /*
4214  * short utility function to lookup a file and recall the delegation
4215  */
4216 static rfs4_file_t *
4217 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4218     int *lkup_error, cred_t *cr)
4219 {
4220 	vnode_t *vp;
4221 	rfs4_file_t *fp = NULL;
4222 	bool_t fcreate = FALSE;
4223 	int error;
4224 
4225 	if (vpp)
4226 		*vpp = NULL;
4227 
4228 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4229 	    NULL)) == 0) {
4230 		if (vp->v_type == VREG)
4231 			fp = rfs4_findfile(vp, NULL, &fcreate);
4232 		if (vpp)
4233 			*vpp = vp;
4234 		else
4235 			VN_RELE(vp);
4236 	}
4237 
4238 	if (lkup_error)
4239 		*lkup_error = error;
4240 
4241 	return (fp);
4242 }
4243 
4244 /*
4245  * remove: args: CURRENT_FH: directory; name.
4246  *	res: status. If success - CURRENT_FH unchanged, return change_info
4247  *		for directory.
4248  */
4249 /* ARGSUSED */
4250 static void
4251 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4252     struct compound_state *cs)
4253 {
4254 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4255 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4256 	int error;
4257 	vnode_t *dvp, *vp;
4258 	struct vattr bdva, idva, adva;
4259 	char *nm;
4260 	uint_t len;
4261 	rfs4_file_t *fp;
4262 	int in_crit = 0;
4263 	bslabel_t *clabel;
4264 	struct sockaddr *ca;
4265 	char *name = NULL;
4266 	nfsstat4 status;
4267 
4268 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4269 	    REMOVE4args *, args);
4270 
4271 	/* CURRENT_FH: directory */
4272 	dvp = cs->vp;
4273 	if (dvp == NULL) {
4274 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4275 		goto out;
4276 	}
4277 
4278 	if (cs->access == CS_ACCESS_DENIED) {
4279 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4280 		goto out;
4281 	}
4282 
4283 	/*
4284 	 * If there is an unshared filesystem mounted on this vnode,
4285 	 * Do not allow to remove anything in this directory.
4286 	 */
4287 	if (vn_ismntpt(dvp)) {
4288 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4289 		goto out;
4290 	}
4291 
4292 	if (dvp->v_type != VDIR) {
4293 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4294 		goto out;
4295 	}
4296 
4297 	status = utf8_dir_verify(&args->target);
4298 	if (status != NFS4_OK) {
4299 		*cs->statusp = resp->status = status;
4300 		goto out;
4301 	}
4302 
4303 	/*
4304 	 * Lookup the file so that we can check if it's a directory
4305 	 */
4306 	nm = utf8_to_fn(&args->target, &len, NULL);
4307 	if (nm == NULL) {
4308 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4309 		goto out;
4310 	}
4311 
4312 	if (len > MAXNAMELEN) {
4313 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4314 		kmem_free(nm, len);
4315 		goto out;
4316 	}
4317 
4318 	if (rdonly4(req, cs)) {
4319 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4320 		kmem_free(nm, len);
4321 		goto out;
4322 	}
4323 
4324 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4325 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4326 	    MAXPATHLEN  + 1);
4327 
4328 	if (name == NULL) {
4329 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4330 		kmem_free(nm, len);
4331 		goto out;
4332 	}
4333 
4334 	/*
4335 	 * Lookup the file to determine type and while we are see if
4336 	 * there is a file struct around and check for delegation.
4337 	 * We don't need to acquire va_seq before this lookup, if
4338 	 * it causes an update, cinfo.before will not match, which will
4339 	 * trigger a cache flush even if atomic is TRUE.
4340 	 */
4341 	fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4342 	if (fp != NULL) {
4343 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4344 		    NULL)) {
4345 			VN_RELE(vp);
4346 			rfs4_file_rele(fp);
4347 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4348 			if (nm != name)
4349 				kmem_free(name, MAXPATHLEN + 1);
4350 			kmem_free(nm, len);
4351 			goto out;
4352 		}
4353 	}
4354 
4355 	/* Didn't find anything to remove */
4356 	if (vp == NULL) {
4357 		*cs->statusp = resp->status = error;
4358 		if (nm != name)
4359 			kmem_free(name, MAXPATHLEN + 1);
4360 		kmem_free(nm, len);
4361 		goto out;
4362 	}
4363 
4364 	if (nbl_need_check(vp)) {
4365 		nbl_start_crit(vp, RW_READER);
4366 		in_crit = 1;
4367 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4368 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4369 			if (nm != name)
4370 				kmem_free(name, MAXPATHLEN + 1);
4371 			kmem_free(nm, len);
4372 			nbl_end_crit(vp);
4373 			VN_RELE(vp);
4374 			if (fp) {
4375 				rfs4_clear_dont_grant(fp);
4376 				rfs4_file_rele(fp);
4377 			}
4378 			goto out;
4379 		}
4380 	}
4381 
4382 	/* check label before allowing removal */
4383 	if (is_system_labeled()) {
4384 		ASSERT(req->rq_label != NULL);
4385 		clabel = req->rq_label;
4386 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4387 		    "got client label from request(1)",
4388 		    struct svc_req *, req);
4389 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4390 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4391 			    cs->exi)) {
4392 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4393 				if (name != nm)
4394 					kmem_free(name, MAXPATHLEN + 1);
4395 				kmem_free(nm, len);
4396 				if (in_crit)
4397 					nbl_end_crit(vp);
4398 				VN_RELE(vp);
4399 				if (fp) {
4400 					rfs4_clear_dont_grant(fp);
4401 					rfs4_file_rele(fp);
4402 				}
4403 				goto out;
4404 			}
4405 		}
4406 	}
4407 
4408 	/* Get dir "before" change value */
4409 	bdva.va_mask = AT_CTIME|AT_SEQ;
4410 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4411 	if (error) {
4412 		*cs->statusp = resp->status = puterrno4(error);
4413 		if (nm != name)
4414 			kmem_free(name, MAXPATHLEN + 1);
4415 		kmem_free(nm, len);
4416 		if (in_crit)
4417 			nbl_end_crit(vp);
4418 		VN_RELE(vp);
4419 		if (fp) {
4420 			rfs4_clear_dont_grant(fp);
4421 			rfs4_file_rele(fp);
4422 		}
4423 		goto out;
4424 	}
4425 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4426 
4427 	/* Actually do the REMOVE operation */
4428 	if (vp->v_type == VDIR) {
4429 		/*
4430 		 * Can't remove a directory that has a mounted-on filesystem.
4431 		 */
4432 		if (vn_ismntpt(vp)) {
4433 			error = EACCES;
4434 		} else {
4435 			/*
4436 			 * System V defines rmdir to return EEXIST,
4437 			 * not ENOTEMPTY, if the directory is not
4438 			 * empty.  A System V NFS server needs to map
4439 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4440 			 * transmit over the wire.
4441 			 */
4442 			if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4443 			    NULL, 0)) == EEXIST)
4444 				error = ENOTEMPTY;
4445 		}
4446 	} else {
4447 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4448 		    fp != NULL) {
4449 			struct vattr va;
4450 			vnode_t *tvp;
4451 
4452 			rfs4_dbe_lock(fp->rf_dbe);
4453 			tvp = fp->rf_vp;
4454 			if (tvp)
4455 				VN_HOLD(tvp);
4456 			rfs4_dbe_unlock(fp->rf_dbe);
4457 
4458 			if (tvp) {
4459 				/*
4460 				 * This is va_seq safe because we are not
4461 				 * manipulating dvp.
4462 				 */
4463 				va.va_mask = AT_NLINK;
4464 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4465 				    va.va_nlink == 0) {
4466 					/* Remove state on file remove */
4467 					if (in_crit) {
4468 						nbl_end_crit(vp);
4469 						in_crit = 0;
4470 					}
4471 					rfs4_close_all_state(fp);
4472 				}
4473 				VN_RELE(tvp);
4474 			}
4475 		}
4476 	}
4477 
4478 	if (in_crit)
4479 		nbl_end_crit(vp);
4480 	VN_RELE(vp);
4481 
4482 	if (fp) {
4483 		rfs4_clear_dont_grant(fp);
4484 		rfs4_file_rele(fp);
4485 	}
4486 	if (nm != name)
4487 		kmem_free(name, MAXPATHLEN + 1);
4488 	kmem_free(nm, len);
4489 
4490 	if (error) {
4491 		*cs->statusp = resp->status = puterrno4(error);
4492 		goto out;
4493 	}
4494 
4495 	/*
4496 	 * Get the initial "after" sequence number, if it fails, set to zero
4497 	 */
4498 	idva.va_mask = AT_SEQ;
4499 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4500 		idva.va_seq = 0;
4501 
4502 	/*
4503 	 * Force modified data and metadata out to stable storage.
4504 	 */
4505 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4506 
4507 	/*
4508 	 * Get "after" change value, if it fails, simply return the
4509 	 * before value.
4510 	 */
4511 	adva.va_mask = AT_CTIME|AT_SEQ;
4512 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4513 		adva.va_ctime = bdva.va_ctime;
4514 		adva.va_seq = 0;
4515 	}
4516 
4517 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4518 
4519 	/*
4520 	 * The cinfo.atomic = TRUE only if we have
4521 	 * non-zero va_seq's, and it has incremented by exactly one
4522 	 * during the VOP_REMOVE/RMDIR and it didn't change during
4523 	 * the VOP_FSYNC.
4524 	 */
4525 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4526 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4527 		resp->cinfo.atomic = TRUE;
4528 	else
4529 		resp->cinfo.atomic = FALSE;
4530 
4531 	*cs->statusp = resp->status = NFS4_OK;
4532 
4533 out:
4534 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4535 	    REMOVE4res *, resp);
4536 }
4537 
4538 /*
4539  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4540  *		oldname and newname.
4541  *	res: status. If success - CURRENT_FH unchanged, return change_info
4542  *		for both from and target directories.
4543  */
4544 /* ARGSUSED */
4545 static void
4546 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4547     struct compound_state *cs)
4548 {
4549 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
4550 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4551 	int error;
4552 	vnode_t *odvp;
4553 	vnode_t *ndvp;
4554 	vnode_t *srcvp, *targvp, *tvp;
4555 	struct vattr obdva, oidva, oadva;
4556 	struct vattr nbdva, nidva, nadva;
4557 	char *onm, *nnm;
4558 	uint_t olen, nlen;
4559 	rfs4_file_t *fp, *sfp;
4560 	int in_crit_src, in_crit_targ;
4561 	int fp_rele_grant_hold, sfp_rele_grant_hold;
4562 	int unlinked;
4563 	bslabel_t *clabel;
4564 	struct sockaddr *ca;
4565 	char *converted_onm = NULL;
4566 	char *converted_nnm = NULL;
4567 	nfsstat4 status;
4568 
4569 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4570 	    RENAME4args *, args);
4571 
4572 	fp = sfp = NULL;
4573 	srcvp = targvp = tvp = NULL;
4574 	in_crit_src = in_crit_targ = 0;
4575 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4576 	unlinked = 0;
4577 
4578 	/* CURRENT_FH: target directory */
4579 	ndvp = cs->vp;
4580 	if (ndvp == NULL) {
4581 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4582 		goto out;
4583 	}
4584 
4585 	/* SAVED_FH: from directory */
4586 	odvp = cs->saved_vp;
4587 	if (odvp == NULL) {
4588 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4589 		goto out;
4590 	}
4591 
4592 	if (cs->access == CS_ACCESS_DENIED) {
4593 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4594 		goto out;
4595 	}
4596 
4597 	/*
4598 	 * If there is an unshared filesystem mounted on this vnode,
4599 	 * do not allow to rename objects in this directory.
4600 	 */
4601 	if (vn_ismntpt(odvp)) {
4602 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4603 		goto out;
4604 	}
4605 
4606 	/*
4607 	 * If there is an unshared filesystem mounted on this vnode,
4608 	 * do not allow to rename to this directory.
4609 	 */
4610 	if (vn_ismntpt(ndvp)) {
4611 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4612 		goto out;
4613 	}
4614 
4615 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4616 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4617 		goto out;
4618 	}
4619 
4620 	if (cs->saved_exi != cs->exi) {
4621 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4622 		goto out;
4623 	}
4624 
4625 	status = utf8_dir_verify(&args->oldname);
4626 	if (status != NFS4_OK) {
4627 		*cs->statusp = resp->status = status;
4628 		goto out;
4629 	}
4630 
4631 	status = utf8_dir_verify(&args->newname);
4632 	if (status != NFS4_OK) {
4633 		*cs->statusp = resp->status = status;
4634 		goto out;
4635 	}
4636 
4637 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4638 	if (onm == NULL) {
4639 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4640 		goto out;
4641 	}
4642 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4643 	nlen = MAXPATHLEN + 1;
4644 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4645 	    nlen);
4646 
4647 	if (converted_onm == NULL) {
4648 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4649 		kmem_free(onm, olen);
4650 		goto out;
4651 	}
4652 
4653 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4654 	if (nnm == NULL) {
4655 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4656 		if (onm != converted_onm)
4657 			kmem_free(converted_onm, MAXPATHLEN + 1);
4658 		kmem_free(onm, olen);
4659 		goto out;
4660 	}
4661 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4662 	    MAXPATHLEN  + 1);
4663 
4664 	if (converted_nnm == NULL) {
4665 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4666 		kmem_free(nnm, nlen);
4667 		nnm = NULL;
4668 		if (onm != converted_onm)
4669 			kmem_free(converted_onm, MAXPATHLEN + 1);
4670 		kmem_free(onm, olen);
4671 		goto out;
4672 	}
4673 
4674 
4675 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4676 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4677 		kmem_free(onm, olen);
4678 		kmem_free(nnm, nlen);
4679 		goto out;
4680 	}
4681 
4682 
4683 	if (rdonly4(req, cs)) {
4684 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4685 		if (onm != converted_onm)
4686 			kmem_free(converted_onm, MAXPATHLEN + 1);
4687 		kmem_free(onm, olen);
4688 		if (nnm != converted_nnm)
4689 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4690 		kmem_free(nnm, nlen);
4691 		goto out;
4692 	}
4693 
4694 	/* check label of the target dir */
4695 	if (is_system_labeled()) {
4696 		ASSERT(req->rq_label != NULL);
4697 		clabel = req->rq_label;
4698 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4699 		    "got client label from request(1)",
4700 		    struct svc_req *, req);
4701 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4702 			if (!do_rfs_label_check(clabel, ndvp,
4703 			    EQUALITY_CHECK, cs->exi)) {
4704 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4705 				goto err_out;
4706 			}
4707 		}
4708 	}
4709 
4710 	/*
4711 	 * Is the source a file and have a delegation?
4712 	 * We don't need to acquire va_seq before these lookups, if
4713 	 * it causes an update, cinfo.before will not match, which will
4714 	 * trigger a cache flush even if atomic is TRUE.
4715 	 */
4716 	sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4717 	    &error, cs->cr);
4718 	if (sfp != NULL) {
4719 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4720 		    NULL)) {
4721 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4722 			goto err_out;
4723 		}
4724 	}
4725 
4726 	if (srcvp == NULL) {
4727 		*cs->statusp = resp->status = puterrno4(error);
4728 		if (onm != converted_onm)
4729 			kmem_free(converted_onm, MAXPATHLEN + 1);
4730 		kmem_free(onm, olen);
4731 		if (nnm != converted_nnm)
4732 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4733 		kmem_free(nnm, nlen);
4734 		goto out;
4735 	}
4736 
4737 	sfp_rele_grant_hold = 1;
4738 
4739 	/* Does the destination exist and a file and have a delegation? */
4740 	fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4741 	    cs->cr);
4742 	if (fp != NULL) {
4743 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4744 		    NULL)) {
4745 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4746 			goto err_out;
4747 		}
4748 	}
4749 	fp_rele_grant_hold = 1;
4750 
4751 	/* Check for NBMAND lock on both source and target */
4752 	if (nbl_need_check(srcvp)) {
4753 		nbl_start_crit(srcvp, RW_READER);
4754 		in_crit_src = 1;
4755 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4756 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4757 			goto err_out;
4758 		}
4759 	}
4760 
4761 	if (targvp && nbl_need_check(targvp)) {
4762 		nbl_start_crit(targvp, RW_READER);
4763 		in_crit_targ = 1;
4764 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4765 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4766 			goto err_out;
4767 		}
4768 	}
4769 
4770 	/* Get source "before" change value */
4771 	obdva.va_mask = AT_CTIME|AT_SEQ;
4772 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4773 	if (!error) {
4774 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4775 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4776 	}
4777 	if (error) {
4778 		*cs->statusp = resp->status = puterrno4(error);
4779 		goto err_out;
4780 	}
4781 
4782 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4783 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4784 
4785 	error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4786 	    NULL, 0);
4787 
4788 	/*
4789 	 * If target existed and was unlinked by VOP_RENAME, state will need
4790 	 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4791 	 * any necessary nbl_end_crit on srcvp and tgtvp.
4792 	 */
4793 	if (error == 0 && fp != NULL) {
4794 		rfs4_dbe_lock(fp->rf_dbe);
4795 		tvp = fp->rf_vp;
4796 		if (tvp)
4797 			VN_HOLD(tvp);
4798 		rfs4_dbe_unlock(fp->rf_dbe);
4799 
4800 		if (tvp) {
4801 			struct vattr va;
4802 			va.va_mask = AT_NLINK;
4803 
4804 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4805 			    va.va_nlink == 0) {
4806 				unlinked = 1;
4807 
4808 				/* DEBUG data */
4809 				if ((srcvp == targvp) || (tvp != targvp)) {
4810 					cmn_err(CE_WARN, "rfs4_op_rename: "
4811 					    "srcvp %p, targvp: %p, tvp: %p",
4812 					    (void *)srcvp, (void *)targvp,
4813 					    (void *)tvp);
4814 				}
4815 			} else {
4816 				VN_RELE(tvp);
4817 			}
4818 		}
4819 	}
4820 	if (error == 0)
4821 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4822 
4823 	if (in_crit_src)
4824 		nbl_end_crit(srcvp);
4825 	if (srcvp)
4826 		VN_RELE(srcvp);
4827 	if (in_crit_targ)
4828 		nbl_end_crit(targvp);
4829 	if (targvp)
4830 		VN_RELE(targvp);
4831 
4832 	if (unlinked) {
4833 		ASSERT(fp != NULL);
4834 		ASSERT(tvp != NULL);
4835 
4836 		/* DEBUG data */
4837 		if (RW_READ_HELD(&tvp->v_nbllock)) {
4838 			cmn_err(CE_WARN, "rfs4_op_rename: "
4839 			    "RW_READ_HELD(%p)", (void *)tvp);
4840 		}
4841 
4842 		/* The file is gone and so should the state */
4843 		rfs4_close_all_state(fp);
4844 		VN_RELE(tvp);
4845 	}
4846 
4847 	if (sfp) {
4848 		rfs4_clear_dont_grant(sfp);
4849 		rfs4_file_rele(sfp);
4850 	}
4851 	if (fp) {
4852 		rfs4_clear_dont_grant(fp);
4853 		rfs4_file_rele(fp);
4854 	}
4855 
4856 	if (converted_onm != onm)
4857 		kmem_free(converted_onm, MAXPATHLEN + 1);
4858 	kmem_free(onm, olen);
4859 	if (converted_nnm != nnm)
4860 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4861 	kmem_free(nnm, nlen);
4862 
4863 	/*
4864 	 * Get the initial "after" sequence number, if it fails, set to zero
4865 	 */
4866 	oidva.va_mask = AT_SEQ;
4867 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4868 		oidva.va_seq = 0;
4869 
4870 	nidva.va_mask = AT_SEQ;
4871 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4872 		nidva.va_seq = 0;
4873 
4874 	/*
4875 	 * Force modified data and metadata out to stable storage.
4876 	 */
4877 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4878 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4879 
4880 	if (error) {
4881 		*cs->statusp = resp->status = puterrno4(error);
4882 		goto out;
4883 	}
4884 
4885 	/*
4886 	 * Get "after" change values, if it fails, simply return the
4887 	 * before value.
4888 	 */
4889 	oadva.va_mask = AT_CTIME|AT_SEQ;
4890 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4891 		oadva.va_ctime = obdva.va_ctime;
4892 		oadva.va_seq = 0;
4893 	}
4894 
4895 	nadva.va_mask = AT_CTIME|AT_SEQ;
4896 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4897 		nadva.va_ctime = nbdva.va_ctime;
4898 		nadva.va_seq = 0;
4899 	}
4900 
4901 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4902 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4903 
4904 	/*
4905 	 * The cinfo.atomic = TRUE only if we have
4906 	 * non-zero va_seq's, and it has incremented by exactly one
4907 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4908 	 */
4909 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4910 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4911 		resp->source_cinfo.atomic = TRUE;
4912 	else
4913 		resp->source_cinfo.atomic = FALSE;
4914 
4915 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4916 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4917 		resp->target_cinfo.atomic = TRUE;
4918 	else
4919 		resp->target_cinfo.atomic = FALSE;
4920 
4921 #ifdef	VOLATILE_FH_TEST
4922 	{
4923 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4924 
4925 	/*
4926 	 * Add the renamed file handle to the volatile rename list
4927 	 */
4928 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4929 		/* file handles may expire on rename */
4930 		vnode_t *vp;
4931 
4932 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4933 		/*
4934 		 * Already know that nnm will be a valid string
4935 		 */
4936 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4937 		    NULL, NULL, NULL);
4938 		kmem_free(nnm, nlen);
4939 		if (!error) {
4940 			add_volrnm_fh(cs->exi, vp);
4941 			VN_RELE(vp);
4942 		}
4943 	}
4944 	}
4945 #endif	/* VOLATILE_FH_TEST */
4946 
4947 	*cs->statusp = resp->status = NFS4_OK;
4948 out:
4949 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4950 	    RENAME4res *, resp);
4951 	return;
4952 
4953 err_out:
4954 	if (onm != converted_onm)
4955 		kmem_free(converted_onm, MAXPATHLEN + 1);
4956 	if (onm != NULL)
4957 		kmem_free(onm, olen);
4958 	if (nnm != converted_nnm)
4959 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4960 	if (nnm != NULL)
4961 		kmem_free(nnm, nlen);
4962 
4963 	if (in_crit_src) nbl_end_crit(srcvp);
4964 	if (in_crit_targ) nbl_end_crit(targvp);
4965 	if (targvp) VN_RELE(targvp);
4966 	if (srcvp) VN_RELE(srcvp);
4967 	if (sfp) {
4968 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4969 		rfs4_file_rele(sfp);
4970 	}
4971 	if (fp) {
4972 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4973 		rfs4_file_rele(fp);
4974 	}
4975 
4976 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4977 	    RENAME4res *, resp);
4978 }
4979 
4980 /* ARGSUSED */
4981 static void
4982 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4983     struct compound_state *cs)
4984 {
4985 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4986 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4987 	rfs4_client_t *cp;
4988 
4989 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4990 	    RENEW4args *, args);
4991 
4992 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4993 		*cs->statusp = resp->status =
4994 		    rfs4_check_clientid(&args->clientid, 0);
4995 		goto out;
4996 	}
4997 
4998 	if (rfs4_lease_expired(cp)) {
4999 		rfs4_client_rele(cp);
5000 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
5001 		goto out;
5002 	}
5003 
5004 	rfs4_update_lease(cp);
5005 
5006 	mutex_enter(cp->rc_cbinfo.cb_lock);
5007 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5008 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5009 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5010 	} else {
5011 		*cs->statusp = resp->status = NFS4_OK;
5012 	}
5013 	mutex_exit(cp->rc_cbinfo.cb_lock);
5014 
5015 	rfs4_client_rele(cp);
5016 
5017 out:
5018 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5019 	    RENEW4res *, resp);
5020 }
5021 
5022 /* ARGSUSED */
5023 static void
5024 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5025     struct compound_state *cs)
5026 {
5027 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5028 
5029 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5030 
5031 	/* No need to check cs->access - we are not accessing any object */
5032 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5033 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5034 		goto out;
5035 	}
5036 	if (cs->vp != NULL) {
5037 		VN_RELE(cs->vp);
5038 	}
5039 	cs->vp = cs->saved_vp;
5040 	cs->saved_vp = NULL;
5041 	cs->exi = cs->saved_exi;
5042 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5043 	*cs->statusp = resp->status = NFS4_OK;
5044 	cs->deleg = FALSE;
5045 
5046 out:
5047 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5048 	    RESTOREFH4res *, resp);
5049 }
5050 
5051 /* ARGSUSED */
5052 static void
5053 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5054     struct compound_state *cs)
5055 {
5056 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5057 
5058 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5059 
5060 	/* No need to check cs->access - we are not accessing any object */
5061 	if (cs->vp == NULL) {
5062 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5063 		goto out;
5064 	}
5065 	if (cs->saved_vp != NULL) {
5066 		VN_RELE(cs->saved_vp);
5067 	}
5068 	cs->saved_vp = cs->vp;
5069 	VN_HOLD(cs->saved_vp);
5070 	cs->saved_exi = cs->exi;
5071 	/*
5072 	 * since SAVEFH is fairly rare, don't alloc space for its fh
5073 	 * unless necessary.
5074 	 */
5075 	if (cs->saved_fh.nfs_fh4_val == NULL) {
5076 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5077 	}
5078 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5079 	*cs->statusp = resp->status = NFS4_OK;
5080 
5081 out:
5082 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5083 	    SAVEFH4res *, resp);
5084 }
5085 
5086 /*
5087  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5088  * return the bitmap of attrs that were set successfully. It is also
5089  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5090  * always be called only after rfs4_do_set_attrs().
5091  *
5092  * Verify that the attributes are same as the expected ones. sargp->vap
5093  * and sargp->sbp contain the input attributes as translated from fattr4.
5094  *
5095  * This function verifies only the attrs that correspond to a vattr or
5096  * vfsstat struct. That is because of the extra step needed to get the
5097  * corresponding system structs. Other attributes have already been set or
5098  * verified by do_rfs4_set_attrs.
5099  *
5100  * Return 0 if all attrs match, -1 if some don't, error if error processing.
5101  */
5102 static int
5103 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5104     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5105 {
5106 	int error, ret_error = 0;
5107 	int i, k;
5108 	uint_t sva_mask = sargp->vap->va_mask;
5109 	uint_t vbit;
5110 	union nfs4_attr_u *na;
5111 	uint8_t *amap;
5112 	bool_t getsb = ntovp->vfsstat;
5113 
5114 	if (sva_mask != 0) {
5115 		/*
5116 		 * Okay to overwrite sargp->vap because we verify based
5117 		 * on the incoming values.
5118 		 */
5119 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5120 		    sargp->cs->cr, NULL);
5121 		if (ret_error) {
5122 			if (resp == NULL)
5123 				return (ret_error);
5124 			/*
5125 			 * Must return bitmap of successful attrs
5126 			 */
5127 			sva_mask = 0;	/* to prevent checking vap later */
5128 		} else {
5129 			/*
5130 			 * Some file systems clobber va_mask. it is probably
5131 			 * wrong of them to do so, nonethless we practice
5132 			 * defensive coding.
5133 			 * See bug id 4276830.
5134 			 */
5135 			sargp->vap->va_mask = sva_mask;
5136 		}
5137 	}
5138 
5139 	if (getsb) {
5140 		/*
5141 		 * Now get the superblock and loop on the bitmap, as there is
5142 		 * no simple way of translating from superblock to bitmap4.
5143 		 */
5144 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5145 		if (ret_error) {
5146 			if (resp == NULL)
5147 				goto errout;
5148 			getsb = FALSE;
5149 		}
5150 	}
5151 
5152 	/*
5153 	 * Now loop and verify each attribute which getattr returned
5154 	 * whether it's the same as the input.
5155 	 */
5156 	if (resp == NULL && !getsb && (sva_mask == 0))
5157 		goto errout;
5158 
5159 	na = ntovp->na;
5160 	amap = ntovp->amap;
5161 	k = 0;
5162 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5163 		k = *amap;
5164 		ASSERT(nfs4_ntov_map[k].nval == k);
5165 		vbit = nfs4_ntov_map[k].vbit;
5166 
5167 		/*
5168 		 * If vattr attribute but VOP_GETATTR failed, or it's
5169 		 * superblock attribute but VFS_STATVFS failed, skip
5170 		 */
5171 		if (vbit) {
5172 			if ((vbit & sva_mask) == 0)
5173 				continue;
5174 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5175 			continue;
5176 		}
5177 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5178 		if (resp != NULL) {
5179 			if (error)
5180 				ret_error = -1;	/* not all match */
5181 			else	/* update response bitmap */
5182 				*resp |= nfs4_ntov_map[k].fbit;
5183 			continue;
5184 		}
5185 		if (error) {
5186 			ret_error = -1;	/* not all match */
5187 			break;
5188 		}
5189 	}
5190 errout:
5191 	return (ret_error);
5192 }
5193 
5194 /*
5195  * Decode the attribute to be set/verified. If the attr requires a sys op
5196  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5197  * call the sv_getit function for it, because the sys op hasn't yet been done.
5198  * Return 0 for success, error code if failed.
5199  *
5200  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5201  */
5202 static int
5203 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5204     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5205 {
5206 	int error = 0;
5207 	bool_t set_later;
5208 
5209 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5210 
5211 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5212 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5213 		/*
5214 		 * don't verify yet if a vattr or sb dependent attr,
5215 		 * because we don't have their sys values yet.
5216 		 * Will be done later.
5217 		 */
5218 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5219 			/*
5220 			 * ACLs are a special case, since setting the MODE
5221 			 * conflicts with setting the ACL.  We delay setting
5222 			 * the ACL until all other attributes have been set.
5223 			 * The ACL gets set in do_rfs4_op_setattr().
5224 			 */
5225 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5226 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5227 				    sargp, nap);
5228 				if (error) {
5229 					xdr_free(nfs4_ntov_map[k].xfunc,
5230 					    (caddr_t)nap);
5231 				}
5232 			}
5233 		}
5234 	} else {
5235 #ifdef  DEBUG
5236 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5237 		    "decoding attribute %d\n", k);
5238 #endif
5239 		error = EINVAL;
5240 	}
5241 	if (!error && resp_bval && !set_later) {
5242 		*resp_bval |= nfs4_ntov_map[k].fbit;
5243 	}
5244 
5245 	return (error);
5246 }
5247 
5248 /*
5249  * Set vattr based on incoming fattr4 attrs - used by setattr.
5250  * Set response mask. Ignore any values that are not writable vattr attrs.
5251  */
5252 static nfsstat4
5253 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5254     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5255     nfs4_attr_cmd_t cmd)
5256 {
5257 	int error = 0;
5258 	int i;
5259 	char *attrs = fattrp->attrlist4;
5260 	uint32_t attrslen = fattrp->attrlist4_len;
5261 	XDR xdr;
5262 	nfsstat4 status = NFS4_OK;
5263 	vnode_t *vp = cs->vp;
5264 	union nfs4_attr_u *na;
5265 	uint8_t *amap;
5266 
5267 #ifndef lint
5268 	/*
5269 	 * Make sure that maximum attribute number can be expressed as an
5270 	 * 8 bit quantity.
5271 	 */
5272 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5273 #endif
5274 
5275 	if (vp == NULL) {
5276 		if (resp)
5277 			*resp = 0;
5278 		return (NFS4ERR_NOFILEHANDLE);
5279 	}
5280 	if (cs->access == CS_ACCESS_DENIED) {
5281 		if (resp)
5282 			*resp = 0;
5283 		return (NFS4ERR_ACCESS);
5284 	}
5285 
5286 	sargp->op = cmd;
5287 	sargp->cs = cs;
5288 	sargp->flag = 0;	/* may be set later */
5289 	sargp->vap->va_mask = 0;
5290 	sargp->rdattr_error = NFS4_OK;
5291 	sargp->rdattr_error_req = FALSE;
5292 	/* sargp->sbp is set by the caller */
5293 
5294 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5295 
5296 	na = ntovp->na;
5297 	amap = ntovp->amap;
5298 
5299 	/*
5300 	 * The following loop iterates on the nfs4_ntov_map checking
5301 	 * if the fbit is set in the requested bitmap.
5302 	 * If set then we process the arguments using the
5303 	 * rfs4_fattr4 conversion functions to populate the setattr
5304 	 * vattr and va_mask. Any settable attrs that are not using vattr
5305 	 * will be set in this loop.
5306 	 */
5307 	for (i = 0; i < nfs4_ntov_map_size; i++) {
5308 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5309 			continue;
5310 		}
5311 		/*
5312 		 * If setattr, must be a writable attr.
5313 		 * If verify/nverify, must be a readable attr.
5314 		 */
5315 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
5316 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5317 			/*
5318 			 * Client tries to set/verify an
5319 			 * unsupported attribute, tries to set
5320 			 * a read only attr or verify a write
5321 			 * only one - error!
5322 			 */
5323 			break;
5324 		}
5325 		/*
5326 		 * Decode the attribute to set/verify
5327 		 */
5328 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5329 		    &xdr, resp ? resp : NULL, na);
5330 		if (error)
5331 			break;
5332 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5333 		na++;
5334 		(ntovp->attrcnt)++;
5335 		if (nfs4_ntov_map[i].vfsstat)
5336 			ntovp->vfsstat = TRUE;
5337 	}
5338 
5339 	if (error != 0)
5340 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5341 		    puterrno4(error));
5342 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
5343 	return (status);
5344 }
5345 
5346 static nfsstat4
5347 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5348     stateid4 *stateid)
5349 {
5350 	int error = 0;
5351 	struct nfs4_svgetit_arg sarg;
5352 	bool_t trunc;
5353 
5354 	nfsstat4 status = NFS4_OK;
5355 	cred_t *cr = cs->cr;
5356 	vnode_t *vp = cs->vp;
5357 	struct nfs4_ntov_table ntov;
5358 	struct statvfs64 sb;
5359 	struct vattr bva;
5360 	struct flock64 bf;
5361 	int in_crit = 0;
5362 	uint_t saved_mask = 0;
5363 	caller_context_t ct;
5364 
5365 	*resp = 0;
5366 	sarg.sbp = &sb;
5367 	sarg.is_referral = B_FALSE;
5368 	nfs4_ntov_table_init(&ntov);
5369 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5370 	    NFS4ATTR_SETIT);
5371 	if (status != NFS4_OK) {
5372 		/*
5373 		 * failed set attrs
5374 		 */
5375 		goto done;
5376 	}
5377 
5378 	if ((sarg.vap->va_mask == 0) &&
5379 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5380 		/*
5381 		 * no further work to be done
5382 		 */
5383 		goto done;
5384 	}
5385 
5386 	/*
5387 	 * If we got a request to set the ACL and the MODE, only
5388 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
5389 	 * to change any other bits, along with setting an ACL,
5390 	 * gives NFS4ERR_INVAL.
5391 	 */
5392 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5393 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
5394 		vattr_t va;
5395 
5396 		va.va_mask = AT_MODE;
5397 		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5398 		if (error) {
5399 			status = puterrno4(error);
5400 			goto done;
5401 		}
5402 		if ((sarg.vap->va_mode ^ va.va_mode) &
5403 		    ~(VSUID | VSGID | VSVTX)) {
5404 			status = NFS4ERR_INVAL;
5405 			goto done;
5406 		}
5407 	}
5408 
5409 	/* Check stateid only if size has been set */
5410 	if (sarg.vap->va_mask & AT_SIZE) {
5411 		trunc = (sarg.vap->va_size == 0);
5412 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5413 		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5414 		if (status != NFS4_OK)
5415 			goto done;
5416 	} else {
5417 		ct.cc_sysid = 0;
5418 		ct.cc_pid = 0;
5419 		ct.cc_caller_id = nfs4_srv_caller_id;
5420 		ct.cc_flags = CC_DONTBLOCK;
5421 	}
5422 
5423 	/* XXX start of possible race with delegations */
5424 
5425 	/*
5426 	 * We need to specially handle size changes because it is
5427 	 * possible for the client to create a file with read-only
5428 	 * modes, but with the file opened for writing. If the client
5429 	 * then tries to set the file size, e.g. ftruncate(3C),
5430 	 * fcntl(F_FREESP), the normal access checking done in
5431 	 * VOP_SETATTR would prevent the client from doing it even though
5432 	 * it should be allowed to do so.  To get around this, we do the
5433 	 * access checking for ourselves and use VOP_SPACE which doesn't
5434 	 * do the access checking.
5435 	 * Also the client should not be allowed to change the file
5436 	 * size if there is a conflicting non-blocking mandatory lock in
5437 	 * the region of the change.
5438 	 */
5439 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5440 		u_offset_t offset;
5441 		ssize_t length;
5442 
5443 		/*
5444 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5445 		 * before returning, sarg.vap->va_mask is used to
5446 		 * generate the setattr reply bitmap.  We also clear
5447 		 * AT_SIZE below before calling VOP_SPACE.  For both
5448 		 * of these cases, the va_mask needs to be saved here
5449 		 * and restored after calling VOP_SETATTR.
5450 		 */
5451 		saved_mask = sarg.vap->va_mask;
5452 
5453 		/*
5454 		 * Check any possible conflict due to NBMAND locks.
5455 		 * Get into critical region before VOP_GETATTR, so the
5456 		 * size attribute is valid when checking conflicts.
5457 		 */
5458 		if (nbl_need_check(vp)) {
5459 			nbl_start_crit(vp, RW_READER);
5460 			in_crit = 1;
5461 		}
5462 
5463 		bva.va_mask = AT_UID|AT_SIZE;
5464 		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5465 		if (error != 0) {
5466 			status = puterrno4(error);
5467 			goto done;
5468 		}
5469 
5470 		if (in_crit) {
5471 			if (sarg.vap->va_size < bva.va_size) {
5472 				offset = sarg.vap->va_size;
5473 				length = bva.va_size - sarg.vap->va_size;
5474 			} else {
5475 				offset = bva.va_size;
5476 				length = sarg.vap->va_size - bva.va_size;
5477 			}
5478 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5479 			    &ct)) {
5480 				status = NFS4ERR_LOCKED;
5481 				goto done;
5482 			}
5483 		}
5484 
5485 		if (crgetuid(cr) == bva.va_uid) {
5486 			sarg.vap->va_mask &= ~AT_SIZE;
5487 			bf.l_type = F_WRLCK;
5488 			bf.l_whence = 0;
5489 			bf.l_start = (off64_t)sarg.vap->va_size;
5490 			bf.l_len = 0;
5491 			bf.l_sysid = 0;
5492 			bf.l_pid = 0;
5493 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5494 			    (offset_t)sarg.vap->va_size, cr, &ct);
5495 		}
5496 	}
5497 
5498 	if (!error && sarg.vap->va_mask != 0)
5499 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5500 
5501 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
5502 	if (saved_mask & AT_SIZE)
5503 		sarg.vap->va_mask |= AT_SIZE;
5504 
5505 	/*
5506 	 * If an ACL was being set, it has been delayed until now,
5507 	 * in order to set the mode (via the VOP_SETATTR() above) first.
5508 	 */
5509 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5510 		int i;
5511 
5512 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5513 			if (ntov.amap[i] == FATTR4_ACL)
5514 				break;
5515 		if (i < NFS4_MAXNUM_ATTRS) {
5516 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5517 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5518 			if (error == 0) {
5519 				*resp |= FATTR4_ACL_MASK;
5520 			} else if (error == ENOTSUP) {
5521 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
5522 				status = NFS4ERR_ATTRNOTSUPP;
5523 				goto done;
5524 			}
5525 		} else {
5526 			NFS4_DEBUG(rfs4_debug,
5527 			    (CE_NOTE, "do_rfs4_op_setattr: "
5528 			    "unable to find ACL in fattr4"));
5529 			error = EINVAL;
5530 		}
5531 	}
5532 
5533 	if (error) {
5534 		/* check if a monitor detected a delegation conflict */
5535 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5536 			status = NFS4ERR_DELAY;
5537 		else
5538 			status = puterrno4(error);
5539 
5540 		/*
5541 		 * Set the response bitmap when setattr failed.
5542 		 * If VOP_SETATTR partially succeeded, test by doing a
5543 		 * VOP_GETATTR on the object and comparing the data
5544 		 * to the setattr arguments.
5545 		 */
5546 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
5547 	} else {
5548 		/*
5549 		 * Force modified metadata out to stable storage.
5550 		 */
5551 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5552 		/*
5553 		 * Set response bitmap
5554 		 */
5555 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5556 	}
5557 
5558 /* Return early and already have a NFSv4 error */
5559 done:
5560 	/*
5561 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5562 	 * conversion sets both readable and writeable NFS4 attrs
5563 	 * for AT_MTIME and AT_ATIME.  The line below masks out
5564 	 * unrequested attrs from the setattr result bitmap.  This
5565 	 * is placed after the done: label to catch the ATTRNOTSUP
5566 	 * case.
5567 	 */
5568 	*resp &= fattrp->attrmask;
5569 
5570 	if (in_crit)
5571 		nbl_end_crit(vp);
5572 
5573 	nfs4_ntov_table_free(&ntov, &sarg);
5574 
5575 	return (status);
5576 }
5577 
5578 /* ARGSUSED */
5579 static void
5580 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5581     struct compound_state *cs)
5582 {
5583 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5584 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5585 	bslabel_t *clabel;
5586 
5587 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5588 	    SETATTR4args *, args);
5589 
5590 	if (cs->vp == NULL) {
5591 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5592 		goto out;
5593 	}
5594 
5595 	/*
5596 	 * If there is an unshared filesystem mounted on this vnode,
5597 	 * do not allow to setattr on this vnode.
5598 	 */
5599 	if (vn_ismntpt(cs->vp)) {
5600 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5601 		goto out;
5602 	}
5603 
5604 	resp->attrsset = 0;
5605 
5606 	if (rdonly4(req, cs)) {
5607 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5608 		goto out;
5609 	}
5610 
5611 	/* check label before setting attributes */
5612 	if (is_system_labeled()) {
5613 		ASSERT(req->rq_label != NULL);
5614 		clabel = req->rq_label;
5615 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5616 		    "got client label from request(1)",
5617 		    struct svc_req *, req);
5618 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5619 			if (!do_rfs_label_check(clabel, cs->vp,
5620 			    EQUALITY_CHECK, cs->exi)) {
5621 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
5622 				goto out;
5623 			}
5624 		}
5625 	}
5626 
5627 	*cs->statusp = resp->status =
5628 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5629 	    &args->stateid);
5630 
5631 out:
5632 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5633 	    SETATTR4res *, resp);
5634 }
5635 
5636 /* ARGSUSED */
5637 static void
5638 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5639     struct compound_state *cs)
5640 {
5641 	/*
5642 	 * verify and nverify are exactly the same, except that nverify
5643 	 * succeeds when some argument changed, and verify succeeds when
5644 	 * when none changed.
5645 	 */
5646 
5647 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5648 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5649 
5650 	int error;
5651 	struct nfs4_svgetit_arg sarg;
5652 	struct statvfs64 sb;
5653 	struct nfs4_ntov_table ntov;
5654 
5655 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5656 	    VERIFY4args *, args);
5657 
5658 	if (cs->vp == NULL) {
5659 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5660 		goto out;
5661 	}
5662 
5663 	sarg.sbp = &sb;
5664 	sarg.is_referral = B_FALSE;
5665 	nfs4_ntov_table_init(&ntov);
5666 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5667 	    &sarg, &ntov, NFS4ATTR_VERIT);
5668 	if (resp->status != NFS4_OK) {
5669 		/*
5670 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5671 		 * so could return -1 for "no match".
5672 		 */
5673 		if (resp->status == -1)
5674 			resp->status = NFS4ERR_NOT_SAME;
5675 		goto done;
5676 	}
5677 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5678 	switch (error) {
5679 	case 0:
5680 		resp->status = NFS4_OK;
5681 		break;
5682 	case -1:
5683 		resp->status = NFS4ERR_NOT_SAME;
5684 		break;
5685 	default:
5686 		resp->status = puterrno4(error);
5687 		break;
5688 	}
5689 done:
5690 	*cs->statusp = resp->status;
5691 	nfs4_ntov_table_free(&ntov, &sarg);
5692 out:
5693 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5694 	    VERIFY4res *, resp);
5695 }
5696 
5697 /* ARGSUSED */
5698 static void
5699 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5700     struct compound_state *cs)
5701 {
5702 	/*
5703 	 * verify and nverify are exactly the same, except that nverify
5704 	 * succeeds when some argument changed, and verify succeeds when
5705 	 * when none changed.
5706 	 */
5707 
5708 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5709 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5710 
5711 	int error;
5712 	struct nfs4_svgetit_arg sarg;
5713 	struct statvfs64 sb;
5714 	struct nfs4_ntov_table ntov;
5715 
5716 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5717 	    NVERIFY4args *, args);
5718 
5719 	if (cs->vp == NULL) {
5720 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5721 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5722 		    NVERIFY4res *, resp);
5723 		return;
5724 	}
5725 	sarg.sbp = &sb;
5726 	sarg.is_referral = B_FALSE;
5727 	nfs4_ntov_table_init(&ntov);
5728 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5729 	    &sarg, &ntov, NFS4ATTR_VERIT);
5730 	if (resp->status != NFS4_OK) {
5731 		/*
5732 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5733 		 * so could return -1 for "no match".
5734 		 */
5735 		if (resp->status == -1)
5736 			resp->status = NFS4_OK;
5737 		goto done;
5738 	}
5739 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5740 	switch (error) {
5741 	case 0:
5742 		resp->status = NFS4ERR_SAME;
5743 		break;
5744 	case -1:
5745 		resp->status = NFS4_OK;
5746 		break;
5747 	default:
5748 		resp->status = puterrno4(error);
5749 		break;
5750 	}
5751 done:
5752 	*cs->statusp = resp->status;
5753 	nfs4_ntov_table_free(&ntov, &sarg);
5754 
5755 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5756 	    NVERIFY4res *, resp);
5757 }
5758 
5759 /*
5760  * XXX - This should live in an NFS header file.
5761  */
5762 #define	MAX_IOVECS	12
5763 
5764 /* ARGSUSED */
5765 static void
5766 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5767     struct compound_state *cs)
5768 {
5769 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5770 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5771 	int error;
5772 	vnode_t *vp;
5773 	struct vattr bva;
5774 	u_offset_t rlimit;
5775 	struct uio uio;
5776 	struct iovec iov[MAX_IOVECS];
5777 	struct iovec *iovp;
5778 	int iovcnt;
5779 	int ioflag;
5780 	cred_t *savecred, *cr;
5781 	bool_t *deleg = &cs->deleg;
5782 	nfsstat4 stat;
5783 	int in_crit = 0;
5784 	caller_context_t ct;
5785 	nfs4_srv_t *nsrv4;
5786 
5787 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5788 	    WRITE4args *, args);
5789 
5790 	vp = cs->vp;
5791 	if (vp == NULL) {
5792 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5793 		goto out;
5794 	}
5795 	if (cs->access == CS_ACCESS_DENIED) {
5796 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5797 		goto out;
5798 	}
5799 
5800 	cr = cs->cr;
5801 
5802 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5803 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
5804 		*cs->statusp = resp->status = stat;
5805 		goto out;
5806 	}
5807 
5808 	/*
5809 	 * We have to enter the critical region before calling VOP_RWLOCK
5810 	 * to avoid a deadlock with ufs.
5811 	 */
5812 	if (nbl_need_check(vp)) {
5813 		nbl_start_crit(vp, RW_READER);
5814 		in_crit = 1;
5815 		if (nbl_conflict(vp, NBL_WRITE,
5816 		    args->offset, args->data_len, 0, &ct)) {
5817 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5818 			goto out;
5819 		}
5820 	}
5821 
5822 	bva.va_mask = AT_MODE | AT_UID;
5823 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5824 
5825 	/*
5826 	 * If we can't get the attributes, then we can't do the
5827 	 * right access checking.  So, we'll fail the request.
5828 	 */
5829 	if (error) {
5830 		*cs->statusp = resp->status = puterrno4(error);
5831 		goto out;
5832 	}
5833 
5834 	if (rdonly4(req, cs)) {
5835 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5836 		goto out;
5837 	}
5838 
5839 	if (vp->v_type != VREG) {
5840 		*cs->statusp = resp->status =
5841 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5842 		goto out;
5843 	}
5844 
5845 	if (crgetuid(cr) != bva.va_uid &&
5846 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5847 		*cs->statusp = resp->status = puterrno4(error);
5848 		goto out;
5849 	}
5850 
5851 	if (MANDLOCK(vp, bva.va_mode)) {
5852 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5853 		goto out;
5854 	}
5855 
5856 	nsrv4 = nfs4_get_srv();
5857 	if (args->data_len == 0) {
5858 		*cs->statusp = resp->status = NFS4_OK;
5859 		resp->count = 0;
5860 		resp->committed = args->stable;
5861 		resp->writeverf = nsrv4->write4verf;
5862 		goto out;
5863 	}
5864 
5865 	if (args->mblk != NULL) {
5866 		mblk_t *m;
5867 		uint_t bytes, round_len;
5868 
5869 		iovcnt = 0;
5870 		bytes = 0;
5871 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5872 		for (m = args->mblk;
5873 		    m != NULL && bytes < round_len;
5874 		    m = m->b_cont) {
5875 			iovcnt++;
5876 			bytes += MBLKL(m);
5877 		}
5878 #ifdef DEBUG
5879 		/* should have ended on an mblk boundary */
5880 		if (bytes != round_len) {
5881 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5882 			    bytes, round_len, args->data_len);
5883 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5884 			    (void *)args->mblk, (void *)m);
5885 			ASSERT(bytes == round_len);
5886 		}
5887 #endif
5888 		if (iovcnt <= MAX_IOVECS) {
5889 			iovp = iov;
5890 		} else {
5891 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5892 		}
5893 		mblk_to_iov(args->mblk, iovcnt, iovp);
5894 	} else if (args->rlist != NULL) {
5895 		iovcnt = 1;
5896 		iovp = iov;
5897 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5898 		iovp->iov_len = args->data_len;
5899 	} else {
5900 		iovcnt = 1;
5901 		iovp = iov;
5902 		iovp->iov_base = args->data_val;
5903 		iovp->iov_len = args->data_len;
5904 	}
5905 
5906 	uio.uio_iov = iovp;
5907 	uio.uio_iovcnt = iovcnt;
5908 
5909 	uio.uio_segflg = UIO_SYSSPACE;
5910 	uio.uio_extflg = UIO_COPY_DEFAULT;
5911 	uio.uio_loffset = args->offset;
5912 	uio.uio_resid = args->data_len;
5913 	uio.uio_llimit = curproc->p_fsz_ctl;
5914 	rlimit = uio.uio_llimit - args->offset;
5915 	if (rlimit < (u_offset_t)uio.uio_resid)
5916 		uio.uio_resid = (int)rlimit;
5917 
5918 	if (args->stable == UNSTABLE4)
5919 		ioflag = 0;
5920 	else if (args->stable == FILE_SYNC4)
5921 		ioflag = FSYNC;
5922 	else if (args->stable == DATA_SYNC4)
5923 		ioflag = FDSYNC;
5924 	else {
5925 		if (iovp != iov)
5926 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5927 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5928 		goto out;
5929 	}
5930 
5931 	/*
5932 	 * We're changing creds because VM may fault and we need
5933 	 * the cred of the current thread to be used if quota
5934 	 * checking is enabled.
5935 	 */
5936 	savecred = curthread->t_cred;
5937 	curthread->t_cred = cr;
5938 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5939 	curthread->t_cred = savecred;
5940 
5941 	if (iovp != iov)
5942 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5943 
5944 	if (error) {
5945 		*cs->statusp = resp->status = puterrno4(error);
5946 		goto out;
5947 	}
5948 
5949 	*cs->statusp = resp->status = NFS4_OK;
5950 	resp->count = args->data_len - uio.uio_resid;
5951 
5952 	if (ioflag == 0)
5953 		resp->committed = UNSTABLE4;
5954 	else
5955 		resp->committed = FILE_SYNC4;
5956 
5957 	resp->writeverf = nsrv4->write4verf;
5958 
5959 out:
5960 	if (in_crit)
5961 		nbl_end_crit(vp);
5962 
5963 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5964 	    WRITE4res *, resp);
5965 }
5966 
5967 static inline int
5968 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
5969 {
5970 	if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
5971 		return (0);
5972 	else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
5973 		return (0);
5974 	else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
5975 		return (0);
5976 	else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
5977 		return (0);
5978 	return (1);
5979 }
5980 
5981 void
5982 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
5983     struct svc_req *req, int *rv)
5984 {
5985 	uint_t i;
5986 	cred_t *cr;
5987 	nfs4_srv_t *nsrv4;
5988 	nfs_export_t *ne = nfs_get_export();
5989 
5990 	if (rv != NULL)
5991 		*rv = 0;
5992 	/*
5993 	 * Form a reply tag by copying over the request tag.
5994 	 */
5995 	resp->tag.utf8string_len = args->tag.utf8string_len;
5996 	if (args->tag.utf8string_len != 0) {
5997 		resp->tag.utf8string_val =
5998 		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5999 		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
6000 		    resp->tag.utf8string_len);
6001 	} else {
6002 		resp->tag.utf8string_val = NULL;
6003 	}
6004 
6005 	cs->statusp = &resp->status;
6006 	cs->req = req;
6007 	cs->minorversion = args->minorversion;
6008 	resp->array = NULL;
6009 	resp->array_len = 0;
6010 
6011 	if (args->array_len == 0) {
6012 		resp->status = NFS4_OK;
6013 		return;
6014 	}
6015 
6016 	cr = svc_xprt_cred(req->rq_xprt);
6017 	ASSERT(cr != NULL);
6018 
6019 	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
6020 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
6021 		    cs, COMPOUND4args *, args);
6022 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
6023 		    cs, COMPOUND4res *, resp);
6024 		svcerr_badcred(req->rq_xprt);
6025 		if (rv != NULL)
6026 			*rv = 1;
6027 		return;
6028 	}
6029 
6030 	resp->array_len = args->array_len;
6031 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
6032 	    KM_SLEEP);
6033 
6034 	cs->op_len = args->array_len;
6035 	cs->basecr = cr;
6036 	nsrv4 = nfs4_get_srv();
6037 
6038 	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
6039 	    COMPOUND4args *, args);
6040 
6041 	/*
6042 	 * For now, NFS4 compound processing must be protected by
6043 	 * exported_lock because it can access more than one exportinfo
6044 	 * per compound and share/unshare can now change multiple
6045 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
6046 	 * per proc (excluding public exinfo), and exi_count design
6047 	 * is sufficient to protect concurrent execution of NFS2/3
6048 	 * ops along with unexport.  This lock will be removed as
6049 	 * part of the NFSv4 phase 2 namespace redesign work.
6050 	 */
6051 	rw_enter(&ne->exported_lock, RW_READER);
6052 
6053 	/*
6054 	 * If this is the first compound we've seen, we need to start all
6055 	 * new instances' grace periods.
6056 	 */
6057 	if (nsrv4->seen_first_compound == 0) {
6058 		rfs4_grace_start_new(nsrv4);
6059 		/*
6060 		 * This must be set after rfs4_grace_start_new(), otherwise
6061 		 * another thread could proceed past here before the former
6062 		 * is finished.
6063 		 */
6064 		nsrv4->seen_first_compound = 1;
6065 	}
6066 
6067 	for (i = 0; i < args->array_len && cs->cont; i++) {
6068 		nfs_argop4 *argop;
6069 		nfs_resop4 *resop;
6070 		uint_t op;
6071 		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
6072 
6073 		argop = &args->array[i];
6074 		resop = &resp->array[i];
6075 		resop->resop = argop->argop;
6076 		op = (uint_t)resop->resop;
6077 
6078 		cs->op_pos = i;
6079 		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
6080 			/*
6081 			 * Count the individual ops here; NULL and COMPOUND
6082 			 * are counted in common_dispatch()
6083 			 */
6084 			stat[op].value.ui64++;
6085 
6086 			NFS4_DEBUG(rfs4_debug > 1,
6087 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6088 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
6089 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6090 			    rfs4_op_string[op], *cs->statusp));
6091 			if (*cs->statusp != NFS4_OK)
6092 				cs->cont = FALSE;
6093 		} else {
6094 			/*
6095 			 * This is effectively dead code since XDR code
6096 			 * will have already returned BADXDR if op doesn't
6097 			 * decode to legal value.  This only done for a
6098 			 * day when XDR code doesn't verify v4 opcodes.
6099 			 */
6100 			op = OP_ILLEGAL;
6101 			stat[OP_ILLEGAL_IDX].value.ui64++;
6102 
6103 			rfs4_op_illegal(argop, resop, req, cs);
6104 			cs->cont = FALSE;
6105 		}
6106 
6107 		/*
6108 		 * If not at last op, and if we are to stop, then
6109 		 * compact the results array.
6110 		 */
6111 		if ((i + 1) < args->array_len && !cs->cont) {
6112 			nfs_resop4 *new_res = kmem_alloc(
6113 			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
6114 			bcopy(resp->array,
6115 			    new_res, (i+1) * sizeof (nfs_resop4));
6116 			kmem_free(resp->array,
6117 			    args->array_len * sizeof (nfs_resop4));
6118 
6119 			resp->array_len =  i + 1;
6120 			resp->array = new_res;
6121 		}
6122 	}
6123 
6124 	rw_exit(&ne->exported_lock);
6125 
6126 	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
6127 	    COMPOUND4res *, resp);
6128 
6129 	/*
6130 	 * done with this compound request, free the label
6131 	 */
6132 
6133 	if (req->rq_label != NULL) {
6134 		kmem_free(req->rq_label, sizeof (bslabel_t));
6135 		req->rq_label = NULL;
6136 	}
6137 }
6138 
6139 /*
6140  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6141  * XXX zero out the tag and array values. Need to investigate why the
6142  * XXX calls occur, but at least prevent the panic for now.
6143  */
6144 void
6145 rfs4_compound_free(COMPOUND4res *resp)
6146 {
6147 	uint_t i;
6148 
6149 	if (resp->tag.utf8string_val) {
6150 		UTF8STRING_FREE(resp->tag)
6151 	}
6152 
6153 	for (i = 0; i < resp->array_len; i++) {
6154 		nfs_resop4 *resop;
6155 		uint_t op;
6156 
6157 		resop = &resp->array[i];
6158 		op = (uint_t)resop->resop;
6159 		if (op < rfsv4disp_cnt) {
6160 			(*rfsv4disptab[op].dis_resfree)(resop);
6161 		}
6162 	}
6163 	if (resp->array != NULL) {
6164 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6165 	}
6166 }
6167 
6168 /*
6169  * Process the value of the compound request rpc flags, as a bit-AND
6170  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6171  */
6172 void
6173 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6174 {
6175 	int i;
6176 	int flag = RPC_ALL;
6177 
6178 	for (i = 0; flag && i < args->array_len; i++) {
6179 		uint_t op;
6180 
6181 		op = (uint_t)args->array[i].argop;
6182 
6183 		if (op < rfsv4disp_cnt)
6184 			flag &= rfsv4disptab[op].dis_flags;
6185 		else
6186 			flag = 0;
6187 	}
6188 	*flagp = flag;
6189 }
6190 
6191 nfsstat4
6192 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6193 {
6194 	nfsstat4 e;
6195 
6196 	rfs4_dbe_lock(cp->rc_dbe);
6197 
6198 	if (cp->rc_sysidt != LM_NOSYSID) {
6199 		*sp = cp->rc_sysidt;
6200 		e = NFS4_OK;
6201 
6202 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6203 		*sp = cp->rc_sysidt;
6204 		e = NFS4_OK;
6205 
6206 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6207 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
6208 	} else
6209 		e = NFS4ERR_DELAY;
6210 
6211 	rfs4_dbe_unlock(cp->rc_dbe);
6212 	return (e);
6213 }
6214 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a lock request.
 * 'str' prefixes the message, 'operation' is the fcntl-style command
 * and 'flk' the lock being described.  A zero l_len is printed as ~0.
 */
static void lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *lktype;

	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	if (flk->l_type == F_UNLCK)
		lktype = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		lktype = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		lktype = "F_WRLCK";
	else
		lktype = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, lktype, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/* Calls lock_print() when 'd' is non-zero; expands to nothing otherwise. */
#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6251 
6252 /*ARGSUSED*/
6253 static bool_t
6254 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
6255 {
6256 	return (TRUE);
6257 }
6258 
6259 /*
6260  * Look up the pathname using the vp in cs as the directory vnode.
6261  * cs->vp will be the vnode for the file on success
6262  */
6263 
6264 static nfsstat4
6265 rfs4_lookup(component4 *component, struct svc_req *req,
6266     struct compound_state *cs)
6267 {
6268 	char *nm;
6269 	uint32_t len;
6270 	nfsstat4 status;
6271 	struct sockaddr *ca;
6272 	char *name;
6273 
6274 	if (cs->vp == NULL) {
6275 		return (NFS4ERR_NOFILEHANDLE);
6276 	}
6277 	if (cs->vp->v_type != VDIR) {
6278 		return (NFS4ERR_NOTDIR);
6279 	}
6280 
6281 	status = utf8_dir_verify(component);
6282 	if (status != NFS4_OK)
6283 		return (status);
6284 
6285 	nm = utf8_to_fn(component, &len, NULL);
6286 	if (nm == NULL) {
6287 		return (NFS4ERR_INVAL);
6288 	}
6289 
6290 	if (len > MAXNAMELEN) {
6291 		kmem_free(nm, len);
6292 		return (NFS4ERR_NAMETOOLONG);
6293 	}
6294 
6295 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6296 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6297 	    MAXPATHLEN + 1);
6298 
6299 	if (name == NULL) {
6300 		kmem_free(nm, len);
6301 		return (NFS4ERR_INVAL);
6302 	}
6303 
6304 	status = do_rfs4_op_lookup(name, req, cs);
6305 
6306 	if (name != nm)
6307 		kmem_free(name, MAXPATHLEN + 1);
6308 
6309 	kmem_free(nm, len);
6310 
6311 	return (status);
6312 }
6313 
6314 static nfsstat4
6315 rfs4_lookupfile(component4 *component, struct svc_req *req,
6316     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6317 {
6318 	nfsstat4 status;
6319 	vnode_t *dvp = cs->vp;
6320 	vattr_t bva, ava, fva;
6321 	int error;
6322 
6323 	/* Get "before" change value */
6324 	bva.va_mask = AT_CTIME|AT_SEQ;
6325 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6326 	if (error)
6327 		return (puterrno4(error));
6328 
6329 	/* rfs4_lookup may VN_RELE directory */
6330 	VN_HOLD(dvp);
6331 
6332 	status = rfs4_lookup(component, req, cs);
6333 	if (status != NFS4_OK) {
6334 		VN_RELE(dvp);
6335 		return (status);
6336 	}
6337 
6338 	/*
6339 	 * Get "after" change value, if it fails, simply return the
6340 	 * before value.
6341 	 */
6342 	ava.va_mask = AT_CTIME|AT_SEQ;
6343 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6344 		ava.va_ctime = bva.va_ctime;
6345 		ava.va_seq = 0;
6346 	}
6347 	VN_RELE(dvp);
6348 
6349 	/*
6350 	 * Validate the file is a file
6351 	 */
6352 	fva.va_mask = AT_TYPE|AT_MODE;
6353 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6354 	if (error)
6355 		return (puterrno4(error));
6356 
6357 	if (fva.va_type != VREG) {
6358 		if (fva.va_type == VDIR)
6359 			return (NFS4ERR_ISDIR);
6360 		if (fva.va_type == VLNK)
6361 			return (NFS4ERR_SYMLINK);
6362 		return (NFS4ERR_INVAL);
6363 	}
6364 
6365 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6366 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6367 
6368 	/*
6369 	 * It is undefined if VOP_LOOKUP will change va_seq, so
6370 	 * cinfo.atomic = TRUE only if we have
6371 	 * non-zero va_seq's, and they have not changed.
6372 	 */
6373 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6374 		cinfo->atomic = TRUE;
6375 	else
6376 		cinfo->atomic = FALSE;
6377 
6378 	/* Check for mandatory locking */
6379 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6380 	return (check_open_access(access, cs, req));
6381 }
6382 
6383 static nfsstat4
6384 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6385     cred_t *cr, vnode_t **vpp, bool_t *created)
6386 {
6387 	int error;
6388 	nfsstat4 status = NFS4_OK;
6389 	vattr_t va;
6390 
6391 tryagain:
6392 
6393 	/*
6394 	 * The file open mode used is VWRITE.  If the client needs
6395 	 * some other semantic, then it should do the access checking
6396 	 * itself.  It would have been nice to have the file open mode
6397 	 * passed as part of the arguments.
6398 	 */
6399 
6400 	*created = TRUE;
6401 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6402 
6403 	if (error) {
6404 		*created = FALSE;
6405 
6406 		/*
6407 		 * If we got something other than file already exists
6408 		 * then just return this error.  Otherwise, we got
6409 		 * EEXIST.  If we were doing a GUARDED create, then
6410 		 * just return this error.  Otherwise, we need to
6411 		 * make sure that this wasn't a duplicate of an
6412 		 * exclusive create request.
6413 		 *
6414 		 * The assumption is made that a non-exclusive create
6415 		 * request will never return EEXIST.
6416 		 */
6417 
6418 		if (error != EEXIST || mode == GUARDED4) {
6419 			status = puterrno4(error);
6420 			return (status);
6421 		}
6422 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6423 		    NULL, NULL, NULL);
6424 
6425 		if (error) {
6426 			/*
6427 			 * We couldn't find the file that we thought that
6428 			 * we just created.  So, we'll just try creating
6429 			 * it again.
6430 			 */
6431 			if (error == ENOENT)
6432 				goto tryagain;
6433 
6434 			status = puterrno4(error);
6435 			return (status);
6436 		}
6437 
6438 		if (mode == UNCHECKED4) {
6439 			/* existing object must be regular file */
6440 			if ((*vpp)->v_type != VREG) {
6441 				if ((*vpp)->v_type == VDIR)
6442 					status = NFS4ERR_ISDIR;
6443 				else if ((*vpp)->v_type == VLNK)
6444 					status = NFS4ERR_SYMLINK;
6445 				else
6446 					status = NFS4ERR_INVAL;
6447 				VN_RELE(*vpp);
6448 				return (status);
6449 			}
6450 
6451 			return (NFS4_OK);
6452 		}
6453 
6454 		/* Check for duplicate request */
6455 		va.va_mask = AT_MTIME;
6456 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6457 		if (!error) {
6458 			/* We found the file */
6459 			const timestruc_t *mtime = &vap->va_mtime;
6460 
6461 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
6462 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
6463 				/* but its not our creation */
6464 				VN_RELE(*vpp);
6465 				return (NFS4ERR_EXIST);
6466 			}
6467 			*created = TRUE; /* retrans of create == created */
6468 			return (NFS4_OK);
6469 		}
6470 		VN_RELE(*vpp);
6471 		return (NFS4ERR_EXIST);
6472 	}
6473 
6474 	return (NFS4_OK);
6475 }
6476 
6477 static nfsstat4
6478 check_open_access(uint32_t access, struct compound_state *cs,
6479     struct svc_req *req)
6480 {
6481 	int error;
6482 	vnode_t *vp;
6483 	bool_t readonly;
6484 	cred_t *cr = cs->cr;
6485 
6486 	/* For now we don't allow mandatory locking as per V2/V3 */
6487 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6488 		return (NFS4ERR_ACCESS);
6489 	}
6490 
6491 	vp = cs->vp;
6492 	ASSERT(cr != NULL && vp->v_type == VREG);
6493 
6494 	/*
6495 	 * If the file system is exported read only and we are trying
6496 	 * to open for write, then return NFS4ERR_ROFS
6497 	 */
6498 
6499 	readonly = rdonly4(req, cs);
6500 
6501 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6502 		return (NFS4ERR_ROFS);
6503 
6504 	if (access & OPEN4_SHARE_ACCESS_READ) {
6505 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6506 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6507 			return (NFS4ERR_ACCESS);
6508 		}
6509 	}
6510 
6511 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
6512 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6513 		if (error)
6514 			return (NFS4ERR_ACCESS);
6515 	}
6516 
6517 	return (NFS4_OK);
6518 }
6519 
6520 static void
6521 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6522 {
6523 	timespec32_t *time = (timespec32_t *)&v;
6524 
6525 	/*
6526 	 * Ensure no time overflows. Assumes underlying
6527 	 * filesystem supports at least 32 bits.
6528 	 * Truncate nsec to usec resolution to allow valid
6529 	 * compares even if the underlying filesystem truncates.
6530 	 */
6531 	mtime->tv_sec = time->tv_sec % TIME32_MAX;
6532 	mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6533 }
6534 
6535 static nfsstat4
6536 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6537     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6538 {
6539 	struct nfs4_svgetit_arg sarg;
6540 	struct nfs4_ntov_table ntov;
6541 
6542 	bool_t ntov_table_init = FALSE;
6543 	struct statvfs64 sb;
6544 	nfsstat4 status;
6545 	vnode_t *vp;
6546 	vattr_t bva, ava, iva, cva, *vap;
6547 	vnode_t *dvp;
6548 	char *nm = NULL;
6549 	uint_t buflen;
6550 	bool_t created;
6551 	bool_t setsize = FALSE;
6552 	len_t reqsize;
6553 	int error;
6554 	bool_t trunc;
6555 	caller_context_t ct;
6556 	component4 *component;
6557 	bslabel_t *clabel;
6558 	struct sockaddr *ca;
6559 	char *name = NULL;
6560 	fattr4 *fattr = NULL;
6561 
6562 	ASSERT(*attrset == 0);
6563 
6564 	sarg.sbp = &sb;
6565 	sarg.is_referral = B_FALSE;
6566 
6567 	dvp = cs->vp;
6568 
6569 	/* Check if the file system is read only */
6570 	if (rdonly4(req, cs))
6571 		return (NFS4ERR_ROFS);
6572 
6573 	/* check the label of including directory */
6574 	if (is_system_labeled()) {
6575 		ASSERT(req->rq_label != NULL);
6576 		clabel = req->rq_label;
6577 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6578 		    "got client label from request(1)",
6579 		    struct svc_req *, req);
6580 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
6581 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6582 			    cs->exi)) {
6583 				return (NFS4ERR_ACCESS);
6584 			}
6585 		}
6586 	}
6587 
6588 	if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6589 	    dvp->v_flag & V_XATTRDIR) {
6590 		/* prohibit EXCL create of named attributes */
6591 		return (NFS4ERR_INVAL);
6592 	}
6593 
6594 	/*
6595 	 * Get the last component of path name in nm. cs will reference
6596 	 * the including directory on success.
6597 	 */
6598 	component = &args->claim.open_claim4_u.file;
6599 	status = utf8_dir_verify(component);
6600 	if (status != NFS4_OK)
6601 		return (status);
6602 
6603 	nm = utf8_to_fn(component, &buflen, NULL);
6604 
6605 	if (nm == NULL)
6606 		return (NFS4ERR_RESOURCE);
6607 
6608 	if (buflen > MAXNAMELEN) {
6609 		kmem_free(nm, buflen);
6610 		return (NFS4ERR_NAMETOOLONG);
6611 	}
6612 
6613 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6614 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6615 	if (error) {
6616 		kmem_free(nm, buflen);
6617 		return (puterrno4(error));
6618 	}
6619 
6620 	if (bva.va_type != VDIR) {
6621 		kmem_free(nm, buflen);
6622 		return (NFS4ERR_NOTDIR);
6623 	}
6624 
6625 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6626 
6627 	switch (args->mode) {
6628 	case GUARDED4:
6629 		/*FALLTHROUGH*/
6630 	case UNCHECKED4:
6631 	case EXCLUSIVE4_1:
6632 		nfs4_ntov_table_init(&ntov);
6633 		ntov_table_init = TRUE;
6634 
6635 		if (args->mode == EXCLUSIVE4_1)
6636 			fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6637 		else
6638 			fattr = &args->createhow4_u.createattrs;
6639 
6640 		status = do_rfs4_set_attrs(attrset,
6641 		    fattr,
6642 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
6643 
6644 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6645 		    sarg.vap->va_type != VREG) {
6646 			if (sarg.vap->va_type == VDIR)
6647 				status = NFS4ERR_ISDIR;
6648 			else if (sarg.vap->va_type == VLNK)
6649 				status = NFS4ERR_SYMLINK;
6650 			else
6651 				status = NFS4ERR_INVAL;
6652 		}
6653 
6654 		if (status != NFS4_OK) {
6655 			kmem_free(nm, buflen);
6656 			nfs4_ntov_table_free(&ntov, &sarg);
6657 			*attrset = 0;
6658 			return (status);
6659 		}
6660 
6661 		vap = sarg.vap;
6662 		vap->va_type = VREG;
6663 		vap->va_mask |= AT_TYPE;
6664 
6665 		if ((vap->va_mask & AT_MODE) == 0) {
6666 			vap->va_mask |= AT_MODE;
6667 			vap->va_mode = (mode_t)0600;
6668 		}
6669 
6670 		if (vap->va_mask & AT_SIZE) {
6671 
6672 			/* Disallow create with a non-zero size */
6673 
6674 			if ((reqsize = sarg.vap->va_size) != 0) {
6675 				kmem_free(nm, buflen);
6676 				nfs4_ntov_table_free(&ntov, &sarg);
6677 				*attrset = 0;
6678 				return (NFS4ERR_INVAL);
6679 			}
6680 			setsize = TRUE;
6681 		}
6682 		if (args->mode == EXCLUSIVE4_1) {
6683 			rfs4_verifier_to_mtime(
6684 			    args->createhow4_u.ch_createboth.cva_verf,
6685 			    &vap->va_mtime);
6686 			/* attrset will be set later */
6687 			fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6688 			vap->va_mask |= AT_MTIME;
6689 		}
6690 		break;
6691 
6692 	case EXCLUSIVE4:
6693 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6694 		cva.va_type = VREG;
6695 		cva.va_mode = (mode_t)0;
6696 
6697 		rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6698 		    &cva.va_mtime);
6699 
6700 		vap = &cva;
6701 
6702 		/*
6703 		 * For EXCL create, attrset is set to the server attr
6704 		 * used to cache the client's verifier.
6705 		 */
6706 		*attrset = FATTR4_TIME_MODIFY_MASK;
6707 		break;
6708 	}
6709 
6710 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6711 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6712 	    MAXPATHLEN  + 1);
6713 
6714 	if (name == NULL) {
6715 		kmem_free(nm, buflen);
6716 		return (NFS4ERR_SERVERFAULT);
6717 	}
6718 
6719 	status = create_vnode(dvp, name, vap, args->mode,
6720 	    cs->cr, &vp, &created);
6721 	if (nm != name)
6722 		kmem_free(name, MAXPATHLEN + 1);
6723 	kmem_free(nm, buflen);
6724 
6725 	if (status != NFS4_OK) {
6726 		if (ntov_table_init)
6727 			nfs4_ntov_table_free(&ntov, &sarg);
6728 		*attrset = 0;
6729 		return (status);
6730 	}
6731 
6732 	trunc = (setsize && !created);
6733 
6734 	if (args->mode != EXCLUSIVE4) {
6735 		bitmap4 createmask = fattr->attrmask;
6736 
6737 		/*
6738 		 * True verification that object was created with correct
6739 		 * attrs is impossible.  The attrs could have been changed
6740 		 * immediately after object creation.  If attributes did
6741 		 * not verify, the only recourse for the server is to
6742 		 * destroy the object.  Maybe if some attrs (like gid)
6743 		 * are set incorrectly, the object should be destroyed;
6744 		 * however, seems bad as a default policy.  Do we really
6745 		 * want to destroy an object over one of the times not
6746 		 * verifying correctly?  For these reasons, the server
6747 		 * currently sets bits in attrset for createattrs
6748 		 * that were set; however, no verification is done.
6749 		 *
6750 		 * vmask_to_nmask accounts for vattr bits set on create
6751 		 *	[do_rfs4_set_attrs() only sets resp bits for
6752 		 *	 non-vattr/vfs bits.]
6753 		 * Mask off any bits we set by default so as not to return
6754 		 * more attrset bits than were requested in createattrs
6755 		 */
6756 		if (created) {
6757 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6758 			*attrset &= createmask;
6759 		} else {
6760 			/*
6761 			 * We did not create the vnode (we tried but it
6762 			 * already existed).  In this case, the only createattr
6763 			 * that the spec allows the server to set is size,
6764 			 * and even then, it can only be set if it is 0.
6765 			 */
6766 			*attrset = 0;
6767 			if (trunc)
6768 				*attrset = FATTR4_SIZE_MASK;
6769 		}
6770 	}
6771 	if (ntov_table_init)
6772 		nfs4_ntov_table_free(&ntov, &sarg);
6773 
6774 	/*
6775 	 * Get the initial "after" sequence number, if it fails,
6776 	 * set to zero, time to before.
6777 	 */
6778 	iva.va_mask = AT_CTIME|AT_SEQ;
6779 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6780 		iva.va_seq = 0;
6781 		iva.va_ctime = bva.va_ctime;
6782 	}
6783 
6784 	/*
6785 	 * create_vnode attempts to create the file exclusive,
6786 	 * if it already exists the VOP_CREATE will fail and
6787 	 * may not increase va_seq. It is atomic if
6788 	 * we haven't changed the directory, but if it has changed
6789 	 * we don't know what changed it.
6790 	 */
6791 	if (!created) {
6792 		if (bva.va_seq && iva.va_seq &&
6793 		    bva.va_seq == iva.va_seq)
6794 			cinfo->atomic = TRUE;
6795 		else
6796 			cinfo->atomic = FALSE;
6797 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6798 	} else {
6799 		/*
6800 		 * The entry was created, we need to sync the
6801 		 * directory metadata.
6802 		 */
6803 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6804 
6805 		/*
6806 		 * Get "after" change value, if it fails, simply return the
6807 		 * before value.
6808 		 */
6809 		ava.va_mask = AT_CTIME|AT_SEQ;
6810 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6811 			ava.va_ctime = bva.va_ctime;
6812 			ava.va_seq = 0;
6813 		}
6814 
6815 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6816 
6817 		/*
6818 		 * The cinfo->atomic = TRUE only if we have
6819 		 * non-zero va_seq's, and it has incremented by exactly one
6820 		 * during the create_vnode and it didn't
6821 		 * change during the VOP_FSYNC.
6822 		 */
6823 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6824 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6825 			cinfo->atomic = TRUE;
6826 		else
6827 			cinfo->atomic = FALSE;
6828 	}
6829 
6830 	/* Check for mandatory locking and that the size gets set. */
6831 	cva.va_mask = AT_MODE;
6832 	if (setsize)
6833 		cva.va_mask |= AT_SIZE;
6834 
6835 	/* Assume the worst */
6836 	cs->mandlock = TRUE;
6837 
6838 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6839 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6840 
6841 		/*
6842 		 * Truncate the file if necessary; this would be
6843 		 * the case for create over an existing file.
6844 		 */
6845 
6846 		if (trunc) {
6847 			int in_crit = 0;
6848 			rfs4_file_t *fp;
6849 			nfs4_srv_t *nsrv4;
6850 			bool_t create = FALSE;
6851 
6852 			/*
6853 			 * We are writing over an existing file.
6854 			 * Check to see if we need to recall a delegation.
6855 			 */
6856 			nsrv4 = nfs4_get_srv();
6857 			rfs4_hold_deleg_policy(nsrv4);
6858 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6859 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6860 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
6861 					rfs4_file_rele(fp);
6862 					rfs4_rele_deleg_policy(nsrv4);
6863 					VN_RELE(vp);
6864 					*attrset = 0;
6865 					return (NFS4ERR_DELAY);
6866 				}
6867 				rfs4_file_rele(fp);
6868 			}
6869 			rfs4_rele_deleg_policy(nsrv4);
6870 
6871 			if (nbl_need_check(vp)) {
6872 				in_crit = 1;
6873 
6874 				ASSERT(reqsize == 0);
6875 
6876 				nbl_start_crit(vp, RW_READER);
6877 				if (nbl_conflict(vp, NBL_WRITE, 0,
6878 				    cva.va_size, 0, NULL)) {
6879 					in_crit = 0;
6880 					nbl_end_crit(vp);
6881 					VN_RELE(vp);
6882 					*attrset = 0;
6883 					return (NFS4ERR_ACCESS);
6884 				}
6885 			}
6886 			ct.cc_sysid = 0;
6887 			ct.cc_pid = 0;
6888 			ct.cc_caller_id = nfs4_srv_caller_id;
6889 			ct.cc_flags = CC_DONTBLOCK;
6890 
6891 			cva.va_mask = AT_SIZE;
6892 			cva.va_size = reqsize;
6893 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6894 			if (in_crit)
6895 				nbl_end_crit(vp);
6896 		}
6897 	}
6898 
6899 	error = makefh4(&cs->fh, vp, cs->exi);
6900 
6901 	/*
6902 	 * Force modified data and metadata out to stable storage.
6903 	 */
6904 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6905 
6906 	if (error) {
6907 		VN_RELE(vp);
6908 		*attrset = 0;
6909 		return (puterrno4(error));
6910 	}
6911 
6912 	/* if parent dir is attrdir, set namedattr fh flag */
6913 	if (dvp->v_flag & V_XATTRDIR)
6914 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6915 
6916 	if (cs->vp)
6917 		VN_RELE(cs->vp);
6918 
6919 	cs->vp = vp;
6920 
6921 	/*
6922 	 * if we did not create the file, we will need to check
6923 	 * the access bits on the file
6924 	 */
6925 
6926 	if (!created) {
6927 		if (setsize)
6928 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6929 		status = check_open_access(args->share_access, cs, req);
6930 		if (status != NFS4_OK)
6931 			*attrset = 0;
6932 	}
6933 	return (status);
6934 }
6935 
6936 /*ARGSUSED*/
6937 static void
6938 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6939     rfs4_openowner_t *oo, delegreq_t deleg,
6940     uint32_t access, uint32_t deny,
6941     OPEN4res *resp, int deleg_cur)
6942 {
6943 	/* XXX Currently not using req  */
6944 	rfs4_state_t *sp;
6945 	rfs4_file_t *fp;
6946 	bool_t screate = TRUE;
6947 	bool_t fcreate = TRUE;
6948 	uint32_t open_a, share_a;
6949 	uint32_t open_d, share_d;
6950 	rfs4_deleg_state_t *dsp;
6951 	sysid_t sysid;
6952 	nfsstat4 status;
6953 	caller_context_t ct;
6954 	int fflags = 0;
6955 	int recall = 0;
6956 	int err;
6957 	int first_open;
6958 
6959 	/* get the file struct and hold a lock on it during initial open */
6960 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6961 	if (fp == NULL) {
6962 		resp->status = NFS4ERR_RESOURCE;
6963 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6964 		return;
6965 	}
6966 
6967 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6968 	if (sp == NULL) {
6969 		resp->status = NFS4ERR_RESOURCE;
6970 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6971 		/* No need to keep any reference */
6972 		rw_exit(&fp->rf_file_rwlock);
6973 		rfs4_file_rele(fp);
6974 		return;
6975 	}
6976 
6977 	/* try to get the sysid before continuing */
6978 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6979 		resp->status = status;
6980 		rfs4_file_rele(fp);
6981 		/* Not a fully formed open; "close" it */
6982 		if (screate == TRUE)
6983 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6984 		rfs4_state_rele(sp);
6985 		return;
6986 	}
6987 
6988 	/* Calculate the fflags for this OPEN. */
6989 	if (access & OPEN4_SHARE_ACCESS_READ)
6990 		fflags |= FREAD;
6991 	if (access & OPEN4_SHARE_ACCESS_WRITE)
6992 		fflags |= FWRITE;
6993 
6994 	rfs4_dbe_lock(sp->rs_dbe);
6995 
6996 	/*
6997 	 * Calculate the new deny and access mode that this open is adding to
6998 	 * the file for this open owner;
6999 	 */
7000 	open_d = (deny & ~sp->rs_open_deny);
7001 	open_a = (access & ~sp->rs_open_access);
7002 
7003 	/*
7004 	 * Calculate the new share access and share deny modes that this open
7005 	 * is adding to the file for this open owner;
7006 	 */
7007 	share_a = (access & ~sp->rs_share_access);
7008 	share_d = (deny & ~sp->rs_share_deny);
7009 
7010 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
7011 
7012 	/*
7013 	 * Check to see the client has already sent an open for this
7014 	 * open owner on this file with the same share/deny modes.
7015 	 * If so, we don't need to check for a conflict and we don't
7016 	 * need to add another shrlock.  If not, then we need to
7017 	 * check for conflicts in deny and access before checking for
7018 	 * conflicts in delegation.  We don't want to recall a
7019 	 * delegation based on an open that will eventually fail based
7020 	 * on shares modes.
7021 	 */
7022 
7023 	if (share_a || share_d) {
7024 		if ((err = rfs4_share(sp, access, deny)) != 0) {
7025 			rfs4_dbe_unlock(sp->rs_dbe);
7026 			resp->status = err;
7027 
7028 			rfs4_file_rele(fp);
7029 			/* Not a fully formed open; "close" it */
7030 			if (screate == TRUE)
7031 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7032 			rfs4_state_rele(sp);
7033 			return;
7034 		}
7035 	}
7036 
7037 	rfs4_dbe_lock(fp->rf_dbe);
7038 
7039 	/*
7040 	 * Check to see if this file is delegated and if so, if a
7041 	 * recall needs to be done.
7042 	 */
7043 	if (rfs4_check_recall(sp, access)) {
7044 		rfs4_dbe_unlock(fp->rf_dbe);
7045 		rfs4_dbe_unlock(sp->rs_dbe);
7046 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
7047 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
7048 		rfs4_dbe_lock(sp->rs_dbe);
7049 
7050 		/* if state closed while lock was dropped */
7051 		if (sp->rs_closed) {
7052 			if (share_a || share_d)
7053 				(void) rfs4_unshare(sp);
7054 			rfs4_dbe_unlock(sp->rs_dbe);
7055 			rfs4_file_rele(fp);
7056 			/* Not a fully formed open; "close" it */
7057 			if (screate == TRUE)
7058 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7059 			rfs4_state_rele(sp);
7060 			resp->status = NFS4ERR_OLD_STATEID;
7061 			return;
7062 		}
7063 
7064 		rfs4_dbe_lock(fp->rf_dbe);
7065 		/* Let's see if the delegation was returned */
7066 		if (rfs4_check_recall(sp, access)) {
7067 			rfs4_dbe_unlock(fp->rf_dbe);
7068 			if (share_a || share_d)
7069 				(void) rfs4_unshare(sp);
7070 			rfs4_dbe_unlock(sp->rs_dbe);
7071 			rfs4_file_rele(fp);
7072 			rfs4_update_lease(sp->rs_owner->ro_client);
7073 
7074 			/* Not a fully formed open; "close" it */
7075 			if (screate == TRUE)
7076 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7077 			rfs4_state_rele(sp);
7078 			resp->status = NFS4ERR_DELAY;
7079 			return;
7080 		}
7081 	}
7082 	/*
7083 	 * the share check passed and any delegation conflict has been
7084 	 * taken care of, now call vop_open.
7085 	 * if this is the first open then call vop_open with fflags.
7086 	 * if not, call vn_open_upgrade with just the upgrade flags.
7087 	 *
7088 	 * if the file has been opened already, it will have the current
7089 	 * access mode in the state struct.  if it has no share access, then
7090 	 * this is a new open.
7091 	 *
7092 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7093 	 * call VOP_OPEN(), just do the open upgrade.
7094 	 */
7095 	if (first_open && !deleg_cur) {
7096 		ct.cc_sysid = sysid;
7097 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7098 		ct.cc_caller_id = nfs4_srv_caller_id;
7099 		ct.cc_flags = CC_DONTBLOCK;
7100 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7101 		if (err) {
7102 			rfs4_dbe_unlock(fp->rf_dbe);
7103 			if (share_a || share_d)
7104 				(void) rfs4_unshare(sp);
7105 			rfs4_dbe_unlock(sp->rs_dbe);
7106 			rfs4_file_rele(fp);
7107 
7108 			/* Not a fully formed open; "close" it */
7109 			if (screate == TRUE)
7110 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7111 			rfs4_state_rele(sp);
7112 			/* check if a monitor detected a delegation conflict */
7113 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7114 				resp->status = NFS4ERR_DELAY;
7115 			else
7116 				resp->status = NFS4ERR_SERVERFAULT;
7117 			return;
7118 		}
7119 	} else { /* open upgrade */
7120 		/*
7121 		 * calculate the fflags for the new mode that is being added
7122 		 * by this upgrade.
7123 		 */
7124 		fflags = 0;
7125 		if (open_a & OPEN4_SHARE_ACCESS_READ)
7126 			fflags |= FREAD;
7127 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7128 			fflags |= FWRITE;
7129 		vn_open_upgrade(cs->vp, fflags);
7130 	}
7131 	sp->rs_open_access |= access;
7132 	sp->rs_open_deny |= deny;
7133 
7134 	if (open_d & OPEN4_SHARE_DENY_READ)
7135 		fp->rf_deny_read++;
7136 	if (open_d & OPEN4_SHARE_DENY_WRITE)
7137 		fp->rf_deny_write++;
7138 	fp->rf_share_deny |= deny;
7139 
7140 	if (open_a & OPEN4_SHARE_ACCESS_READ)
7141 		fp->rf_access_read++;
7142 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7143 		fp->rf_access_write++;
7144 	fp->rf_share_access |= access;
7145 
7146 	/*
7147 	 * Check for delegation here. if the deleg argument is not
7148 	 * DELEG_ANY, then this is a reclaim from a client and
7149 	 * we must honor the delegation requested. If necessary we can
7150 	 * set the recall flag.
7151 	 */
7152 
7153 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
7154 
7155 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7156 
7157 	next_stateid(&sp->rs_stateid);
7158 
7159 	resp->stateid = sp->rs_stateid.stateid;
7160 
7161 	rfs4_dbe_unlock(fp->rf_dbe);
7162 	rfs4_dbe_unlock(sp->rs_dbe);
7163 
7164 	if (dsp) {
7165 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7166 		rfs4_deleg_state_rele(dsp);
7167 	}
7168 
7169 	rfs4_file_rele(fp);
7170 	rfs4_state_rele(sp);
7171 
7172 	resp->status = NFS4_OK;
7173 }
7174 
7175 /*ARGSUSED*/
7176 static void
7177 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7178     rfs4_openowner_t *oo, OPEN4res *resp)
7179 {
7180 	/* cs->vp and cs->fh have been updated by putfh. */
7181 	rfs4_do_open(cs, req, oo, DELEG_ANY,
7182 	    (args->share_access & 0xff), args->share_deny, resp, 0);
7183 }
7184 
7185 /*ARGSUSED*/
7186 static void
7187 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7188     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7189 {
7190 	change_info4 *cinfo = &resp->cinfo;
7191 	bitmap4 *attrset = &resp->attrset;
7192 
7193 	if (args->opentype == OPEN4_NOCREATE)
7194 		resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7195 		    req, cs, args->share_access, cinfo);
7196 	else {
7197 		/* inhibit delegation grants during exclusive create */
7198 
7199 		if (args->mode == EXCLUSIVE4)
7200 			rfs4_disable_delegation();
7201 
7202 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7203 		    oo->ro_client->rc_clientid);
7204 	}
7205 
7206 	if (resp->status == NFS4_OK) {
7207 
7208 		/* cs->vp cs->fh now reference the desired file */
7209 
7210 		rfs4_do_open(cs, req, oo,
7211 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7212 		    args->share_access, args->share_deny, resp, 0);
7213 
7214 		/*
7215 		 * If rfs4_createfile set attrset, we must
7216 		 * clear this attrset before the response is copied.
7217 		 */
7218 		if (resp->status != NFS4_OK && resp->attrset) {
7219 			resp->attrset = 0;
7220 		}
7221 	}
7222 	else
7223 		*cs->statusp = resp->status;
7224 
7225 	if (args->mode == EXCLUSIVE4)
7226 		rfs4_enable_delegation();
7227 }
7228 
7229 /*ARGSUSED*/
7230 static void
7231 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7232     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7233 {
7234 	change_info4 *cinfo = &resp->cinfo;
7235 	vattr_t va;
7236 	vtype_t v_type = cs->vp->v_type;
7237 	int error = 0;
7238 
7239 	/* Verify that we have a regular file */
7240 	if (v_type != VREG) {
7241 		if (v_type == VDIR)
7242 			resp->status = NFS4ERR_ISDIR;
7243 		else if (v_type == VLNK)
7244 			resp->status = NFS4ERR_SYMLINK;
7245 		else
7246 			resp->status = NFS4ERR_INVAL;
7247 		return;
7248 	}
7249 
7250 	va.va_mask = AT_MODE|AT_UID;
7251 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7252 	if (error) {
7253 		resp->status = puterrno4(error);
7254 		return;
7255 	}
7256 
7257 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7258 
7259 	/*
7260 	 * Check if we have access to the file, Note the the file
7261 	 * could have originally been open UNCHECKED or GUARDED
7262 	 * with mode bits that will now fail, but there is nothing
7263 	 * we can really do about that except in the case that the
7264 	 * owner of the file is the one requesting the open.
7265 	 */
7266 	if (crgetuid(cs->cr) != va.va_uid) {
7267 		resp->status = check_open_access(args->share_access, cs, req);
7268 		if (resp->status != NFS4_OK) {
7269 			return;
7270 		}
7271 	}
7272 
7273 	/*
7274 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7275 	 */
7276 	cinfo->before = 0;
7277 	cinfo->after = 0;
7278 	cinfo->atomic = FALSE;
7279 
7280 	rfs4_do_open(cs, req, oo,
7281 	    NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7282 	    args->share_access, args->share_deny, resp, 0);
7283 }
7284 
7285 static void
7286 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7287     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7288 {
7289 	int error;
7290 	nfsstat4 status;
7291 	stateid4 stateid =
7292 	    args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7293 	rfs4_deleg_state_t *dsp;
7294 
7295 	/*
7296 	 * Find the state info from the stateid and confirm that the
7297 	 * file is delegated.  If the state openowner is the same as
7298 	 * the supplied openowner we're done. If not, get the file
7299 	 * info from the found state info. Use that file info to
7300 	 * create the state for this lock owner. Note solaris doen't
7301 	 * really need the pathname to find the file. We may want to
7302 	 * lookup the pathname and make sure that the vp exist and
7303 	 * matches the vp in the file structure. However it is
7304 	 * possible that the pathname nolonger exists (local process
7305 	 * unlinks the file), so this may not be that useful.
7306 	 */
7307 
7308 	status = rfs4_get_deleg_state(&stateid, &dsp);
7309 	if (status != NFS4_OK) {
7310 		resp->status = status;
7311 		return;
7312 	}
7313 
7314 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7315 
7316 	/*
7317 	 * New lock owner, create state. Since this was probably called
7318 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
7319 	 */
7320 
7321 	ASSERT(cs->vp != NULL);
7322 	VN_RELE(cs->vp);
7323 	VN_HOLD(dsp->rds_finfo->rf_vp);
7324 	cs->vp = dsp->rds_finfo->rf_vp;
7325 
7326 	error = makefh4(&cs->fh, cs->vp, cs->exi);
7327 	if (error != 0) {
7328 		rfs4_deleg_state_rele(dsp);
7329 		*cs->statusp = resp->status = puterrno4(error);
7330 		return;
7331 	}
7332 
7333 	/* Mark progress for delegation returns */
7334 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7335 	rfs4_deleg_state_rele(dsp);
7336 	rfs4_do_open(cs, req, oo, DELEG_NONE,
7337 	    args->share_access, args->share_deny, resp, 1);
7338 }
7339 
7340 /*ARGSUSED*/
7341 static void
7342 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7343     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7344 {
7345 	/*
7346 	 * Lookup the pathname, it must already exist since this file
7347 	 * was delegated.
7348 	 *
7349 	 * Find the file and state info for this vp and open owner pair.
7350 	 *	check that they are in fact delegated.
7351 	 *	check that the state access and deny modes are the same.
7352 	 *
7353 	 * Return the delgation possibly seting the recall flag.
7354 	 */
7355 	rfs4_file_t *fp;
7356 	rfs4_state_t *sp;
7357 	bool_t create = FALSE;
7358 	bool_t dcreate = FALSE;
7359 	rfs4_deleg_state_t *dsp;
7360 	nfsace4 *ace;
7361 
7362 	/* Note we ignore oflags */
7363 	resp->status = rfs4_lookupfile(
7364 	    &args->claim.open_claim4_u.file_delegate_prev,
7365 	    req, cs, args->share_access, &resp->cinfo);
7366 
7367 	if (resp->status != NFS4_OK) {
7368 		return;
7369 	}
7370 
7371 	/* get the file struct and hold a lock on it during initial open */
7372 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7373 	if (fp == NULL) {
7374 		resp->status = NFS4ERR_RESOURCE;
7375 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7376 		return;
7377 	}
7378 
7379 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7380 	if (sp == NULL) {
7381 		resp->status = NFS4ERR_SERVERFAULT;
7382 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7383 		rw_exit(&fp->rf_file_rwlock);
7384 		rfs4_file_rele(fp);
7385 		return;
7386 	}
7387 
7388 	rfs4_dbe_lock(sp->rs_dbe);
7389 	rfs4_dbe_lock(fp->rf_dbe);
7390 	if (args->share_access != sp->rs_share_access ||
7391 	    args->share_deny != sp->rs_share_deny ||
7392 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7393 		NFS4_DEBUG(rfs4_debug,
7394 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7395 		rfs4_dbe_unlock(fp->rf_dbe);
7396 		rfs4_dbe_unlock(sp->rs_dbe);
7397 		rfs4_file_rele(fp);
7398 		rfs4_state_rele(sp);
7399 		resp->status = NFS4ERR_SERVERFAULT;
7400 		return;
7401 	}
7402 	rfs4_dbe_unlock(fp->rf_dbe);
7403 	rfs4_dbe_unlock(sp->rs_dbe);
7404 
7405 	dsp = rfs4_finddeleg(sp, &dcreate);
7406 	if (dsp == NULL) {
7407 		rfs4_state_rele(sp);
7408 		rfs4_file_rele(fp);
7409 		resp->status = NFS4ERR_SERVERFAULT;
7410 		return;
7411 	}
7412 
7413 	next_stateid(&sp->rs_stateid);
7414 
7415 	resp->stateid = sp->rs_stateid.stateid;
7416 
7417 	resp->delegation.delegation_type = dsp->rds_dtype;
7418 
7419 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7420 		open_read_delegation4 *rv =
7421 		    &resp->delegation.open_delegation4_u.read;
7422 
7423 		rv->stateid = dsp->rds_delegid.stateid;
7424 		rv->recall = FALSE; /* no policy in place to set to TRUE */
7425 		ace = &rv->permissions;
7426 	} else {
7427 		open_write_delegation4 *rv =
7428 		    &resp->delegation.open_delegation4_u.write;
7429 
7430 		rv->stateid = dsp->rds_delegid.stateid;
7431 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
7432 		ace = &rv->permissions;
7433 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
7434 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7435 	}
7436 
7437 	/* XXX For now */
7438 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7439 	ace->flag = 0;
7440 	ace->access_mask = 0;
7441 	ace->who.utf8string_len = 0;
7442 	ace->who.utf8string_val = 0;
7443 
7444 	rfs4_deleg_state_rele(dsp);
7445 	rfs4_state_rele(sp);
7446 	rfs4_file_rele(fp);
7447 }
7448 
/*
 * Result of a sequence id check.  The enumerators take the values
 * 0, 1 and 2 in the order listed.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY,	/* seqid is the next expected one */
	NFS4_CHKSEQ_REPLAY,	/* seqid and operation repeat the last request */
	NFS4_CHKSEQ_BAD		/* anything else */
} rfs4_chkseq_t;
7454 
7455 /*
7456  * Generic function for sequence number checks.
7457  */
7458 static rfs4_chkseq_t
7459 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7460     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7461 {
7462 	/* Same sequence ids and matching operations? */
7463 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
7464 		if (copyres == TRUE) {
7465 			rfs4_free_reply(resop);
7466 			rfs4_copy_reply(resop, lastop);
7467 		}
7468 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7469 		    "Replayed SEQID %d\n", seqid));
7470 		return (NFS4_CHKSEQ_REPLAY);
7471 	}
7472 
7473 	/* If the incoming sequence is not the next expected then it is bad */
7474 	if (rqst_seq != seqid + 1) {
7475 		if (rqst_seq == seqid) {
7476 			NFS4_DEBUG(rfs4_debug,
7477 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
7478 			    "but last op was %d current op is %d\n",
7479 			    lastop->resop, resop->resop));
7480 			return (NFS4_CHKSEQ_BAD);
7481 		}
7482 		NFS4_DEBUG(rfs4_debug,
7483 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7484 		    rqst_seq, seqid));
7485 		return (NFS4_CHKSEQ_BAD);
7486 	}
7487 
7488 	/* Everything okay -- next expected */
7489 	return (NFS4_CHKSEQ_OKAY);
7490 }
7491 
7492 
7493 static rfs4_chkseq_t
7494 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7495     const compound_state_t *cs)
7496 {
7497 	rfs4_chkseq_t rc;
7498 
7499 	if (rfs4_has_session(cs))
7500 		return (NFS4_CHKSEQ_OKAY);
7501 
7502 	rfs4_dbe_lock(op->ro_dbe);
7503 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7504 	    TRUE);
7505 	rfs4_dbe_unlock(op->ro_dbe);
7506 
7507 	if (rc == NFS4_CHKSEQ_OKAY)
7508 		rfs4_update_lease(op->ro_client);
7509 
7510 	return (rc);
7511 }
7512 
7513 static rfs4_chkseq_t
7514 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7515 {
7516 	rfs4_chkseq_t rc;
7517 
7518 	rfs4_dbe_lock(op->ro_dbe);
7519 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7520 	    olo_seqid, resop, FALSE);
7521 	rfs4_dbe_unlock(op->ro_dbe);
7522 
7523 	return (rc);
7524 }
7525 
7526 static rfs4_chkseq_t
7527 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7528 {
7529 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7530 
7531 	rfs4_dbe_lock(lsp->rls_dbe);
7532 	if (!lsp->rls_skip_seqid_check)
7533 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7534 		    resop, TRUE);
7535 	rfs4_dbe_unlock(lsp->rls_dbe);
7536 
7537 	return (rc);
7538 }
7539 
/*
 * rfs4_op_open: handler for the NFSv4 OPEN operation.
 *
 * The outcome is stored in resp->status and *cs->statusp; nothing is
 * returned.  Outline of the processing below:
 *
 *  1. Fail with NFS4ERR_NOFILEHANDLE when the compound has no current
 *     vnode.
 *  2. For requests on a session, overwrite the clientid in the open
 *     owner argument with the clientid of cs->client.
 *  3. Look up the client by id.  If it is not found the status comes
 *     from rfs4_check_clientid(); if its lease has expired the client
 *     is closed and NFS4ERR_EXPIRED is returned.
 *  4. Find (or create) the open owner; NFS4ERR_RESOURCE if that fails.
 *     For non-session requests the owner's ro_sw is entered for the
 *     rest of the operation and exited at "finish".
 *  5. For an open owner that already existed and is not marked
 *     ro_postpone_confirm, check args->seqid.  A bad seqid yields
 *     NFS4ERR_BAD_SEQID, unless the seqid is larger than the recorded
 *     one and the owner still needs confirmation, in which case its
 *     opens are freed and the lookup is retried.  A replay skips the
 *     open and goes straight to the bookkeeping at "out".
 *  6. Apply the grace period rules, reject mount points, and validate
 *     the share access/deny bits.
 *  7. Dispatch to the claim-type specific rfs4_do_open*() routine.
 *  8. At "out": depending on resp->status, update the lease, cache the
 *     reply, restore the filehandle for a replay, and advance the open
 *     owner's sequence id.
 */
7540 static void
7541 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7542     struct svc_req *req, struct compound_state *cs)
7543 {
7544 	OPEN4args *args = &argop->nfs_argop4_u.opopen;
7545 	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7546 	open_owner4 *owner = &args->owner;
7547 	open_claim_type4 claim = args->claim.claim;
7548 	rfs4_client_t *cp;
7549 	rfs4_openowner_t *oo;
7550 	bool_t create;
7551 	bool_t replay = FALSE;
7552 	int can_reclaim;
7553 
7554 	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7555 	    OPEN4args *, args);
7556 
7557 	if (cs->vp == NULL) {
7558 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7559 		goto end;
7560 	}
7561 
7562 	/* rfc5661 section 18.16.3 */
7563 	if (rfs4_has_session(cs))
7564 		owner->clientid = cs->client->rc_clientid;
7565 
7566 	/*
7567 	 * Need to check clientid and lease expiration first based on
7568 	 * error ordering and incrementing sequence id.
7569 	 */
7570 	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7571 	if (cp == NULL) {
7572 		*cs->statusp = resp->status =
7573 		    rfs4_check_clientid(&owner->clientid, 0);
7574 		goto end;
7575 	}
7576 
7577 	if (rfs4_lease_expired(cp)) {
7578 		rfs4_client_close(cp);
7579 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7580 		goto end;
7581 	}
	/* Sampled once here; consulted by the CLAIM_PREVIOUS grace check. */
7582 	can_reclaim = cp->rc_can_reclaim;
7583 
7584 	/*
7585 	 * Find the open_owner for use from this point forward.  Take
7586 	 * care in updating the sequence id based on the type of error
7587 	 * being returned.
7588 	 */
7589 retry:
	/*
	 * create is reset on every pass and handed to rfs4_findopenowner()
	 * by address; it is read afterwards to tell a newly created owner
	 * from one that already existed (presumably the callee clears it
	 * when an existing owner is found -- confirm against its
	 * definition).
	 */
7590 	create = TRUE;
7591 	oo = rfs4_findopenowner(owner, &create, args->seqid);
7592 	if (oo == NULL) {
7593 		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
7594 		rfs4_client_rele(cp);
7595 		goto end;
7596 	}
7597 
7598 	/*
7599 	 * OPEN_CONFIRM must not be implemented in v4.1
7600 	 */
7601 	if (rfs4_has_session(cs)) {
7602 		oo->ro_need_confirm = FALSE;
7603 	}
7604 
7605 	/* Hold off access to the sequence space while the open is done */
7606 	/* Workaround to avoid deadlock */
7607 	if (!rfs4_has_session(cs))
7608 		rfs4_sw_enter(&oo->ro_sw);
7609 
7610 	/*
7611 	 * If the open_owner existed before at the server, then check
7612 	 * the sequence id.
7613 	 */
7614 	if (!create && !oo->ro_postpone_confirm) {
7615 		switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
7616 		case NFS4_CHKSEQ_BAD:
7617 			ASSERT(!rfs4_has_session(cs));
			/*
			 * An unconfirmed owner being reused with a larger
			 * seqid: drop its opens, release it and look the
			 * owner up again from scratch.
			 */
7618 			if ((args->seqid > oo->ro_open_seqid) &&
7619 			    oo->ro_need_confirm) {
7620 				rfs4_free_opens(oo, TRUE, FALSE);
7621 				rfs4_sw_exit(&oo->ro_sw);
7622 				rfs4_openowner_rele(oo);
7623 				goto retry;
7624 			}
7625 			resp->status = NFS4ERR_BAD_SEQID;
7626 			goto out;
7627 		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7628 			replay = TRUE;
7629 			goto out;
7630 		default:
7631 			break;
7632 		}
7633 
7634 		/*
7635 		 * Sequence was ok and open owner exists
7636 		 * check to see if we have yet to see an
7637 		 * open_confirm.
7638 		 */
7639 		if (oo->ro_need_confirm) {
7640 			rfs4_free_opens(oo, TRUE, FALSE);
7641 			ASSERT(!rfs4_has_session(cs));
7642 			rfs4_sw_exit(&oo->ro_sw);
7643 			rfs4_openowner_rele(oo);
7644 			goto retry;
7645 		}
7646 	}
7647 	/* Grace only applies to regular-type OPENs */
7648 	if (rfs4_clnt_in_grace(cp) &&
7649 	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
7650 	    claim == CLAIM_FH)) {
7651 		*cs->statusp = resp->status = NFS4ERR_GRACE;
7652 		goto out;
7653 	}
7654 
7655 	/*
7656 	 * If previous state at the server existed then can_reclaim
7657 	 * will be set. If not reply NFS4ERR_NO_GRACE to the
7658 	 * client.
7659 	 */
7660 	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7661 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7662 		goto out;
7663 	}
7664 
7665 
7666 	/*
7667 	 * Reject the open if the client has missed the grace period
7668 	 */
7669 	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7670 		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7671 		goto out;
7672 	}
7673 
7674 	/* Couple of up-front bookkeeping items */
7675 	if (oo->ro_need_confirm) {
7676 		/*
7677 		 * If this is a reclaim OPEN then we should not ask
7678 		 * for a confirmation of the open_owner per the
7679 		 * protocol specification.
7680 		 */
7681 		if (claim == CLAIM_PREVIOUS)
7682 			oo->ro_need_confirm = FALSE;
7683 		else
7684 			resp->rflags |= OPEN4_RESULT_CONFIRM;
7685 	}
7686 	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7687 
7688 	/*
7689 	 * If there is an unshared filesystem mounted on this vnode,
7690 	 * do not allow to open/create in this directory.
7691 	 */
7692 	if (vn_ismntpt(cs->vp)) {
7693 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
7694 		goto out;
7695 	}
7696 
7697 	/*
7698 	 * access must be READ, WRITE, or BOTH.  No access is invalid.
7699 	 * deny can be READ, WRITE, BOTH, or NONE.
7700 	 * bits not defined for access/deny are invalid.
7701 	 */
7702 	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7703 	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7704 	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7705 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7706 		goto out;
7707 	}
7708 
7709 
7710 	/*
7711 	 * make sure attrset is zero before response is built.
7712 	 */
7713 	resp->attrset = 0;
7714 
	/* Hand the open off to the routine for this claim type. */
7715 	switch (claim) {
7716 	case CLAIM_NULL:
7717 		rfs4_do_opennull(cs, req, args, oo, resp);
7718 		break;
7719 	case CLAIM_PREVIOUS:
7720 		rfs4_do_openprev(cs, req, args, oo, resp);
7721 		break;
7722 	case CLAIM_DELEGATE_CUR:
7723 		rfs4_do_opendelcur(cs, req, args, oo, resp);
7724 		break;
7725 	case CLAIM_DELEGATE_PREV:
7726 		rfs4_do_opendelprev(cs, req, args, oo, resp);
7727 		break;
7728 	case CLAIM_FH:
7729 		rfs4_do_openfh(cs, req, args, oo, resp);
7730 		break;
7731 	default:
7732 		resp->status = NFS4ERR_INVAL;
7733 		break;
7734 	}
7735 
7736 out:
	/* From here on the client is reached through oo->ro_client. */
7737 	rfs4_client_rele(cp);
7738 
7739 	/* Catch sequence id handling here to make it a little easier */
7740 	switch (resp->status) {
7741 	case NFS4ERR_BADXDR:
7742 	case NFS4ERR_BAD_SEQID:
7743 	case NFS4ERR_BAD_STATEID:
7744 	case NFS4ERR_NOFILEHANDLE:
7745 	case NFS4ERR_RESOURCE:
7746 	case NFS4ERR_STALE_CLIENTID:
7747 	case NFS4ERR_STALE_STATEID:
7748 		/*
7749 		 * The protocol states that if any of these errors are
7750 		 * being returned, the sequence id should not be
7751 		 * incremented.  Any other return requires an
7752 		 * increment.
7753 		 */
7754 		break;
7755 	default:
7756 		/* Always update the lease in this case */
7757 		rfs4_update_lease(oo->ro_client);
7758 
7759 		/* Regular response - copy the result */
7760 		if (!replay)
7761 			rfs4_update_open_resp(oo, resop, &cs->fh);
7762 
7763 		/*
7764 		 * REPLAY case: Only if the previous response was OK
7765 		 * do we copy the filehandle.  If not OK, no
7766 		 * filehandle to copy.
7767 		 */
7768 		if (replay == TRUE &&
7769 		    resp->status == NFS4_OK &&
7770 		    oo->ro_reply_fh.nfs_fh4_val) {
7771 			/*
7772 			 * If this is a replay, we must restore the
7773 			 * current filehandle/vp to that of what was
7774 			 * returned originally.  Try our best to do
7775 			 * it.
7776 			 */
7777 			nfs_fh4_fmt_t *fh_fmtp =
7778 			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7779 
7780 			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7781 			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7782 
7783 			if (cs->exi == NULL) {
7784 				resp->status = NFS4ERR_STALE;
7785 				goto finish;
7786 			}
7787 
			/* Swap the current vnode for the one originally returned. */
7788 			VN_RELE(cs->vp);
7789 
7790 			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7791 			    &resp->status);
7792 
7793 			if (cs->vp == NULL)
7794 				goto finish;
7795 
7796 			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7797 		}
7798 
7799 		/*
7800 		 * If this was a replay, no need to update the
7801 		 * sequence id. If the open_owner was not created on
7802 		 * this pass, then update.  The first use of an
7803 		 * open_owner will not bump the sequence id.
7804 		 */
7805 		if (replay == FALSE && !create)
7806 			rfs4_update_open_sequence(oo);
7807 		/*
7808 		 * If the client is receiving an error and the
7809 		 * open_owner needs to be confirmed, there is no way
7810 		 * to notify the client of this fact ignoring the fact
7811 		 * that the server has no method of returning a
7812 		 * stateid to confirm.  Therefore, the server needs to
7813 		 * mark this open_owner in a way as to avoid the
7814 		 * sequence id checking the next time the client uses
7815 		 * this open_owner.
7816 		 */
7817 		if (resp->status != NFS4_OK && oo->ro_need_confirm)
7818 			oo->ro_postpone_confirm = TRUE;
7819 		/*
7820 		 * If OK response then clear the postpone flag and
7821 		 * reset the sequence id to keep in sync with the
7822 		 * client.
7823 		 */
7824 		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7825 			oo->ro_postpone_confirm = FALSE;
7826 			oo->ro_open_seqid = args->seqid;
7827 		}
7828 		break;
7829 	}
7830 
7831 finish:
7832 	*cs->statusp = resp->status;
7833 
	/* Mirrors the conditional rfs4_sw_enter() taken after the lookup. */
7834 	if (!rfs4_has_session(cs))
7835 		rfs4_sw_exit(&oo->ro_sw);
7836 	rfs4_openowner_rele(oo);
7837 
7838 end:
7839 	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7840 	    OPEN4res *, resp);
7841 }
7842 
7843 /*ARGSUSED*/
7844 void
7845 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7846     struct svc_req *req, struct compound_state *cs)
7847 {
7848 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7849 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7850 	rfs4_state_t *sp;
7851 	nfsstat4 status;
7852 
7853 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7854 	    OPEN_CONFIRM4args *, args);
7855 
7856 	ASSERT(!rfs4_has_session(cs));
7857 
7858 	if (cs->vp == NULL) {
7859 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7860 		goto out;
7861 	}
7862 
7863 	if (cs->vp->v_type != VREG) {
7864 		*cs->statusp = resp->status =
7865 		    cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7866 		return;
7867 	}
7868 
7869 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7870 	if (status != NFS4_OK) {
7871 		*cs->statusp = resp->status = status;
7872 		goto out;
7873 	}
7874 
7875 	/* Ensure specified filehandle matches */
7876 	if (cs->vp != sp->rs_finfo->rf_vp) {
7877 		rfs4_state_rele(sp);
7878 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7879 		goto out;
7880 	}
7881 
7882 	/* hold off other access to open_owner while we tinker */
7883 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7884 
7885 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7886 	case NFS4_CHECK_STATEID_OKAY:
7887 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7888 		    resop, cs) != 0) {
7889 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7890 			break;
7891 		}
7892 		/*
7893 		 * If it is the appropriate stateid and determined to
7894 		 * be "OKAY" then this means that the stateid does not
7895 		 * need to be confirmed and the client is in error for
7896 		 * sending an OPEN_CONFIRM.
7897 		 */
7898 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7899 		break;
7900 	case NFS4_CHECK_STATEID_OLD:
7901 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7902 		break;
7903 	case NFS4_CHECK_STATEID_BAD:
7904 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7905 		break;
7906 	case NFS4_CHECK_STATEID_EXPIRED:
7907 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7908 		break;
7909 	case NFS4_CHECK_STATEID_CLOSED:
7910 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7911 		break;
7912 	case NFS4_CHECK_STATEID_REPLAY:
7913 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7914 		    resop, cs)) {
7915 		case NFS4_CHKSEQ_OKAY:
7916 			/*
7917 			 * This is replayed stateid; if seqid matches
7918 			 * next expected, then client is using wrong seqid.
7919 			 */
7920 			/* fall through */
7921 		case NFS4_CHKSEQ_BAD:
7922 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7923 			break;
7924 		case NFS4_CHKSEQ_REPLAY:
7925 			/*
7926 			 * Note this case is the duplicate case so
7927 			 * resp->status is already set.
7928 			 */
7929 			*cs->statusp = resp->status;
7930 			rfs4_update_lease(sp->rs_owner->ro_client);
7931 			break;
7932 		}
7933 		break;
7934 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7935 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7936 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
7937 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7938 			break;
7939 		}
7940 		*cs->statusp = resp->status = NFS4_OK;
7941 
7942 		next_stateid(&sp->rs_stateid);
7943 		resp->open_stateid = sp->rs_stateid.stateid;
7944 		sp->rs_owner->ro_need_confirm = FALSE;
7945 		rfs4_update_lease(sp->rs_owner->ro_client);
7946 		rfs4_update_open_sequence(sp->rs_owner);
7947 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7948 		break;
7949 	default:
7950 		ASSERT(FALSE);
7951 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7952 		break;
7953 	}
7954 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
7955 	rfs4_state_rele(sp);
7956 
7957 out:
7958 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7959 	    OPEN_CONFIRM4res *, resp);
7960 }
7961 
7962 /*ARGSUSED*/
7963 void
7964 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7965     struct svc_req *req, struct compound_state *cs)
7966 {
7967 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7968 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7969 	uint32_t access = args->share_access;
7970 	uint32_t deny = args->share_deny;
7971 	nfsstat4 status;
7972 	rfs4_state_t *sp;
7973 	rfs4_file_t *fp;
7974 	int fflags = 0;
7975 
7976 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7977 	    OPEN_DOWNGRADE4args *, args);
7978 
7979 	if (cs->vp == NULL) {
7980 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7981 		goto out;
7982 	}
7983 
7984 	if (cs->vp->v_type != VREG) {
7985 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7986 		return;
7987 	}
7988 
7989 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7990 	if (status != NFS4_OK) {
7991 		*cs->statusp = resp->status = status;
7992 		goto out;
7993 	}
7994 
7995 	/* Ensure specified filehandle matches */
7996 	if (cs->vp != sp->rs_finfo->rf_vp) {
7997 		rfs4_state_rele(sp);
7998 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7999 		goto out;
8000 	}
8001 
8002 	/* hold off other access to open_owner while we tinker */
8003 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8004 
8005 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8006 	case NFS4_CHECK_STATEID_OKAY:
8007 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8008 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8009 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8010 			goto end;
8011 		}
8012 		break;
8013 	case NFS4_CHECK_STATEID_OLD:
8014 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8015 		goto end;
8016 	case NFS4_CHECK_STATEID_BAD:
8017 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8018 		goto end;
8019 	case NFS4_CHECK_STATEID_EXPIRED:
8020 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8021 		goto end;
8022 	case NFS4_CHECK_STATEID_CLOSED:
8023 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8024 		goto end;
8025 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8026 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8027 		goto end;
8028 	case NFS4_CHECK_STATEID_REPLAY:
8029 		ASSERT(!rfs4_has_session(cs));
8030 
8031 		/* Check the sequence id for the open owner */
8032 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8033 		    resop, cs)) {
8034 		case NFS4_CHKSEQ_OKAY:
8035 			/*
8036 			 * This is replayed stateid; if seqid matches
8037 			 * next expected, then client is using wrong seqid.
8038 			 */
8039 			/* fall through */
8040 		case NFS4_CHKSEQ_BAD:
8041 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8042 			goto end;
8043 		case NFS4_CHKSEQ_REPLAY:
8044 			/*
8045 			 * Note this case is the duplicate case so
8046 			 * resp->status is already set.
8047 			 */
8048 			*cs->statusp = resp->status;
8049 			rfs4_update_lease(sp->rs_owner->ro_client);
8050 			goto end;
8051 		}
8052 		break;
8053 	default:
8054 		ASSERT(FALSE);
8055 		break;
8056 	}
8057 
8058 	rfs4_dbe_lock(sp->rs_dbe);
8059 	/*
8060 	 * Check that the new access modes and deny modes are valid.
8061 	 * Check that no invalid bits are set.
8062 	 */
8063 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8064 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8065 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8066 		rfs4_update_open_sequence(sp->rs_owner);
8067 		rfs4_dbe_unlock(sp->rs_dbe);
8068 		goto end;
8069 	}
8070 
8071 	/*
8072 	 * The new modes must be a subset of the current modes and
8073 	 * the access must specify at least one mode. To test that
8074 	 * the new mode is a subset of the current modes we bitwise
8075 	 * AND them together and check that the result equals the new
8076 	 * mode. For example:
8077 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
8078 	 * access & sp->rs_open_access == R == access, so the new access mode
8079 	 * is valid. Consider access == RW, sp->rs_open_access = R
8080 	 * access & sp->rs_open_access == R != access, so the new access mode
8081 	 * is invalid.
8082 	 */
8083 	if ((access & sp->rs_open_access) != access ||
8084 	    (deny & sp->rs_open_deny) != deny ||
8085 	    (access &
8086 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8087 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8088 		rfs4_update_open_sequence(sp->rs_owner);
8089 		rfs4_dbe_unlock(sp->rs_dbe);
8090 		goto end;
8091 	}
8092 
8093 	/*
8094 	 * Release any share locks associated with this stateID.
8095 	 * Strictly speaking, this violates the spec because the
8096 	 * spec effectively requires that open downgrade be atomic.
8097 	 * At present, fs_shrlock does not have this capability.
8098 	 */
8099 	(void) rfs4_unshare(sp);
8100 
8101 	status = rfs4_share(sp, access, deny);
8102 	if (status != NFS4_OK) {
8103 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8104 		rfs4_update_open_sequence(sp->rs_owner);
8105 		rfs4_dbe_unlock(sp->rs_dbe);
8106 		goto end;
8107 	}
8108 
8109 	fp = sp->rs_finfo;
8110 	rfs4_dbe_lock(fp->rf_dbe);
8111 
8112 	/*
8113 	 * If the current mode has deny read and the new mode
8114 	 * does not, decrement the number of deny read mode bits
8115 	 * and if it goes to zero turn off the deny read bit
8116 	 * on the file.
8117 	 */
8118 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8119 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
8120 		fp->rf_deny_read--;
8121 		if (fp->rf_deny_read == 0)
8122 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8123 	}
8124 
8125 	/*
8126 	 * If the current mode has deny write and the new mode
8127 	 * does not, decrement the number of deny write mode bits
8128 	 * and if it goes to zero turn off the deny write bit
8129 	 * on the file.
8130 	 */
8131 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8132 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8133 		fp->rf_deny_write--;
8134 		if (fp->rf_deny_write == 0)
8135 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8136 	}
8137 
8138 	/*
8139 	 * If the current mode has access read and the new mode
8140 	 * does not, decrement the number of access read mode bits
8141 	 * and if it goes to zero turn off the access read bit
8142 	 * on the file.  set fflags to FREAD for the call to
8143 	 * vn_open_downgrade().
8144 	 */
8145 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8146 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8147 		fp->rf_access_read--;
8148 		if (fp->rf_access_read == 0)
8149 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8150 		fflags |= FREAD;
8151 	}
8152 
8153 	/*
8154 	 * If the current mode has access write and the new mode
8155 	 * does not, decrement the number of access write mode bits
8156 	 * and if it goes to zero turn off the access write bit
8157 	 * on the file.  set fflags to FWRITE for the call to
8158 	 * vn_open_downgrade().
8159 	 */
8160 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8161 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8162 		fp->rf_access_write--;
8163 		if (fp->rf_access_write == 0)
8164 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8165 		fflags |= FWRITE;
8166 	}
8167 
8168 	/* Check that the file is still accessible */
8169 	ASSERT(fp->rf_share_access);
8170 
8171 	rfs4_dbe_unlock(fp->rf_dbe);
8172 
8173 	/* now set the new open access and deny modes */
8174 	sp->rs_open_access = access;
8175 	sp->rs_open_deny = deny;
8176 
8177 	/*
8178 	 * we successfully downgraded the share lock, now we need to downgrade
8179 	 * the open. it is possible that the downgrade was only for a deny
8180 	 * mode and we have nothing else to do.
8181 	 */
8182 	if ((fflags & (FREAD|FWRITE)) != 0)
8183 		vn_open_downgrade(cs->vp, fflags);
8184 
8185 	/* Update the stateid */
8186 	next_stateid(&sp->rs_stateid);
8187 	resp->open_stateid = sp->rs_stateid.stateid;
8188 
8189 	rfs4_dbe_unlock(sp->rs_dbe);
8190 
8191 	*cs->statusp = resp->status = NFS4_OK;
8192 	/* Update the lease */
8193 	rfs4_update_lease(sp->rs_owner->ro_client);
8194 	/* And the sequence */
8195 	rfs4_update_open_sequence(sp->rs_owner);
8196 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8197 
8198 end:
8199 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8200 	rfs4_state_rele(sp);
8201 out:
8202 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8203 	    OPEN_DOWNGRADE4res *, resp);
8204 }
8205 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * buffer s1, which need not be NUL-terminated.
 *
 * Returns a pointer to the first occurrence, or NULL when s2 does not
 * fit in, or does not occur within, those n bytes.  An empty s2
 * matches at the start of the buffer.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const char *cursor = s1;
	size_t patlen = strlen(s2);
	size_t remaining;

	/* Stop as soon as fewer bytes remain than the pattern needs. */
	for (remaining = n; remaining >= patlen; remaining--, cursor++) {
		if (bcmp(cursor, s2, patlen) == 0)
			return ((void *)cursor);
	}

	return (NULL);
}
8221 
8222 /*
8223  * The logic behind this function is detailed in the NFSv4 RFC in the
8224  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8225  * that section for explicit guidance to server behavior for
8226  * SETCLIENTID.
8227  */
8228 void
8229 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8230     struct svc_req *req, struct compound_state *cs)
8231 {
8232 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8233 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8234 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8235 	rfs4_clntip_t *ci;
8236 	bool_t create;
8237 	char *addr, *netid;
8238 	int len;
8239 
8240 	DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8241 	    SETCLIENTID4args *, args);
8242 retry:
8243 	newcp = cp_confirmed = cp_unconfirmed = NULL;
8244 
8245 	/*
8246 	 * Save the caller's IP address
8247 	 */
8248 	args->client.cl_addr =
8249 	    (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8250 
8251 	/*
8252 	 * Record if it is a Solaris client that cannot handle referrals.
8253 	 */
8254 	if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8255 	    !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8256 		/* Add a "yes, it's downrev" record */
8257 		create = TRUE;
8258 		ci = rfs4_find_clntip(args->client.cl_addr, &create);
8259 		ASSERT(ci != NULL);
8260 		rfs4_dbe_rele(ci->ri_dbe);
8261 	} else {
8262 		/* Remove any previous record */
8263 		rfs4_invalidate_clntip(args->client.cl_addr);
8264 	}
8265 
8266 	/*
8267 	 * In search of an EXISTING client matching the incoming
8268 	 * request to establish a new client identifier at the server
8269 	 */
8270 	create = TRUE;
8271 	cp = rfs4_findclient(&args->client, &create, NULL);
8272 
8273 	/* Should never happen */
8274 	ASSERT(cp != NULL);
8275 
8276 	if (cp == NULL) {
8277 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8278 		goto out;
8279 	}
8280 
8281 	/*
8282 	 * Easiest case. Client identifier is newly created and is
8283 	 * unconfirmed.  Also note that for this case, no other
8284 	 * entries exist for the client identifier.  Nothing else to
8285 	 * check.  Just setup the response and respond.
8286 	 */
8287 	if (create) {
8288 		*cs->statusp = res->status = NFS4_OK;
8289 		res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8290 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8291 		    cp->rc_confirm_verf;
8292 		/* Setup callback information; CB_NULL confirmation later */
8293 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8294 
8295 		rfs4_client_rele(cp);
8296 		goto out;
8297 	}
8298 
8299 	/*
8300 	 * An existing, confirmed client may exist but it may not have
8301 	 * been active for at least one lease period.  If so, then
8302 	 * "close" the client and create a new client identifier
8303 	 */
8304 	if (rfs4_lease_expired(cp)) {
8305 		rfs4_client_close(cp);
8306 		goto retry;
8307 	}
8308 
8309 	if (cp->rc_need_confirm == TRUE)
8310 		cp_unconfirmed = cp;
8311 	else
8312 		cp_confirmed = cp;
8313 
8314 	cp = NULL;
8315 
8316 	/*
8317 	 * We have a confirmed client, now check for an
8318 	 * unconfimred entry
8319 	 */
8320 	if (cp_confirmed) {
8321 		/* If creds don't match then client identifier is inuse */
8322 		if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8323 			rfs4_cbinfo_t *cbp;
8324 			/*
8325 			 * Some one else has established this client
8326 			 * id. Try and say * who they are. We will use
8327 			 * the call back address supplied by * the
8328 			 * first client.
8329 			 */
8330 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8331 
8332 			addr = netid = NULL;
8333 
8334 			cbp = &cp_confirmed->rc_cbinfo;
8335 			if (cbp->cb_callback.cb_location.r_addr &&
8336 			    cbp->cb_callback.cb_location.r_netid) {
8337 				cb_client4 *cbcp = &cbp->cb_callback;
8338 
8339 				len = strlen(cbcp->cb_location.r_addr)+1;
8340 				addr = kmem_alloc(len, KM_SLEEP);
8341 				bcopy(cbcp->cb_location.r_addr, addr, len);
8342 				len = strlen(cbcp->cb_location.r_netid)+1;
8343 				netid = kmem_alloc(len, KM_SLEEP);
8344 				bcopy(cbcp->cb_location.r_netid, netid, len);
8345 			}
8346 
8347 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
8348 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
8349 
8350 			rfs4_client_rele(cp_confirmed);
8351 		}
8352 
8353 		/*
8354 		 * Confirmed, creds match, and verifier matches; must
8355 		 * be an update of the callback info
8356 		 */
8357 		if (cp_confirmed->rc_nfs_client.verifier ==
8358 		    args->client.verifier) {
8359 			/* Setup callback information */
8360 			rfs4_client_setcb(cp_confirmed, &args->callback,
8361 			    args->callback_ident);
8362 
8363 			/* everything okay -- move ahead */
8364 			*cs->statusp = res->status = NFS4_OK;
8365 			res->SETCLIENTID4res_u.resok4.clientid =
8366 			    cp_confirmed->rc_clientid;
8367 
8368 			/* update the confirm_verifier and return it */
8369 			rfs4_client_scv_next(cp_confirmed);
8370 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8371 			    cp_confirmed->rc_confirm_verf;
8372 
8373 			rfs4_client_rele(cp_confirmed);
8374 			goto out;
8375 		}
8376 
8377 		/*
8378 		 * Creds match but the verifier doesn't.  Must search
8379 		 * for an unconfirmed client that would be replaced by
8380 		 * this request.
8381 		 */
8382 		create = FALSE;
8383 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
8384 		    cp_confirmed);
8385 	}
8386 
8387 	/*
8388 	 * At this point, we have taken care of the brand new client
8389 	 * struct, INUSE case, update of an existing, and confirmed
8390 	 * client struct.
8391 	 */
8392 
8393 	/*
8394 	 * check to see if things have changed while we originally
8395 	 * picked up the client struct.  If they have, then return and
8396 	 * retry the processing of this SETCLIENTID request.
8397 	 */
8398 	if (cp_unconfirmed) {
8399 		rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8400 		if (!cp_unconfirmed->rc_need_confirm) {
8401 			rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8402 			rfs4_client_rele(cp_unconfirmed);
8403 			if (cp_confirmed)
8404 				rfs4_client_rele(cp_confirmed);
8405 			goto retry;
8406 		}
8407 		/* do away with the old unconfirmed one */
8408 		rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8409 		rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8410 		rfs4_client_rele(cp_unconfirmed);
8411 		cp_unconfirmed = NULL;
8412 	}
8413 
8414 	/*
8415 	 * This search will temporarily hide the confirmed client
8416 	 * struct while a new client struct is created as the
8417 	 * unconfirmed one.
8418 	 */
8419 	create = TRUE;
8420 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8421 
8422 	ASSERT(newcp != NULL);
8423 
8424 	if (newcp == NULL) {
8425 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8426 		rfs4_client_rele(cp_confirmed);
8427 		goto out;
8428 	}
8429 
8430 	/*
8431 	 * If one was not created, then a similar request must be in
8432 	 * process so release and start over with this one
8433 	 */
8434 	if (create != TRUE) {
8435 		rfs4_client_rele(newcp);
8436 		if (cp_confirmed)
8437 			rfs4_client_rele(cp_confirmed);
8438 		goto retry;
8439 	}
8440 
8441 	*cs->statusp = res->status = NFS4_OK;
8442 	res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8443 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8444 	    newcp->rc_confirm_verf;
8445 	/* Setup callback information; CB_NULL confirmation later */
8446 	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8447 
8448 	newcp->rc_cp_confirmed = cp_confirmed;
8449 
8450 	rfs4_client_rele(newcp);
8451 
8452 out:
8453 	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8454 	    SETCLIENTID4res *, res);
8455 }
8456 
8457 /*ARGSUSED*/
8458 void
8459 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8460     struct svc_req *req, struct compound_state *cs)
8461 {
8462 	SETCLIENTID_CONFIRM4args *args =
8463 	    &argop->nfs_argop4_u.opsetclientid_confirm;
8464 	SETCLIENTID_CONFIRM4res *res =
8465 	    &resop->nfs_resop4_u.opsetclientid_confirm;
8466 	rfs4_client_t *cp, *cptoclose = NULL;
8467 	nfs4_srv_t *nsrv4;
8468 
8469 	DTRACE_NFSV4_2(op__setclientid__confirm__start,
8470 	    struct compound_state *, cs,
8471 	    SETCLIENTID_CONFIRM4args *, args);
8472 
8473 	nsrv4 = nfs4_get_srv();
8474 	*cs->statusp = res->status = NFS4_OK;
8475 
8476 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
8477 
8478 	if (cp == NULL) {
8479 		*cs->statusp = res->status =
8480 		    rfs4_check_clientid(&args->clientid, 1);
8481 		goto out;
8482 	}
8483 
8484 	if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8485 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8486 		rfs4_client_rele(cp);
8487 		goto out;
8488 	}
8489 
8490 	/* If the verifier doesn't match, the record doesn't match */
8491 	if (cp->rc_confirm_verf != args->setclientid_confirm) {
8492 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8493 		rfs4_client_rele(cp);
8494 		goto out;
8495 	}
8496 
8497 	rfs4_dbe_lock(cp->rc_dbe);
8498 	cp->rc_need_confirm = FALSE;
8499 	if (cp->rc_cp_confirmed) {
8500 		cptoclose = cp->rc_cp_confirmed;
8501 		cptoclose->rc_ss_remove = 1;
8502 		cp->rc_cp_confirmed = NULL;
8503 	}
8504 
8505 	/*
8506 	 * Update the client's associated server instance, if it's changed
8507 	 * since the client was created.
8508 	 */
8509 	if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8510 		rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8511 
8512 	/*
8513 	 * Record clientid in stable storage.
8514 	 * Must be done after server instance has been assigned.
8515 	 */
8516 	rfs4_ss_clid(nsrv4, cp);
8517 
8518 	rfs4_dbe_unlock(cp->rc_dbe);
8519 
8520 	if (cptoclose)
8521 		/* don't need to rele, client_close does it */
8522 		rfs4_client_close(cptoclose);
8523 
8524 	/* If needed, initiate CB_NULL call for callback path */
8525 	rfs4_deleg_cb_check(cp);
8526 	rfs4_update_lease(cp);
8527 
8528 	/*
8529 	 * Check to see if client can perform reclaims
8530 	 */
8531 	rfs4_ss_chkclid(nsrv4, cp);
8532 
8533 	rfs4_client_rele(cp);
8534 
8535 out:
8536 	DTRACE_NFSV4_2(op__setclientid__confirm__done,
8537 	    struct compound_state *, cs,
8538 	    SETCLIENTID_CONFIRM4 *, res);
8539 }
8540 
8541 
8542 /*ARGSUSED*/
8543 void
8544 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8545     struct svc_req *req, struct compound_state *cs)
8546 {
8547 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8548 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8549 	rfs4_state_t *sp;
8550 	nfsstat4 status;
8551 
8552 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8553 	    CLOSE4args *, args);
8554 
8555 	if (cs->vp == NULL) {
8556 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8557 		goto out;
8558 	}
8559 
8560 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8561 	if (status != NFS4_OK) {
8562 		*cs->statusp = resp->status = status;
8563 		goto out;
8564 	}
8565 
8566 	/* Ensure specified filehandle matches */
8567 	if (cs->vp != sp->rs_finfo->rf_vp) {
8568 		rfs4_state_rele(sp);
8569 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8570 		goto out;
8571 	}
8572 
8573 	/* hold off other access to open_owner while we tinker */
8574 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8575 
8576 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8577 	case NFS4_CHECK_STATEID_OKAY:
8578 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8579 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8580 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8581 			goto end;
8582 		}
8583 		break;
8584 	case NFS4_CHECK_STATEID_OLD:
8585 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8586 		goto end;
8587 	case NFS4_CHECK_STATEID_BAD:
8588 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8589 		goto end;
8590 	case NFS4_CHECK_STATEID_EXPIRED:
8591 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8592 		goto end;
8593 	case NFS4_CHECK_STATEID_CLOSED:
8594 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8595 		goto end;
8596 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8597 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8598 		goto end;
8599 	case NFS4_CHECK_STATEID_REPLAY:
8600 		ASSERT(!rfs4_has_session(cs));
8601 
8602 		/* Check the sequence id for the open owner */
8603 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8604 		    resop, cs)) {
8605 		case NFS4_CHKSEQ_OKAY:
8606 			/*
8607 			 * This is replayed stateid; if seqid matches
8608 			 * next expected, then client is using wrong seqid.
8609 			 */
8610 			/* FALL THROUGH */
8611 		case NFS4_CHKSEQ_BAD:
8612 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8613 			goto end;
8614 		case NFS4_CHKSEQ_REPLAY:
8615 			/*
8616 			 * Note this case is the duplicate case so
8617 			 * resp->status is already set.
8618 			 */
8619 			*cs->statusp = resp->status;
8620 			rfs4_update_lease(sp->rs_owner->ro_client);
8621 			goto end;
8622 		}
8623 		break;
8624 	default:
8625 		ASSERT(FALSE);
8626 		break;
8627 	}
8628 
8629 	rfs4_dbe_lock(sp->rs_dbe);
8630 
8631 	/* Update the stateid. */
8632 	next_stateid(&sp->rs_stateid);
8633 	resp->open_stateid = sp->rs_stateid.stateid;
8634 
8635 	rfs4_dbe_unlock(sp->rs_dbe);
8636 
8637 	rfs4_update_lease(sp->rs_owner->ro_client);
8638 	rfs4_update_open_sequence(sp->rs_owner);
8639 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8640 
8641 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8642 
8643 	*cs->statusp = resp->status = status;
8644 
8645 end:
8646 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8647 	rfs4_state_rele(sp);
8648 out:
8649 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8650 	    CLOSE4res *, resp);
8651 }
8652 
8653 /*
8654  * Manage the counts on the file struct and close all file locks
8655  */
8656 /*ARGSUSED*/
8657 void
8658 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8659     bool_t close_of_client)
8660 {
8661 	rfs4_file_t *fp = sp->rs_finfo;
8662 	rfs4_lo_state_t *lsp;
8663 	int fflags = 0;
8664 
8665 	/*
8666 	 * If this call is part of the larger closing down of client
8667 	 * state then it is just easier to release all locks
8668 	 * associated with this client instead of going through each
8669 	 * individual file and cleaning locks there.
8670 	 */
8671 	if (close_of_client) {
8672 		if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8673 		    !list_is_empty(&sp->rs_lostatelist) &&
8674 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8675 			/* Is the PxFS kernel module loaded? */
8676 			if (lm_remove_file_locks != NULL) {
8677 				int new_sysid;
8678 
8679 				/* Encode the cluster nodeid in new sysid */
8680 				new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8681 				lm_set_nlmid_flk(&new_sysid);
8682 
8683 				/*
8684 				 * This PxFS routine removes file locks for a
8685 				 * client over all nodes of a cluster.
8686 				 */
8687 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8688 				    "lm_remove_file_locks(sysid=0x%x)\n",
8689 				    new_sysid));
8690 				(*lm_remove_file_locks)(new_sysid);
8691 			} else {
8692 				struct flock64 flk;
8693 
8694 				/* Release all locks for this client */
8695 				flk.l_type = F_UNLKSYS;
8696 				flk.l_whence = 0;
8697 				flk.l_start = 0;
8698 				flk.l_len = 0;
8699 				flk.l_sysid =
8700 				    sp->rs_owner->ro_client->rc_sysidt;
8701 				flk.l_pid = 0;
8702 				(void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8703 				    &flk, F_REMOTELOCK | FREAD | FWRITE,
8704 				    (u_offset_t)0, NULL, CRED(), NULL);
8705 			}
8706 
8707 			sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8708 		}
8709 	}
8710 
8711 	/*
8712 	 * Release all locks on this file by this lock owner or at
8713 	 * least mark the locks as having been released
8714 	 */
8715 	for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8716 	    lsp = list_next(&sp->rs_lostatelist, lsp)) {
8717 		lsp->rls_locks_cleaned = TRUE;
8718 
8719 		/* Was this already taken care of above? */
8720 		if (!close_of_client &&
8721 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8722 			(void) cleanlocks(sp->rs_finfo->rf_vp,
8723 			    lsp->rls_locker->rl_pid,
8724 			    lsp->rls_locker->rl_client->rc_sysidt);
8725 	}
8726 
8727 	/*
8728 	 * Release any shrlocks associated with this open state ID.
8729 	 * This must be done before the rfs4_state gets marked closed.
8730 	 */
8731 	if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8732 		(void) rfs4_unshare(sp);
8733 
8734 	if (sp->rs_open_access) {
8735 		rfs4_dbe_lock(fp->rf_dbe);
8736 
8737 		/*
8738 		 * Decrement the count for each access and deny bit that this
8739 		 * state has contributed to the file.
8740 		 * If the file counts go to zero
8741 		 * clear the appropriate bit in the appropriate mask.
8742 		 */
8743 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8744 			fp->rf_access_read--;
8745 			fflags |= FREAD;
8746 			if (fp->rf_access_read == 0)
8747 				fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8748 		}
8749 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8750 			fp->rf_access_write--;
8751 			fflags |= FWRITE;
8752 			if (fp->rf_access_write == 0)
8753 				fp->rf_share_access &=
8754 				    ~OPEN4_SHARE_ACCESS_WRITE;
8755 		}
8756 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8757 			fp->rf_deny_read--;
8758 			if (fp->rf_deny_read == 0)
8759 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8760 		}
8761 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8762 			fp->rf_deny_write--;
8763 			if (fp->rf_deny_write == 0)
8764 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8765 		}
8766 
8767 		(void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8768 
8769 		rfs4_dbe_unlock(fp->rf_dbe);
8770 
8771 		sp->rs_open_access = 0;
8772 		sp->rs_open_deny = 0;
8773 	}
8774 }
8775 
8776 /*
8777  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8778  */
8779 static nfsstat4
8780 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8781 {
8782 	rfs4_lockowner_t *lo;
8783 	rfs4_client_t *cp;
8784 	uint32_t len;
8785 
8786 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
8787 	if (lo != NULL) {
8788 		cp = lo->rl_client;
8789 		if (rfs4_lease_expired(cp)) {
8790 			rfs4_lockowner_rele(lo);
8791 			rfs4_dbe_hold(cp->rc_dbe);
8792 			rfs4_client_close(cp);
8793 			return (NFS4ERR_EXPIRED);
8794 		}
8795 		dp->owner.clientid = lo->rl_owner.clientid;
8796 		len = lo->rl_owner.owner_len;
8797 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8798 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8799 		dp->owner.owner_len = len;
8800 		rfs4_lockowner_rele(lo);
8801 		goto finish;
8802 	}
8803 
8804 	/*
8805 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8806 	 * of the client id contain the boot time for a NFS4 lock. So we
8807 	 * fabricate and identity by setting clientid to the sysid, and
8808 	 * the lock owner to the pid.
8809 	 */
8810 	dp->owner.clientid = flk->l_sysid;
8811 	len = sizeof (pid_t);
8812 	dp->owner.owner_len = len;
8813 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8814 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
8815 finish:
8816 	dp->offset = flk->l_start;
8817 	dp->length = flk->l_len;
8818 
8819 	if (flk->l_type == F_RDLCK)
8820 		dp->locktype = READ_LT;
8821 	else if (flk->l_type == F_WRLCK)
8822 		dp->locktype = WRITE_LT;
8823 	else
8824 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
8825 
8826 	return (NFS4_OK);
8827 }
8828 
8829 /*
8830  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8831  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8832  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8833  * for that (obviously); they are sending the LOCK requests with some delays
8834  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8835  * locking and delay implementation at the client side.
8836  *
8837  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8838  * fast retries on its own (the for loop below) in a hope the lock will be
8839  * available soon.  And if not, the client won't need to resend the LOCK
8840  * requests so fast to check the lock availability.  This basically saves some
8841  * network traffic and tries to make sure the client gets the lock ASAP.
8842  */
8843 static int
8844 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8845 {
8846 	int error;
8847 	struct flock64 flk;
8848 	int i;
8849 	clock_t delaytime;
8850 	int cmd;
8851 	int spin_cnt = 0;
8852 
8853 	cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8854 retry:
8855 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8856 
8857 	for (i = 0; i < rfs4_maxlock_tries; i++) {
8858 		LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8859 		error = VOP_FRLOCK(vp, cmd,
8860 		    flock, flag, (u_offset_t)0, NULL, cred, NULL);
8861 
8862 		if (error != EAGAIN && error != EACCES)
8863 			break;
8864 
8865 		if (i < rfs4_maxlock_tries - 1) {
8866 			delay(delaytime);
8867 			delaytime *= 2;
8868 		}
8869 	}
8870 
8871 	if (error == EAGAIN || error == EACCES) {
8872 		/* Get the owner of the lock */
8873 		flk = *flock;
8874 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8875 		if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8876 		    NULL) == 0) {
8877 			/*
8878 			 * There's a race inherent in the current VOP_FRLOCK
8879 			 * design where:
8880 			 * a: "other guy" takes a lock that conflicts with a
8881 			 * lock we want
8882 			 * b: we attempt to take our lock (non-blocking) and
8883 			 * the attempt fails.
8884 			 * c: "other guy" releases the conflicting lock
8885 			 * d: we ask what lock conflicts with the lock we want,
8886 			 * getting F_UNLCK (no lock blocks us)
8887 			 *
8888 			 * If we retry the non-blocking lock attempt in this
8889 			 * case (restart at step 'b') there's some possibility
8890 			 * that many such attempts might fail.  However a test
8891 			 * designed to actually provoke this race shows that
8892 			 * the vast majority of cases require no retry, and
8893 			 * only a few took as many as three retries.  Here's
8894 			 * the test outcome:
8895 			 *
8896 			 *	   number of retries    how many times we needed
8897 			 *				that many retries
8898 			 *	   0			79461
8899 			 *	   1			  862
8900 			 *	   2			   49
8901 			 *	   3			    5
8902 			 *
8903 			 * Given those empirical results, we arbitrarily limit
8904 			 * the retry count to ten.
8905 			 *
8906 			 * If we actually make to ten retries and give up,
8907 			 * nothing catastrophic happens, but we're unable to
8908 			 * return the information about the conflicting lock to
8909 			 * the NFS client.  That's an acceptable trade off vs.
8910 			 * letting this retry loop run forever.
8911 			 */
8912 			if (flk.l_type == F_UNLCK) {
8913 				if (spin_cnt++ < 10) {
8914 					/* No longer locked, retry */
8915 					goto retry;
8916 				}
8917 			} else {
8918 				*flock = flk;
8919 				LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8920 				    F_GETLK, &flk);
8921 			}
8922 		}
8923 	}
8924 
8925 	return (error);
8926 }
8927 
8928 /*ARGSUSED*/
8929 static nfsstat4
8930 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8931     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8932 {
8933 	nfsstat4 status;
8934 	rfs4_lockowner_t *lo = lsp->rls_locker;
8935 	rfs4_state_t *sp = lsp->rls_state;
8936 	struct flock64 flock;
8937 	int16_t ltype;
8938 	int flag;
8939 	int error;
8940 	sysid_t sysid;
8941 	LOCK4res *lres;
8942 	vnode_t *vp;
8943 
8944 	if (rfs4_lease_expired(lo->rl_client)) {
8945 		return (NFS4ERR_EXPIRED);
8946 	}
8947 
8948 	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8949 		return (status);
8950 
8951 	/* Check for zero length. To lock to end of file use all ones for V4 */
8952 	if (length == 0)
8953 		return (NFS4ERR_INVAL);
8954 	else if (length == (length4)(~0))
8955 		length = 0;		/* Posix to end of file  */
8956 
8957 retry:
8958 	rfs4_dbe_lock(sp->rs_dbe);
8959 	if (sp->rs_closed == TRUE) {
8960 		rfs4_dbe_unlock(sp->rs_dbe);
8961 		return (NFS4ERR_OLD_STATEID);
8962 	}
8963 
8964 	if (resop->resop != OP_LOCKU) {
8965 		switch (locktype) {
8966 		case READ_LT:
8967 		case READW_LT:
8968 			if ((sp->rs_share_access
8969 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
8970 				rfs4_dbe_unlock(sp->rs_dbe);
8971 
8972 				return (NFS4ERR_OPENMODE);
8973 			}
8974 			ltype = F_RDLCK;
8975 			break;
8976 		case WRITE_LT:
8977 		case WRITEW_LT:
8978 			if ((sp->rs_share_access
8979 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8980 				rfs4_dbe_unlock(sp->rs_dbe);
8981 
8982 				return (NFS4ERR_OPENMODE);
8983 			}
8984 			ltype = F_WRLCK;
8985 			break;
8986 		}
8987 	} else
8988 		ltype = F_UNLCK;
8989 
8990 	flock.l_type = ltype;
8991 	flock.l_whence = 0;		/* SEEK_SET */
8992 	flock.l_start = offset;
8993 	flock.l_len = length;
8994 	flock.l_sysid = sysid;
8995 	flock.l_pid = lsp->rls_locker->rl_pid;
8996 
8997 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8998 	if (flock.l_len < 0 || flock.l_start < 0) {
8999 		rfs4_dbe_unlock(sp->rs_dbe);
9000 		return (NFS4ERR_INVAL);
9001 	}
9002 
9003 	/*
9004 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9005 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9006 	 */
9007 	flag = (int)sp->rs_share_access | F_REMOTELOCK;
9008 
9009 	vp = sp->rs_finfo->rf_vp;
9010 	VN_HOLD(vp);
9011 
9012 	/*
9013 	 * We need to unlock sp before we call the underlying filesystem to
9014 	 * acquire the file lock.
9015 	 */
9016 	rfs4_dbe_unlock(sp->rs_dbe);
9017 
9018 	error = setlock(vp, &flock, flag, cred);
9019 
9020 	/*
9021 	 * Make sure the file is still open.  In a case the file was closed in
9022 	 * the meantime, clean the lock we acquired using the setlock() call
9023 	 * above, and return the appropriate error.
9024 	 */
9025 	rfs4_dbe_lock(sp->rs_dbe);
9026 	if (sp->rs_closed == TRUE) {
9027 		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9028 		rfs4_dbe_unlock(sp->rs_dbe);
9029 
9030 		VN_RELE(vp);
9031 
9032 		return (NFS4ERR_OLD_STATEID);
9033 	}
9034 	rfs4_dbe_unlock(sp->rs_dbe);
9035 
9036 	VN_RELE(vp);
9037 
9038 	if (error == 0) {
9039 		rfs4_dbe_lock(lsp->rls_dbe);
9040 		next_stateid(&lsp->rls_lockid);
9041 		rfs4_dbe_unlock(lsp->rls_dbe);
9042 	}
9043 
9044 	/*
9045 	 * N.B. We map error values to nfsv4 errors. This is differrent
9046 	 * than puterrno4 routine.
9047 	 */
9048 	switch (error) {
9049 	case 0:
9050 		status = NFS4_OK;
9051 		break;
9052 	case EAGAIN:
9053 	case EACCES:		/* Old value */
9054 		/* Can only get here if op is OP_LOCK */
9055 		ASSERT(resop->resop == OP_LOCK);
9056 		lres = &resop->nfs_resop4_u.oplock;
9057 		status = NFS4ERR_DENIED;
9058 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9059 		    == NFS4ERR_EXPIRED)
9060 			goto retry;
9061 		break;
9062 	case ENOLCK:
9063 		status = NFS4ERR_DELAY;
9064 		break;
9065 	case EOVERFLOW:
9066 		status = NFS4ERR_INVAL;
9067 		break;
9068 	case EINVAL:
9069 		status = NFS4ERR_NOTSUPP;
9070 		break;
9071 	default:
9072 		status = NFS4ERR_SERVERFAULT;
9073 		break;
9074 	}
9075 
9076 	return (status);
9077 }
9078 
/*
 * rfs4_op_lock: handler for the LOCK operation.
 *
 * Two request forms are handled, selected by args->locker.new_lock_owner:
 *
 *  - New lock owner: the open stateid is validated, the open owner's ro_sw
 *    is entered, and a lock owner and lock state are looked up or created.
 *    A request recognized as a duplicate (replayed open seqid) is answered
 *    from the reply saved in the lock state.
 *
 *  - Existing lock owner: the lock stateid and lock seqid are validated
 *    against the existing lock state.
 *
 * On the common path the object must be a regular file and the
 * grace/reclaim combination must be acceptable before rfs4_do_lock() is
 * called.  The status is stored in both resp->status and *cs->statusp.
 *
 * Exit labels:
 *	out	record the status and lock stateid, save the open/lock
 *		responses, then fall into "end".
 *	end	drop the serialization (sw) holds and the state references.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* only set for a new lock owner */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;	/* TRUE for a replayed LOCK request */
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
		    cs, LOCK4res *, resp);
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "Get state failed in lock %d", status));
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		/* rc is kept so the seqid check below can tell OKAY from REPLAY */
		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
			/* No open seqid check is done when a session exists */
			if (rfs4_has_session(cs))
				break;
			/* FALLTHROUGH */
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_olo_seqid(olo->open_seqid,
			    sp->rs_owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->rs_owner);
			}

			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (!rfs4_has_session(cs) && !dup_lock && !create) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->rs_owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->rls_seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				    "lsp->seqid=%d old->seqid=%d",
				    lsp->rls_seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, &lsp->rls_reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->rls_dbe);

		/* Make sure to update the lock sequence id */
		lsp->rls_seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->rls_skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/* No lock seqid check is done when a session exists */
			if (rfs4_has_session(cs))
				break;

			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* Duplicate request: return the status already in resp */
				rfs4_update_lease(lsp->rls_locker->rl_client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* During grace only reclaims are accepted */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* A reclaim during grace requires that the client may reclaim */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Outside of grace a reclaim is refused */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	lsp->rls_skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);
}
9489 
9490 /* free function for LOCK/LOCKT */
9491 static void
9492 lock_denied_free(nfs_resop4 *resop)
9493 {
9494 	LOCK4denied *dp = NULL;
9495 
9496 	switch (resop->resop) {
9497 	case OP_LOCK:
9498 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9499 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9500 		break;
9501 	case OP_LOCKT:
9502 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9503 			dp = &resop->nfs_resop4_u.oplockt.denied;
9504 		break;
9505 	default:
9506 		break;
9507 	}
9508 
9509 	if (dp)
9510 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9511 }
9512 
9513 /*ARGSUSED*/
9514 void
9515 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9516     struct svc_req *req, struct compound_state *cs)
9517 {
9518 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9519 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9520 	nfsstat4 status;
9521 	stateid4 *stateid = &args->lock_stateid;
9522 	rfs4_lo_state_t *lsp;
9523 
9524 	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9525 	    LOCKU4args *, args);
9526 
9527 	if (cs->vp == NULL) {
9528 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9529 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9530 		    LOCKU4res *, resp);
9531 		return;
9532 	}
9533 
9534 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9535 		*cs->statusp = resp->status = status;
9536 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9537 		    LOCKU4res *, resp);
9538 		return;
9539 	}
9540 
9541 	/* Ensure specified filehandle matches */
9542 	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9543 		rfs4_lo_state_rele(lsp, TRUE);
9544 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9545 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9546 		    LOCKU4res *, resp);
9547 		return;
9548 	}
9549 
9550 	/* hold off other access to lsp while we tinker */
9551 	rfs4_sw_enter(&lsp->rls_sw);
9552 
9553 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9554 	case NFS4_CHECK_STATEID_OKAY:
9555 		if (rfs4_has_session(cs))
9556 			break;
9557 
9558 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9559 		    != NFS4_CHKSEQ_OKAY) {
9560 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9561 			goto end;
9562 		}
9563 		break;
9564 	case NFS4_CHECK_STATEID_OLD:
9565 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9566 		goto end;
9567 	case NFS4_CHECK_STATEID_BAD:
9568 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9569 		goto end;
9570 	case NFS4_CHECK_STATEID_EXPIRED:
9571 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9572 		goto end;
9573 	case NFS4_CHECK_STATEID_CLOSED:
9574 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9575 		goto end;
9576 	case NFS4_CHECK_STATEID_REPLAY:
9577 		ASSERT(!rfs4_has_session(cs));
9578 
9579 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9580 		case NFS4_CHKSEQ_OKAY:
9581 				/*
9582 				 * This is a replayed stateid; if
9583 				 * seqid matches the next expected,
9584 				 * then client is using wrong seqid.
9585 				 */
9586 		case NFS4_CHKSEQ_BAD:
9587 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9588 			goto end;
9589 		case NFS4_CHKSEQ_REPLAY:
9590 			rfs4_update_lease(lsp->rls_locker->rl_client);
9591 			*cs->statusp = status = resp->status;
9592 			goto end;
9593 		}
9594 		break;
9595 	default:
9596 		ASSERT(FALSE);
9597 		break;
9598 	}
9599 
9600 	rfs4_update_lock_sequence(lsp);
9601 	rfs4_update_lease(lsp->rls_locker->rl_client);
9602 
9603 	/*
9604 	 * NFS4 only allows locking on regular files, so
9605 	 * verify type of object.
9606 	 */
9607 	if (cs->vp->v_type != VREG) {
9608 		if (cs->vp->v_type == VDIR)
9609 			status = NFS4ERR_ISDIR;
9610 		else
9611 			status = NFS4ERR_INVAL;
9612 		goto out;
9613 	}
9614 
9615 	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9616 		status = NFS4ERR_GRACE;
9617 		goto out;
9618 	}
9619 
9620 	status = rfs4_do_lock(lsp, args->locktype,
9621 	    args->offset, args->length, cs->cr, resop);
9622 
9623 out:
9624 	*cs->statusp = resp->status = status;
9625 
9626 	if (status == NFS4_OK)
9627 		resp->lock_stateid = lsp->rls_lockid.stateid;
9628 
9629 	rfs4_update_lock_resp(lsp, resop);
9630 
9631 end:
9632 	rfs4_sw_exit(&lsp->rls_sw);
9633 	rfs4_lo_state_rele(lsp, TRUE);
9634 
9635 	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9636 	    LOCKU4res *, resp);
9637 }
9638 
9639 /*
9640  * LOCKT is a best effort routine, the client can not be guaranteed that
9641  * the status return is still in effect by the time the reply is received.
9642  * They are numerous race conditions in this routine, but we are not required
9643  * and can not be accurate.
9644  */
9645 /*ARGSUSED*/
9646 void
9647 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9648     struct svc_req *req, struct compound_state *cs)
9649 {
9650 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9651 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9652 	rfs4_lockowner_t *lo;
9653 	rfs4_client_t *cp;
9654 	bool_t create = FALSE;
9655 	struct flock64 flk;
9656 	int error;
9657 	int flag = FREAD | FWRITE;
9658 	int ltype;
9659 	length4 posix_length;
9660 	sysid_t sysid;
9661 	pid_t pid;
9662 
9663 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9664 	    LOCKT4args *, args);
9665 
9666 	if (cs->vp == NULL) {
9667 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9668 		goto out;
9669 	}
9670 
9671 	/*
9672 	 * NFS4 only allows locking on regular files, so
9673 	 * verify type of object.
9674 	 */
9675 	if (cs->vp->v_type != VREG) {
9676 		if (cs->vp->v_type == VDIR)
9677 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
9678 		else
9679 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
9680 		goto out;
9681 	}
9682 
9683 	/*
9684 	 * Check out the clientid to ensure the server knows about it
9685 	 * so that we correctly inform the client of a server reboot.
9686 	 */
9687 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9688 	    == NULL) {
9689 		*cs->statusp = resp->status =
9690 		    rfs4_check_clientid(&args->owner.clientid, 0);
9691 		goto out;
9692 	}
9693 	if (rfs4_lease_expired(cp)) {
9694 		rfs4_client_close(cp);
9695 		/*
9696 		 * Protocol doesn't allow returning NFS4ERR_STALE as
9697 		 * other operations do on this check so STALE_CLIENTID
9698 		 * is returned instead
9699 		 */
9700 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9701 		goto out;
9702 	}
9703 
9704 	if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9705 		*cs->statusp = resp->status = NFS4ERR_GRACE;
9706 		rfs4_client_rele(cp);
9707 		goto out;
9708 	}
9709 	rfs4_client_rele(cp);
9710 
9711 	resp->status = NFS4_OK;
9712 
9713 	switch (args->locktype) {
9714 	case READ_LT:
9715 	case READW_LT:
9716 		ltype = F_RDLCK;
9717 		break;
9718 	case WRITE_LT:
9719 	case WRITEW_LT:
9720 		ltype = F_WRLCK;
9721 		break;
9722 	}
9723 
9724 	posix_length = args->length;
9725 	/* Check for zero length. To lock to end of file use all ones for V4 */
9726 	if (posix_length == 0) {
9727 		*cs->statusp = resp->status = NFS4ERR_INVAL;
9728 		goto out;
9729 	} else if (posix_length == (length4)(~0)) {
9730 		posix_length = 0;	/* Posix to end of file  */
9731 	}
9732 
9733 	/* Find or create a lockowner */
9734 	lo = rfs4_findlockowner(&args->owner, &create);
9735 
9736 	if (lo) {
9737 		pid = lo->rl_pid;
9738 		if ((resp->status =
9739 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9740 			goto err;
9741 	} else {
9742 		pid = 0;
9743 		sysid = lockt_sysid;
9744 	}
9745 retry:
9746 	flk.l_type = ltype;
9747 	flk.l_whence = 0;		/* SEEK_SET */
9748 	flk.l_start = args->offset;
9749 	flk.l_len = posix_length;
9750 	flk.l_sysid = sysid;
9751 	flk.l_pid = pid;
9752 	flag |= F_REMOTELOCK;
9753 
9754 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9755 
9756 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9757 	if (flk.l_len < 0 || flk.l_start < 0) {
9758 		resp->status = NFS4ERR_INVAL;
9759 		goto err;
9760 	}
9761 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9762 	    NULL, cs->cr, NULL);
9763 
9764 	/*
9765 	 * N.B. We map error values to nfsv4 errors. This is differrent
9766 	 * than puterrno4 routine.
9767 	 */
9768 	switch (error) {
9769 	case 0:
9770 		if (flk.l_type == F_UNLCK)
9771 			resp->status = NFS4_OK;
9772 		else {
9773 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9774 				goto retry;
9775 			resp->status = NFS4ERR_DENIED;
9776 		}
9777 		break;
9778 	case EOVERFLOW:
9779 		resp->status = NFS4ERR_INVAL;
9780 		break;
9781 	case EINVAL:
9782 		resp->status = NFS4ERR_NOTSUPP;
9783 		break;
9784 	default:
9785 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9786 		    error);
9787 		resp->status = NFS4ERR_SERVERFAULT;
9788 		break;
9789 	}
9790 
9791 err:
9792 	if (lo)
9793 		rfs4_lockowner_rele(lo);
9794 	*cs->statusp = resp->status;
9795 out:
9796 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9797 	    LOCKT4res *, resp);
9798 }
9799 
9800 int
9801 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9802 {
9803 	int err;
9804 	int cmd;
9805 	vnode_t *vp;
9806 	struct shrlock shr;
9807 	struct shr_locowner shr_loco;
9808 	int fflags = 0;
9809 
9810 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9811 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9812 
9813 	if (sp->rs_closed)
9814 		return (NFS4ERR_OLD_STATEID);
9815 
9816 	vp = sp->rs_finfo->rf_vp;
9817 	ASSERT(vp);
9818 
9819 	shr.s_access = shr.s_deny = 0;
9820 
9821 	if (access & OPEN4_SHARE_ACCESS_READ) {
9822 		fflags |= FREAD;
9823 		shr.s_access |= F_RDACC;
9824 	}
9825 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
9826 		fflags |= FWRITE;
9827 		shr.s_access |= F_WRACC;
9828 	}
9829 	ASSERT(shr.s_access);
9830 
9831 	if (deny & OPEN4_SHARE_DENY_READ)
9832 		shr.s_deny |= F_RDDNY;
9833 	if (deny & OPEN4_SHARE_DENY_WRITE)
9834 		shr.s_deny |= F_WRDNY;
9835 
9836 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9837 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9838 	shr_loco.sl_pid = shr.s_pid;
9839 	shr_loco.sl_id = shr.s_sysid;
9840 	shr.s_owner = (caddr_t)&shr_loco;
9841 	shr.s_own_len = sizeof (shr_loco);
9842 
9843 	cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9844 
9845 	err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9846 	if (err != 0) {
9847 		if (err == EAGAIN)
9848 			err = NFS4ERR_SHARE_DENIED;
9849 		else
9850 			err = puterrno4(err);
9851 		return (err);
9852 	}
9853 
9854 	sp->rs_share_access |= access;
9855 	sp->rs_share_deny |= deny;
9856 
9857 	return (0);
9858 }
9859 
9860 int
9861 rfs4_unshare(rfs4_state_t *sp)
9862 {
9863 	int err;
9864 	struct shrlock shr;
9865 	struct shr_locowner shr_loco;
9866 
9867 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9868 
9869 	if (sp->rs_closed || sp->rs_share_access == 0)
9870 		return (0);
9871 
9872 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9873 	ASSERT(sp->rs_finfo->rf_vp);
9874 
9875 	shr.s_access = shr.s_deny = 0;
9876 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9877 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9878 	shr_loco.sl_pid = shr.s_pid;
9879 	shr_loco.sl_id = shr.s_sysid;
9880 	shr.s_owner = (caddr_t)&shr_loco;
9881 	shr.s_own_len = sizeof (shr_loco);
9882 
9883 	err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9884 	    NULL);
9885 	if (err != 0) {
9886 		err = puterrno4(err);
9887 		return (err);
9888 	}
9889 
9890 	sp->rs_share_access = 0;
9891 	sp->rs_share_deny = 0;
9892 
9893 	return (0);
9894 
9895 }
9896 
9897 static int
9898 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9899 {
9900 	struct clist	*wcl;
9901 	count4		count = rok->data_len;
9902 	int		wlist_len;
9903 
9904 	wcl = args->wlist;
9905 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9906 		return (FALSE);
9907 	}
9908 	wcl = args->wlist;
9909 	rok->wlist_len = wlist_len;
9910 	rok->wlist = wcl;
9911 	return (TRUE);
9912 }
9913 
/*
 * Tunable to disable server referrals.  While it is nonzero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9916 
9917 /*
9918  * Find an NFS record in reparse point data.
9919  * Returns 0 for success and <0 or an errno value on failure.
9920  */
9921 int
9922 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9923 {
9924 	int err;
9925 	char *stype, *val;
9926 	nvlist_t *nvl;
9927 	nvpair_t *curr;
9928 
9929 	if ((nvl = reparse_init()) == NULL)
9930 		return (-1);
9931 
9932 	if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9933 		reparse_free(nvl);
9934 		return (err);
9935 	}
9936 
9937 	curr = NULL;
9938 	while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9939 		if ((stype = nvpair_name(curr)) == NULL) {
9940 			reparse_free(nvl);
9941 			return (-2);
9942 		}
9943 		if (strncasecmp(stype, "NFS", 3) == 0)
9944 			break;
9945 	}
9946 
9947 	if ((curr == NULL) ||
9948 	    (nvpair_value_string(curr, &val))) {
9949 		reparse_free(nvl);
9950 		return (-3);
9951 	}
9952 	*nvlp = nvl;
9953 	*svcp = stype;
9954 	*datap = val;
9955 	return (0);
9956 }
9957 
9958 int
9959 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9960 {
9961 	nvlist_t *nvl;
9962 	char *s, *d;
9963 
9964 	if (rfs4_no_referrals != 0)
9965 		return (B_FALSE);
9966 
9967 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9968 		return (B_FALSE);
9969 
9970 	if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9971 		return (B_FALSE);
9972 
9973 	reparse_free(nvl);
9974 
9975 	return (B_TRUE);
9976 }
9977 
9978 /*
9979  * There is a user-level copy of this routine in ref_subr.c.
9980  * Changes should be kept in sync.
9981  */
9982 static int
9983 nfs4_create_components(char *path, component4 *comp4)
9984 {
9985 	int slen, plen, ncomp;
9986 	char *ori_path, *nxtc, buf[MAXNAMELEN];
9987 
9988 	if (path == NULL)
9989 		return (0);
9990 
9991 	plen = strlen(path) + 1;	/* include the terminator */
9992 	ori_path = path;
9993 	ncomp = 0;
9994 
9995 	/* count number of components in the path */
9996 	for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9997 		if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9998 			if ((slen = nxtc - path) == 0) {
9999 				path = nxtc + 1;
10000 				continue;
10001 			}
10002 
10003 			if (comp4 != NULL) {
10004 				bcopy(path, buf, slen);
10005 				buf[slen] = '\0';
10006 				(void) str_to_utf8(buf, &comp4[ncomp]);
10007 			}
10008 
10009 			ncomp++;	/* 1 valid component */
10010 			path = nxtc + 1;
10011 		}
10012 		if (*nxtc == '\0' || *nxtc == '\n')
10013 			break;
10014 	}
10015 
10016 	return (ncomp);
10017 }
10018 
10019 /*
10020  * There is a user-level copy of this routine in ref_subr.c.
10021  * Changes should be kept in sync.
10022  */
10023 static int
10024 make_pathname4(char *path, pathname4 *pathname)
10025 {
10026 	int ncomp;
10027 	component4 *comp4;
10028 
10029 	if (pathname == NULL)
10030 		return (0);
10031 
10032 	if (path == NULL) {
10033 		pathname->pathname4_val = NULL;
10034 		pathname->pathname4_len = 0;
10035 		return (0);
10036 	}
10037 
10038 	/* count number of components to alloc buffer */
10039 	if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10040 		pathname->pathname4_val = NULL;
10041 		pathname->pathname4_len = 0;
10042 		return (0);
10043 	}
10044 	comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10045 
10046 	/* copy components into allocated buffer */
10047 	ncomp = nfs4_create_components(path, comp4);
10048 
10049 	pathname->pathname4_val = comp4;
10050 	pathname->pathname4_len = ncomp;
10051 
10052 	return (ncomp);
10053 }
10054 
/* Local alias: fetch_referral() decodes an fs_locations4 with this routine */
#define	xdr_fs_locations4 xdr_fattr4_fs_locations
10056 
10057 fs_locations4 *
10058 fetch_referral(vnode_t *vp, cred_t *cr)
10059 {
10060 	nvlist_t *nvl;
10061 	char *stype, *sdata;
10062 	fs_locations4 *result;
10063 	char buf[1024];
10064 	size_t bufsize;
10065 	XDR xdr;
10066 	int err;
10067 
10068 	/*
10069 	 * Check attrs to ensure it's a reparse point
10070 	 */
10071 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10072 		return (NULL);
10073 
10074 	/*
10075 	 * Look for an NFS record and get the type and data
10076 	 */
10077 	if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10078 		return (NULL);
10079 
10080 	/*
10081 	 * With the type and data, upcall to get the referral
10082 	 */
10083 	bufsize = sizeof (buf);
10084 	bzero(buf, sizeof (buf));
10085 	err = reparse_kderef((const char *)stype, (const char *)sdata,
10086 	    buf, &bufsize);
10087 	reparse_free(nvl);
10088 
10089 	DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10090 	    char *, stype, char *, sdata, char *, buf, int, err);
10091 	if (err) {
10092 		cmn_err(CE_NOTE,
10093 		    "reparsed daemon not running: unable to get referral (%d)",
10094 		    err);
10095 		return (NULL);
10096 	}
10097 
10098 	/*
10099 	 * We get an XDR'ed record back from the kderef call
10100 	 */
10101 	xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10102 	result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10103 	err = xdr_fs_locations4(&xdr, result);
10104 	XDR_DESTROY(&xdr);
10105 	if (err != TRUE) {
10106 		DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10107 		    int, err);
10108 		return (NULL);
10109 	}
10110 
10111 	/*
10112 	 * Look at path to recover fs_root, ignoring the leading '/'
10113 	 */
10114 	(void) make_pathname4(vp->v_path, &result->fs_root);
10115 
10116 	return (result);
10117 }
10118 
10119 char *
10120 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10121 {
10122 	fs_locations4 *fsl;
10123 	fs_location4 *fs;
10124 	char *server, *path, *symbuf;
10125 	static char *prefix = "/net/";
10126 	int i, size, npaths;
10127 	uint_t len;
10128 
10129 	/* Get the referral */
10130 	if ((fsl = fetch_referral(vp, cr)) == NULL)
10131 		return (NULL);
10132 
10133 	/* Deal with only the first location and first server */
10134 	fs = &fsl->locations_val[0];
10135 	server = utf8_to_str(&fs->server_val[0], &len, NULL);
10136 	if (server == NULL) {
10137 		rfs4_free_fs_locations4(fsl);
10138 		kmem_free(fsl, sizeof (fs_locations4));
10139 		return (NULL);
10140 	}
10141 
10142 	/* Figure out size for "/net/" + host + /path/path/path + NULL */
10143 	size = strlen(prefix) + len;
10144 	for (i = 0; i < fs->rootpath.pathname4_len; i++)
10145 		size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10146 
10147 	/* Allocate the symlink buffer and fill it */
10148 	symbuf = kmem_zalloc(size, KM_SLEEP);
10149 	(void) strcat(symbuf, prefix);
10150 	(void) strcat(symbuf, server);
10151 	kmem_free(server, len);
10152 
10153 	npaths = 0;
10154 	for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10155 		path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10156 		if (path == NULL)
10157 			continue;
10158 		(void) strcat(symbuf, "/");
10159 		(void) strcat(symbuf, path);
10160 		npaths++;
10161 		kmem_free(path, len);
10162 	}
10163 
10164 	rfs4_free_fs_locations4(fsl);
10165 	kmem_free(fsl, sizeof (fs_locations4));
10166 
10167 	if (strsz != NULL)
10168 		*strsz = size;
10169 	return (symbuf);
10170 }
10171 
10172 /*
10173  * Check to see if we have a downrev Solaris client, so that we
10174  * can send it a symlink instead of a referral.
10175  */
10176 int
10177 client_is_downrev(struct svc_req *req)
10178 {
10179 	struct sockaddr *ca;
10180 	rfs4_clntip_t *ci;
10181 	bool_t create = FALSE;
10182 	int is_downrev;
10183 
10184 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10185 	ASSERT(ca);
10186 	ci = rfs4_find_clntip(ca, &create);
10187 	if (ci == NULL)
10188 		return (0);
10189 	is_downrev = ci->ri_no_referrals;
10190 	rfs4_dbe_rele(ci->ri_dbe);
10191 	return (is_downrev);
10192 }
10193 
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * nsrv4 supplies the "currently-serving" circular list (dss_pathlist);
 * the new set of paths is taken from the file-level rfs4_dss_newpaths
 * array and its rfs4_dss_numnewpaths count.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 *
	 * NOTE(review): the walk stops when it gets back to
	 * nsrv4->dss_pathlist, which presumably relies on the list head
	 * never being one of the entries freed below -- confirm.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/* used only for non-HA so may not be removed */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			/* remember the successor: dss_path is freed below */
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/* remove from "currently-serving" list, and destroy */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): only strlen(path) bytes are
			 * compared, so a served path that is a prefix of
			 * newpath (e.g. "/rg1" vs "/rg10") counts as a
			 * match, whereas the removal scan above uses an
			 * exact strcmp().  Confirm whether the prefix match
			 * is intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* the pointer is borrowed from rfs4_dss_newpaths */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* only the pointer array is freed; the strings are not owned here */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}
10357