xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision 4774dff6a15e3052e75fd8a0fdd519521be2db59)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 /*
32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33  * Copyright 2019 Nexenta Systems, Inc.
34  * Copyright 2019 Nexenta by DDN, Inc.
35  * Copyright 2021 Racktop Systems, Inc.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65 
66 #include <fs/fs_reparse.h>
67 
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80 
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83 
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87 
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90 
91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
94 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 /* End of Tunables */
98 
99 static int rdma_setup_read_data4(READ4args *, READ4res *);
100 
101 /*
102  * Used to bump the stateid4.seqid value and show changes in the stateid
103  */
104 #define	next_stateid(sp) (++(sp)->bits.chgseq)
105 
106 /*
107  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
108  *	This is used to return NFS4ERR_TOOSMALL when clients specify
109  *	maxcount that isn't large enough to hold the smallest possible
110  *	XDR encoded dirent.
111  *
112  *	    sizeof cookie (8 bytes) +
113  *	    sizeof name_len (4 bytes) +
114  *	    sizeof smallest (padded) name (4 bytes) +
115  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
116  *	    sizeof attrlist4_len (4 bytes) +
117  *	    sizeof next boolean (4 bytes)
118  *
119  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
120  * the smallest possible entry4 (assumes no attrs requested).
121  *	sizeof nfsstat4 (4 bytes) +
122  *	sizeof verifier4 (8 bytes) +
123  *	sizeof entry4list bool (4 bytes) +
124  *	sizeof entry4 (36 bytes) +
125  *	sizeof eof bool (4 bytes)
126  *
127  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
128  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
129  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
130  *	required for a given name length.  MAXNAMELEN is the maximum
131  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
132  *	macros are to allow for . and .. entries -- just a minor tweak to try
133  *	and guarantee that buffer we give to VOP_READDIR will be large enough
134  *	to hold ., .., and the largest possible solaris dirent64.
135  */
136 #define	RFS4_MINLEN_ENTRY4 36
137 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
138 #define	RFS4_MINLEN_RDDIR_BUF \
139 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
140 
141 /*
142  * It would be better to pad to 4 bytes since that's what XDR would do,
143  * but the dirents UFS gives us are already padded to 8, so just take
144  * what we're given.  Dircount is only a hint anyway.  Currently the
145  * solaris kernel is ASCII only, so there's no point in calling the
146  * UTF8 functions.
147  *
 * dirent64: name padded to provide 8 byte struct alignment
149  *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
150  *
151  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
152  *
153  */
154 #define	DIRENT64_TO_DIRCOUNT(dp) \
155 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
156 
157 
158 static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */
159 
160 u_longlong_t	nfs4_srv_caller_id;
161 uint_t		nfs4_srv_vkey = 0;
162 
163 void	rfs4_init_compound_state(struct compound_state *);
164 
165 static void	nullfree(caddr_t);
166 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 		    struct compound_state *);
168 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 		    struct compound_state *);
170 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 		    struct compound_state *);
172 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 		    struct compound_state *);
174 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 		    struct compound_state *);
176 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
177 		    struct svc_req *, struct compound_state *);
178 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
179 		    struct svc_req *, struct compound_state *);
180 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
181 		    struct compound_state *);
182 static void	rfs4_op_getattr_free(nfs_resop4 *);
183 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 		    struct compound_state *);
185 static void	rfs4_op_getfh_free(nfs_resop4 *);
186 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
187 		    struct compound_state *);
188 static void	rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 		    struct compound_state *);
190 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 		    struct compound_state *);
192 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 		    struct compound_state *);
194 static void	lock_denied_free(nfs_resop4 *);
195 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
196 		    struct compound_state *);
197 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 		    struct compound_state *);
199 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 		    struct compound_state *);
201 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 		    struct compound_state *);
203 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
204 		    struct svc_req *req, struct compound_state *cs);
205 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
206 		    struct compound_state *);
207 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 		    struct compound_state *);
209 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
210 		    struct svc_req *, struct compound_state *);
211 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
212 		    struct svc_req *, struct compound_state *);
213 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
214 		    struct compound_state *);
215 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 		    struct compound_state *);
217 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 		    struct compound_state *);
219 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 		    struct compound_state *);
221 static void	rfs4_op_read_free(nfs_resop4 *);
222 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
223 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 		    struct compound_state *);
225 static void	rfs4_op_readlink_free(nfs_resop4 *);
226 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
227 		    struct svc_req *, struct compound_state *);
228 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
229 		    struct compound_state *);
230 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 		    struct compound_state *);
232 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 		    struct compound_state *);
234 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 		    struct compound_state *);
236 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 		    struct compound_state *);
238 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 		    struct compound_state *);
240 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 		    struct compound_state *);
242 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 		    struct compound_state *);
244 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
245 		    struct svc_req *, struct compound_state *);
246 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
247 		    struct svc_req *req, struct compound_state *);
248 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
249 		    struct compound_state *);
250 static void	rfs4_op_secinfo_free(nfs_resop4 *);
251 
252 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
253     struct svc_req *req, struct compound_state *cs);
254 void rfs4x_exchange_id_free(nfs_resop4 *);
255 
256 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
257     struct svc_req *req, struct compound_state *cs);
258 
259 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
260     struct svc_req *req, compound_state_t *cs);
261 
262 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
263     struct svc_req *req, struct compound_state *cs);
264 
265 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
266     struct svc_req *req, compound_state_t *cs);
267 
268 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
269     struct svc_req *req, compound_state_t *cs);
270 
271 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
272     struct svc_req *req, compound_state_t *cs);
273 
274 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
275     struct svc_req *req, compound_state_t *cs);
276 
277 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
278 		    struct svc_req *);
279 nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
280 void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
281 
282 /*
283  * translation table for attrs
284  */
285 struct nfs4_ntov_table {
286 	union nfs4_attr_u *na;
287 	uint8_t amap[NFS4_MAXNUM_ATTRS];
288 	int attrcnt;
289 	bool_t vfsstat;
290 };
291 
292 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
293 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
294 		    struct nfs4_svgetit_arg *sargp);
295 
296 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
297 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
298 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
299 
300 static void	hanfsv4_failover(nfs4_srv_t *);
301 
302 fem_t		*deleg_rdops;
303 fem_t		*deleg_wrops;
304 
305 /*
306  * NFS4 op dispatch table
307  */
308 
309 struct rfsv4disp {
310 	void	(*dis_proc)();		/* proc to call */
311 	void	(*dis_resfree)();	/* frees space allocated by proc */
312 	int	dis_flags;		/* RPC_IDEMPOTENT, etc... */
313 };
314 
315 static struct rfsv4disp rfsv4disptab[] = {
316 	/*
317 	 * NFS VERSION 4
318 	 */
319 
320 	/* RFS_NULL = 0 */
321 	{rfs4_op_illegal, nullfree, 0},
322 
323 	/* UNUSED = 1 */
324 	{rfs4_op_illegal, nullfree, 0},
325 
326 	/* UNUSED = 2 */
327 	{rfs4_op_illegal, nullfree, 0},
328 
329 	/* OP_ACCESS = 3 */
330 	{rfs4_op_access, nullfree, RPC_IDEMPOTENT},
331 
332 	/* OP_CLOSE = 4 */
333 	{rfs4_op_close, nullfree, 0},
334 
335 	/* OP_COMMIT = 5 */
336 	{rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
337 
338 	/* OP_CREATE = 6 */
339 	{rfs4_op_create, nullfree, 0},
340 
341 	/* OP_DELEGPURGE = 7 */
342 	{rfs4_op_delegpurge, nullfree, 0},
343 
344 	/* OP_DELEGRETURN = 8 */
345 	{rfs4_op_delegreturn, nullfree, 0},
346 
347 	/* OP_GETATTR = 9 */
348 	{rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
349 
350 	/* OP_GETFH = 10 */
351 	{rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
352 
353 	/* OP_LINK = 11 */
354 	{rfs4_op_link, nullfree, 0},
355 
356 	/* OP_LOCK = 12 */
357 	{rfs4_op_lock, lock_denied_free, 0},
358 
359 	/* OP_LOCKT = 13 */
360 	{rfs4_op_lockt, lock_denied_free, 0},
361 
362 	/* OP_LOCKU = 14 */
363 	{rfs4_op_locku, nullfree, 0},
364 
365 	/* OP_LOOKUP = 15 */
366 	{rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
367 
368 	/* OP_LOOKUPP = 16 */
369 	{rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
370 
371 	/* OP_NVERIFY = 17 */
372 	{rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
373 
374 	/* OP_OPEN = 18 */
375 	{rfs4_op_open, rfs4_free_reply, 0},
376 
377 	/* OP_OPENATTR = 19 */
378 	{rfs4_op_openattr, nullfree, 0},
379 
380 	/* OP_OPEN_CONFIRM = 20 */
381 	{rfs4_op_open_confirm, nullfree, 0},
382 
383 	/* OP_OPEN_DOWNGRADE = 21 */
384 	{rfs4_op_open_downgrade, nullfree, 0},
385 
386 	/* OP_OPEN_PUTFH = 22 */
387 	{rfs4_op_putfh, nullfree, RPC_ALL},
388 
389 	/* OP_PUTPUBFH = 23 */
390 	{rfs4_op_putpubfh, nullfree, RPC_ALL},
391 
392 	/* OP_PUTROOTFH = 24 */
393 	{rfs4_op_putrootfh, nullfree, RPC_ALL},
394 
395 	/* OP_READ = 25 */
396 	{rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
397 
398 	/* OP_READDIR = 26 */
399 	{rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
400 
401 	/* OP_READLINK = 27 */
402 	{rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
403 
404 	/* OP_REMOVE = 28 */
405 	{rfs4_op_remove, nullfree, 0},
406 
407 	/* OP_RENAME = 29 */
408 	{rfs4_op_rename, nullfree, 0},
409 
410 	/* OP_RENEW = 30 */
411 	{rfs4_op_renew, nullfree, 0},
412 
413 	/* OP_RESTOREFH = 31 */
414 	{rfs4_op_restorefh, nullfree, RPC_ALL},
415 
416 	/* OP_SAVEFH = 32 */
417 	{rfs4_op_savefh, nullfree, RPC_ALL},
418 
419 	/* OP_SECINFO = 33 */
420 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
421 
422 	/* OP_SETATTR = 34 */
423 	{rfs4_op_setattr, nullfree, 0},
424 
425 	/* OP_SETCLIENTID = 35 */
426 	{rfs4_op_setclientid, nullfree, 0},
427 
428 	/* OP_SETCLIENTID_CONFIRM = 36 */
429 	{rfs4_op_setclientid_confirm, nullfree, 0},
430 
431 	/* OP_VERIFY = 37 */
432 	{rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
433 
434 	/* OP_WRITE = 38 */
435 	{rfs4_op_write, nullfree, 0},
436 
437 	/* OP_RELEASE_LOCKOWNER = 39 */
438 	{rfs4_op_release_lockowner, nullfree, 0},
439 
440 	/*
441 	 * NFSv4.1 operations
442 	 */
443 
444 	/* OP_BACKCHANNEL_CTL = 40 */
445 	{rfs4_op_notsup,  nullfree,  0},
446 
447 	/*  OP_BIND_CONN_TO_SESSION = 41 */
448 	{rfs4x_op_bind_conn_to_session,  nullfree,  0},
449 
450 	/* OP_EXCHANGE_ID  = 42 */
451 	{rfs4x_op_exchange_id,  rfs4x_exchange_id_free,  0},
452 
453 	/* OP_CREATE_SESSION = 43 */
454 	{rfs4x_op_create_session,  nullfree,  0},
455 
456 	/* OP_DESTROY_SESSION = 44 */
457 	{rfs4x_op_destroy_session,  nullfree,  0},
458 
459 	/* OP_FREE_STATEID = 45 */
460 	{rfs4_op_notsup,  nullfree,  0},
461 
462 	/* OP_GET_DIR_DELEGATION = 46 */
463 	{rfs4_op_notsup,  nullfree,  0},
464 
465 	/* OP_GETDEVICEINFO = 47 */
466 	{rfs4_op_notsup,  nullfree,  0},
467 
468 	/* OP_GETDEVICELIST = 48 */
469 	{rfs4_op_notsup,  nullfree,  0},
470 
471 	/* OP_LAYOUTCOMMIT = 49 */
472 	{rfs4_op_notsup,  nullfree,  0},
473 
474 	/* OP_LAYOUTGET = 50 */
475 	{rfs4_op_notsup,  nullfree,  0},
476 
477 	/* OP_LAYOUTRETURN = 51 */
478 	{rfs4_op_notsup,  nullfree,  0},
479 
480 	/* OP_SECINFO_NO_NAME = 52 */
481 	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
482 
483 	/* OP_SEQUENCE = 53 */
484 	{rfs4x_op_sequence,  nullfree,  0},
485 
486 	/* OP_SET_SSV = 54 */
487 	{rfs4_op_notsup,  nullfree,  0},
488 
489 	/* OP_TEST_STATEID = 55 */
490 	{rfs4_op_notsup,  nullfree,  0},
491 
492 	/* OP_WANT_DELEGATION = 56 */
493 	{rfs4_op_notsup,  nullfree,  0},
494 
495 	/* OP_DESTROY_CLIENTID = 57 */
496 	{rfs4x_op_destroy_clientid,  nullfree,  0},
497 
498 	/* OP_RECLAIM_COMPLETE = 58 */
499 	{rfs4x_op_reclaim_complete,  nullfree,  0},
500 };
501 
502 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
503 
504 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
505 
506 #ifdef DEBUG
507 
508 int		rfs4_fillone_debug = 0;
509 int		rfs4_no_stub_access = 1;
510 int		rfs4_rddir_debug = 0;
511 
512 static char    *rfs4_op_string[] = {
513 	"rfs4_op_null",
514 	"rfs4_op_1 unused",
515 	"rfs4_op_2 unused",
516 	"rfs4_op_access",
517 	"rfs4_op_close",
518 	"rfs4_op_commit",
519 	"rfs4_op_create",
520 	"rfs4_op_delegpurge",
521 	"rfs4_op_delegreturn",
522 	"rfs4_op_getattr",
523 	"rfs4_op_getfh",
524 	"rfs4_op_link",
525 	"rfs4_op_lock",
526 	"rfs4_op_lockt",
527 	"rfs4_op_locku",
528 	"rfs4_op_lookup",
529 	"rfs4_op_lookupp",
530 	"rfs4_op_nverify",
531 	"rfs4_op_open",
532 	"rfs4_op_openattr",
533 	"rfs4_op_open_confirm",
534 	"rfs4_op_open_downgrade",
535 	"rfs4_op_putfh",
536 	"rfs4_op_putpubfh",
537 	"rfs4_op_putrootfh",
538 	"rfs4_op_read",
539 	"rfs4_op_readdir",
540 	"rfs4_op_readlink",
541 	"rfs4_op_remove",
542 	"rfs4_op_rename",
543 	"rfs4_op_renew",
544 	"rfs4_op_restorefh",
545 	"rfs4_op_savefh",
546 	"rfs4_op_secinfo",
547 	"rfs4_op_setattr",
548 	"rfs4_op_setclientid",
549 	"rfs4_op_setclient_confirm",
550 	"rfs4_op_verify",
551 	"rfs4_op_write",
552 	"rfs4_op_release_lockowner",
553 	/* NFSv4.1 */
554 	"backchannel_ctl",
555 	"bind_conn_to_session",
556 	"exchange_id",
557 	"create_session",
558 	"destroy_session",
559 	"free_stateid",
560 	"get_dir_delegation",
561 	"getdeviceinfo",
562 	"getdevicelist",
563 	"layoutcommit",
564 	"layoutget",
565 	"layoutreturn",
566 	"secinfo_no_name",
567 	"sequence",
568 	"set_ssv",
569 	"test_stateid",
570 	"want_delegation",
571 	"destroy_clientid",
572 	"reclaim_complete",
573 	"rfs4_op_illegal"
574 };
575 
576 #endif
577 
578 void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
579 
580 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
581 
582 extern void	rfs4_free_fs_locations4(fs_locations4 *);
583 
584 #ifdef	nextdp
585 #undef nextdp
586 #endif
587 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
588 
589 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
590 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
591 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
592 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
593 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
594 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
595 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
596 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
597 	NULL,			NULL
598 };
599 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
600 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
601 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
602 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
603 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
604 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
605 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
606 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
607 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
608 	NULL,			NULL
609 };
610 
611 nfs4_srv_t *
612 nfs4_get_srv(void)
613 {
614 	nfs_globals_t *ng = nfs_srv_getzg();
615 	nfs4_srv_t *srv = ng->nfs4_srv;
616 	ASSERT(srv != NULL);
617 	return (srv);
618 }
619 
620 void
621 rfs4_srv_zone_init(nfs_globals_t *ng)
622 {
623 	nfs4_srv_t *nsrv4;
624 	timespec32_t verf;
625 
626 	nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
627 
628 	/*
629 	 * The following algorithm attempts to find a unique verifier
630 	 * to be used as the write verifier returned from the server
631 	 * to the client.  It is important that this verifier change
632 	 * whenever the server reboots.  Of secondary importance, it
633 	 * is important for the verifier to be unique between two
634 	 * different servers.
635 	 *
636 	 * Thus, an attempt is made to use the system hostid and the
637 	 * current time in seconds when the nfssrv kernel module is
638 	 * loaded.  It is assumed that an NFS server will not be able
639 	 * to boot and then to reboot in less than a second.  If the
640 	 * hostid has not been set, then the current high resolution
641 	 * time is used.  This will ensure different verifiers each
642 	 * time the server reboots and minimize the chances that two
643 	 * different servers will have the same verifier.
644 	 * XXX - this is broken on LP64 kernels.
645 	 */
646 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
647 	if (verf.tv_sec != 0) {
648 		verf.tv_nsec = gethrestime_sec();
649 	} else {
650 		timespec_t tverf;
651 
652 		gethrestime(&tverf);
653 		verf.tv_sec = (time_t)tverf.tv_sec;
654 		verf.tv_nsec = tverf.tv_nsec;
655 	}
656 	nsrv4->write4verf = *(uint64_t *)&verf;
657 
658 	/* Used to manage create/destroy of server state */
659 	nsrv4->nfs4_server_state = NULL;
660 	nsrv4->nfs4_cur_servinst = NULL;
661 	nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
662 	mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
663 	mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
664 	mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
665 	rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
666 
667 	ng->nfs4_srv = nsrv4;
668 }
669 
670 void
671 rfs4_srv_zone_fini(nfs_globals_t *ng)
672 {
673 	nfs4_srv_t *nsrv4 = ng->nfs4_srv;
674 
675 	ng->nfs4_srv = NULL;
676 
677 	mutex_destroy(&nsrv4->deleg_lock);
678 	mutex_destroy(&nsrv4->state_lock);
679 	mutex_destroy(&nsrv4->servinst_lock);
680 	rw_destroy(&nsrv4->deleg_policy_lock);
681 
682 	kmem_free(nsrv4, sizeof (*nsrv4));
683 }
684 
685 void
686 rfs4_srvrinit(void)
687 {
688 	extern void rfs4_attr_init();
689 
690 	rfs4_attr_init();
691 
692 	if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
693 		rfs4_disable_delegation();
694 	} else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
695 	    &deleg_wrops) != 0) {
696 		rfs4_disable_delegation();
697 		fem_free(deleg_rdops);
698 	}
699 
700 	nfs4_srv_caller_id = fs_new_caller_id();
701 	lockt_sysid = lm_alloc_sysidt();
702 	vsd_create(&nfs4_srv_vkey, NULL);
703 	rfs4_state_g_init();
704 }
705 
706 void
707 rfs4_srvrfini(void)
708 {
709 	if (lockt_sysid != LM_NOSYSID) {
710 		lm_free_sysidt(lockt_sysid);
711 		lockt_sysid = LM_NOSYSID;
712 	}
713 
714 	rfs4_state_g_fini();
715 
716 	fem_free(deleg_rdops);
717 	fem_free(deleg_wrops);
718 }
719 
720 void
721 rfs4_do_server_start(int server_upordown, int srv_delegation,
722     nfs4_minor_t nfs4_minor_max, int cluster_booted)
723 {
724 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
725 
726 	/* Is this a warm start? */
727 	if (server_upordown == NFS_SERVER_QUIESCED) {
728 		cmn_err(CE_NOTE, "nfs4_srv: "
729 		    "server was previously quiesced; "
730 		    "existing NFSv4 state will be re-used");
731 
732 		/*
733 		 * HA-NFSv4: this is also the signal
734 		 * that a Resource Group failover has
735 		 * occurred.
736 		 */
737 		if (cluster_booted)
738 			hanfsv4_failover(nsrv4);
739 	} else {
740 		/* Cold start */
741 		nsrv4->rfs4_start_time = 0;
742 		rfs4_state_zone_init(nsrv4);
743 		nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
744 		    nfs4_drc_hash);
745 
746 		/*
747 		 * The nfsd service was started with the -s option
748 		 * we need to pull in any state from the paths indicated.
749 		 */
750 		if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
751 			/* read in the stable storage state from these paths */
752 			rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
753 			    rfs4_dss_newpaths);
754 		}
755 	}
756 
757 	nsrv4->nfs4_minor_max = nfs4_minor_max;
758 
759 	/* Check if delegation is to be enabled */
760 	if (srv_delegation != FALSE)
761 		rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
762 }
763 
764 void
765 rfs4_init_compound_state(struct compound_state *cs)
766 {
767 	bzero(cs, sizeof (*cs));
768 	cs->cont = TRUE;
769 	cs->access = CS_ACCESS_DENIED;
770 	cs->deleg = FALSE;
771 	cs->mandlock = FALSE;
772 	cs->fh.nfs_fh4_val = cs->fhbuf;
773 }
774 
775 /* Do cleanup of the compound_state */
776 void
777 rfs4_fini_compound_state(struct compound_state *cs)
778 {
779 	if (cs->vp) {
780 		VN_RELE(cs->vp);
781 	}
782 	if (cs->saved_vp) {
783 		VN_RELE(cs->saved_vp);
784 	}
785 	if (cs->cr) {
786 		crfree(cs->cr);
787 	}
788 	if (cs->saved_fh.nfs_fh4_val) {
789 		kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
790 	}
791 	if (cs->sp) {
792 		rfs4x_session_rele(cs->sp);
793 	}
794 }
795 
796 void
797 rfs4_grace_start(rfs4_servinst_t *sip)
798 {
799 	rw_enter(&sip->rwlock, RW_WRITER);
800 	sip->start_time = nfs_sys_uptime();
801 	sip->grace_period = rfs4_grace_period;
802 	rw_exit(&sip->rwlock);
803 }
804 
805 /*
806  * returns true if the instance's grace period has never been started
807  */
808 int
809 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
810 {
811 	time_t start_time;
812 
813 	rw_enter(&sip->rwlock, RW_READER);
814 	start_time = sip->start_time;
815 	rw_exit(&sip->rwlock);
816 
817 	return (start_time == 0);
818 }
819 
820 /*
821  * Indicates if server instance is within the
822  * grace period.
823  */
824 int
825 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
826 {
827 	time_t grace_expiry;
828 
829 	/* All clients called reclaim-complete */
830 	if (sip->nreclaim == 0 || sip->grace_period == 0)
831 		return (0);
832 
833 	rw_enter(&sip->rwlock, RW_READER);
834 	grace_expiry = sip->start_time + sip->grace_period;
835 	rw_exit(&sip->rwlock);
836 
837 	if (nfs_sys_uptime() < grace_expiry)
838 		return (1);
839 
840 	/* Once grace period ends, optimize next calls */
841 	sip->grace_period = 0;
842 	return (0);
843 }
844 
845 int
846 rfs4_clnt_in_grace(rfs4_client_t *cp)
847 {
848 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
849 
850 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
851 }
852 
853 /*
854  * reset all currently active grace periods
855  */
856 void
857 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
858 {
859 	rfs4_servinst_t *sip;
860 
861 	mutex_enter(&nsrv4->servinst_lock);
862 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
863 		if (rfs4_servinst_in_grace(sip))
864 			rfs4_grace_start(sip);
865 	mutex_exit(&nsrv4->servinst_lock);
866 }
867 
868 /*
869  * start any new instances' grace periods
870  */
871 void
872 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
873 {
874 	rfs4_servinst_t *sip;
875 
876 	mutex_enter(&nsrv4->servinst_lock);
877 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
878 		if (rfs4_servinst_grace_new(sip))
879 			rfs4_grace_start(sip);
880 	mutex_exit(&nsrv4->servinst_lock);
881 }
882 
883 static rfs4_dss_path_t *
884 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
885     char *path, unsigned index)
886 {
887 	size_t len;
888 	rfs4_dss_path_t *dss_path;
889 
890 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
891 
892 	/*
893 	 * Take a copy of the string, since the original may be overwritten.
894 	 * Sadly, no strdup() in the kernel.
895 	 */
896 	/* allow for NUL */
897 	len = strlen(path) + 1;
898 	dss_path->path = kmem_alloc(len, KM_SLEEP);
899 	(void) strlcpy(dss_path->path, path, len);
900 
901 	/* associate with servinst */
902 	dss_path->sip = sip;
903 	dss_path->index = index;
904 
905 	/*
906 	 * Add to list of served paths.
907 	 * No locking required, as we're only ever called at startup.
908 	 */
909 	if (nsrv4->dss_pathlist == NULL) {
910 		/* this is the first dss_path_t */
911 
912 		/* needed for insque/remque */
913 		dss_path->next = dss_path->prev = dss_path;
914 
915 		nsrv4->dss_pathlist = dss_path;
916 	} else {
917 		insque(dss_path, nsrv4->dss_pathlist);
918 	}
919 
920 	return (dss_path);
921 }
922 
923 /*
924  * Create a new server instance, and make it the currently active instance.
925  * Note that starting the grace period too early will reduce the clients'
926  * recovery window.
927  */
928 void
929 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
930     int dss_npaths, char **dss_paths)
931 {
932 	unsigned i;
933 	rfs4_servinst_t *sip;
934 	rfs4_oldstate_t *oldstate;
935 
936 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
937 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
938 
939 	sip->nreclaim = 0;
940 	sip->start_time = (time_t)0;
941 	sip->grace_period = (time_t)0;
942 	sip->next = NULL;
943 	sip->prev = NULL;
944 
945 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
946 	/*
947 	 * This initial dummy entry is required to setup for insque/remque.
948 	 * It must be skipped over whenever the list is traversed.
949 	 */
950 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
951 	/* insque/remque require initial list entry to be self-terminated */
952 	oldstate->next = oldstate;
953 	oldstate->prev = oldstate;
954 	sip->oldstate = oldstate;
955 
956 
957 	sip->dss_npaths = dss_npaths;
958 	sip->dss_paths = kmem_alloc(dss_npaths *
959 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
960 
961 	for (i = 0; i < dss_npaths; i++) {
962 		sip->dss_paths[i] =
963 		    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
964 	}
965 
966 	mutex_enter(&nsrv4->servinst_lock);
967 	if (nsrv4->nfs4_cur_servinst != NULL) {
968 		/* add to linked list */
969 		sip->prev = nsrv4->nfs4_cur_servinst;
970 		nsrv4->nfs4_cur_servinst->next = sip;
971 	}
972 	if (start_grace)
973 		rfs4_grace_start(sip);
974 	/* make the new instance "current" */
975 	nsrv4->nfs4_cur_servinst = sip;
976 
977 	mutex_exit(&nsrv4->servinst_lock);
978 }
979 
980 /*
981  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
982  * all instances directly.
983  */
984 void
985 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
986 {
987 	rfs4_servinst_t *sip, *prev, *current;
988 #ifdef DEBUG
989 	int n = 0;
990 #endif
991 
992 	mutex_enter(&nsrv4->servinst_lock);
993 	ASSERT(nsrv4->nfs4_cur_servinst != NULL);
994 	current = nsrv4->nfs4_cur_servinst;
995 	nsrv4->nfs4_cur_servinst = NULL;
996 	for (sip = current; sip != NULL; sip = prev) {
997 		prev = sip->prev;
998 		rw_destroy(&sip->rwlock);
999 		if (sip->oldstate)
1000 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1001 		if (sip->dss_paths) {
1002 			int i = sip->dss_npaths;
1003 
1004 			while (i > 0) {
1005 				i--;
1006 				if (sip->dss_paths[i] != NULL) {
1007 					char *path = sip->dss_paths[i]->path;
1008 
1009 					if (path != NULL) {
1010 						kmem_free(path,
1011 						    strlen(path) + 1);
1012 					}
1013 					kmem_free(sip->dss_paths[i],
1014 					    sizeof (rfs4_dss_path_t));
1015 				}
1016 			}
1017 			kmem_free(sip->dss_paths,
1018 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1019 		}
1020 		kmem_free(sip, sizeof (rfs4_servinst_t));
1021 #ifdef DEBUG
1022 		n++;
1023 #endif
1024 	}
1025 	mutex_exit(&nsrv4->servinst_lock);
1026 }
1027 
1028 /*
1029  * Assign the current server instance to a client_t.
1030  * Should be called with cp->rc_dbe held.
1031  */
1032 void
1033 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1034     rfs4_servinst_t *sip)
1035 {
1036 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1037 
1038 	/*
1039 	 * The lock ensures that if the current instance is in the process
1040 	 * of changing, we will see the new one.
1041 	 */
1042 	mutex_enter(&nsrv4->servinst_lock);
1043 	cp->rc_server_instance = sip;
1044 	mutex_exit(&nsrv4->servinst_lock);
1045 }
1046 
1047 rfs4_servinst_t *
1048 rfs4_servinst(rfs4_client_t *cp)
1049 {
1050 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1051 
1052 	return (cp->rc_server_instance);
1053 }
1054 
1055 /* ARGSUSED */
1056 static void
1057 nullfree(caddr_t resop)
1058 {
1059 }
1060 
1061 /*
1062  * This is a fall-through for invalid or not implemented (yet) ops
1063  */
1064 /* ARGSUSED */
1065 static void
1066 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1067     struct compound_state *cs)
1068 {
1069 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1070 }
1071 
/*
 * Report whether the security flavor nfsnum appears among the first
 * "count" entries of flavor_list.  Returns TRUE when it does, FALSE
 * otherwise (including when count is zero or negative).
 */
bool_t
in_flavor_list(int nfsnum, int *flavor_list, int count)
{
	bool_t found = FALSE;
	int idx = 0;

	while (idx < count && !found) {
		if (flavor_list[idx] == nfsnum)
			found = TRUE;
		idx++;
	}

	return (found);
}
1086 
1087 /*
1088  * Used by rfs4_op_secinfo to get the security information from the
1089  * export structure associated with the component.
1090  */
1091 /* ARGSUSED */
1092 nfsstat4
1093 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
1094 {
1095 	int error, different_export = 0;
1096 	vnode_t *dvp, *vp;
1097 	struct exportinfo *exi;
1098 	fid_t fid;
1099 	uint_t count, i;
1100 	secinfo4 *resok_val;
1101 	struct secinfo *secp;
1102 	seconfig_t *si;
1103 	bool_t did_traverse = FALSE;
1104 	int dotdot, walk;
1105 	nfs_export_t *ne = nfs_get_export();
1106 
1107 	dvp = cs->vp;
1108 	exi = cs->exi;
1109 	ASSERT(exi != NULL);
1110 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
1111 
1112 	/*
1113 	 * If dotdotting, then need to check whether it's above the
1114 	 * root of a filesystem, or above an export point.
1115 	 */
1116 	if (dotdot) {
1117 		vnode_t *zone_rootvp = ne->exi_root->exi_vp;
1118 
1119 		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
1120 		/*
1121 		 * If dotdotting at the root of a filesystem, then
1122 		 * need to traverse back to the mounted-on filesystem
1123 		 * and do the dotdot lookup there.
1124 		 */
1125 		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
1126 
1127 			/*
1128 			 * If at the system root, then can
1129 			 * go up no further.
1130 			 */
1131 			if (VN_CMP(dvp, zone_rootvp))
1132 				return (puterrno4(ENOENT));
1133 
1134 			/*
1135 			 * Traverse back to the mounted-on filesystem
1136 			 */
1137 			dvp = untraverse(dvp, zone_rootvp);
1138 
1139 			/*
1140 			 * Set the different_export flag so we remember
1141 			 * to pick up a new exportinfo entry for
1142 			 * this new filesystem.
1143 			 */
1144 			different_export = 1;
1145 		} else {
1146 
1147 			/*
1148 			 * If dotdotting above an export point then set
1149 			 * the different_export to get new export info.
1150 			 */
1151 			different_export = nfs_exported(exi, dvp);
1152 		}
1153 	}
1154 
1155 	/*
1156 	 * Get the vnode for the component "nm".
1157 	 */
1158 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1159 	    NULL, NULL, NULL);
1160 	if (error)
1161 		return (puterrno4(error));
1162 
1163 	/*
1164 	 * If the vnode is in a pseudo filesystem, or if the security flavor
1165 	 * used in the request is valid but not an explicitly shared flavor,
1166 	 * or the access bit indicates that this is a limited access,
1167 	 * check whether this vnode is visible.
1168 	 */
1169 	if (!different_export &&
1170 	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1171 	    cs->access & CS_ACCESS_LIMITED)) {
1172 		if (! nfs_visible(exi, vp, &different_export)) {
1173 			VN_RELE(vp);
1174 			return (puterrno4(ENOENT));
1175 		}
1176 	}
1177 
1178 	/*
1179 	 * If it's a mountpoint, then traverse it.
1180 	 */
1181 	if (vn_ismntpt(vp)) {
1182 		if ((error = traverse(&vp)) != 0) {
1183 			VN_RELE(vp);
1184 			return (puterrno4(error));
1185 		}
1186 		/* remember that we had to traverse mountpoint */
1187 		did_traverse = TRUE;
1188 		different_export = 1;
1189 	} else if (vp->v_vfsp != dvp->v_vfsp) {
1190 		/*
1191 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1192 		 * then vp is probably an LOFS object.  We don't need the
1193 		 * realvp, we just need to know that we might have crossed
1194 		 * a server fs boundary and need to call checkexport4.
1195 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1196 		 * traverse)
1197 		 */
1198 		different_export = 1;
1199 	}
1200 
1201 	/*
1202 	 * Get the export information for it.
1203 	 */
1204 	if (different_export) {
1205 
1206 		bzero(&fid, sizeof (fid));
1207 		fid.fid_len = MAXFIDSZ;
1208 		error = vop_fid_pseudo(vp, &fid);
1209 		if (error) {
1210 			VN_RELE(vp);
1211 			return (puterrno4(error));
1212 		}
1213 
1214 		/* We'll need to reassign "exi". */
1215 		if (dotdot)
1216 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1217 		else
1218 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1219 
1220 		if (exi == NULL) {
1221 			if (did_traverse == TRUE) {
1222 				/*
1223 				 * If this vnode is a mounted-on vnode,
1224 				 * but the mounted-on file system is not
1225 				 * exported, send back the secinfo for
1226 				 * the exported node that the mounted-on
1227 				 * vnode lives in.
1228 				 */
1229 				exi = cs->exi;
1230 			} else {
1231 				VN_RELE(vp);
1232 				return (puterrno4(EACCES));
1233 			}
1234 		}
1235 	}
1236 	ASSERT(exi != NULL);
1237 
1238 
1239 	/*
1240 	 * Create the secinfo result based on the security information
1241 	 * from the exportinfo structure (exi).
1242 	 *
1243 	 * Return all flavors for a pseudo node.
1244 	 * For a real export node, return the flavor that the client
1245 	 * has access with.
1246 	 */
1247 	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1248 	if (PSEUDO(exi)) {
1249 		count = exi->exi_export.ex_seccnt; /* total sec count */
1250 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1251 		secp = exi->exi_export.ex_secinfo;
1252 
1253 		for (i = 0; i < count; i++) {
1254 			si = &secp[i].s_secinfo;
1255 			resok_val[i].flavor = si->sc_rpcnum;
1256 			if (resok_val[i].flavor == RPCSEC_GSS) {
1257 				rpcsec_gss_info *info;
1258 
1259 				info = &resok_val[i].flavor_info;
1260 				info->qop = si->sc_qop;
1261 				info->service = (rpc_gss_svc_t)si->sc_service;
1262 
1263 				/* get oid opaque data */
1264 				info->oid.sec_oid4_len =
1265 				    si->sc_gss_mech_type->length;
1266 				info->oid.sec_oid4_val = kmem_alloc(
1267 				    si->sc_gss_mech_type->length, KM_SLEEP);
1268 				bcopy(
1269 				    si->sc_gss_mech_type->elements,
1270 				    info->oid.sec_oid4_val,
1271 				    info->oid.sec_oid4_len);
1272 			}
1273 		}
1274 		resp->SECINFO4resok_len = count;
1275 		resp->SECINFO4resok_val = resok_val;
1276 	} else {
1277 		int ret_cnt = 0, k = 0;
1278 		int *flavor_list;
1279 
1280 		count = exi->exi_export.ex_seccnt; /* total sec count */
1281 		secp = exi->exi_export.ex_secinfo;
1282 
1283 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1284 		/* find out which flavors to return */
1285 		for (i = 0; i < count; i ++) {
1286 			int access, flavor, perm;
1287 
1288 			flavor = secp[i].s_secinfo.sc_nfsnum;
1289 			perm = secp[i].s_flags;
1290 
1291 			access = nfsauth4_secinfo_access(exi, cs->req,
1292 			    flavor, perm, cs->basecr);
1293 
1294 			if (! (access & NFSAUTH_DENIED) &&
1295 			    ! (access & NFSAUTH_WRONGSEC)) {
1296 				flavor_list[ret_cnt] = flavor;
1297 				ret_cnt++;
1298 			}
1299 		}
1300 
1301 		/* Create the returning SECINFO value */
1302 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1303 
1304 		for (i = 0; i < count; i++) {
1305 			/*
1306 			 * If the flavor is in the flavor list,
1307 			 * fill in resok_val.
1308 			 */
1309 			si = &secp[i].s_secinfo;
1310 			if (in_flavor_list(si->sc_nfsnum,
1311 			    flavor_list, ret_cnt)) {
1312 				resok_val[k].flavor = si->sc_rpcnum;
1313 				if (resok_val[k].flavor == RPCSEC_GSS) {
1314 					rpcsec_gss_info *info;
1315 
1316 					info = &resok_val[k].flavor_info;
1317 					info->qop = si->sc_qop;
1318 					info->service = (rpc_gss_svc_t)
1319 					    si->sc_service;
1320 
1321 					/* get oid opaque data */
1322 					info->oid.sec_oid4_len =
1323 					    si->sc_gss_mech_type->length;
1324 					info->oid.sec_oid4_val = kmem_alloc(
1325 					    si->sc_gss_mech_type->length,
1326 					    KM_SLEEP);
1327 					bcopy(si->sc_gss_mech_type->elements,
1328 					    info->oid.sec_oid4_val,
1329 					    info->oid.sec_oid4_len);
1330 				}
1331 				k++;
1332 			}
1333 			if (k >= ret_cnt)
1334 				break;
1335 		}
1336 		resp->SECINFO4resok_len = ret_cnt;
1337 		resp->SECINFO4resok_val = resok_val;
1338 		kmem_free(flavor_list, count * sizeof (int));
1339 	}
1340 
1341 	VN_RELE(vp);
1342 	return (NFS4_OK);
1343 }
1344 
1345 /*
1346  * SECINFO (Operation 33): Obtain required security information on
1347  * the component name in the format of (security-mechanism-oid, qop, service)
1348  * triplets.
1349  */
1350 /* ARGSUSED */
1351 static void
1352 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1353     struct compound_state *cs)
1354 {
1355 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1356 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1357 	utf8string *utfnm = &args->name;
1358 	uint_t len;
1359 	char *nm;
1360 	struct sockaddr *ca;
1361 	char *name = NULL;
1362 	nfsstat4 status = NFS4_OK;
1363 
1364 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1365 	    SECINFO4args *, args);
1366 
1367 	/*
1368 	 * Current file handle (cfh) should have been set before getting
1369 	 * into this function. If not, return error.
1370 	 */
1371 	if (cs->vp == NULL) {
1372 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1373 		goto out;
1374 	}
1375 
1376 	if (cs->vp->v_type != VDIR) {
1377 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1378 		goto out;
1379 	}
1380 
1381 	/*
1382 	 * Verify the component name. If failed, error out, but
1383 	 * do not error out if the component name is a "..".
1384 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1385 	 */
1386 	status = utf8_dir_verify(utfnm);
1387 	if (status != NFS4_OK) {
1388 		if (utfnm->utf8string_len != 2 ||
1389 		    utfnm->utf8string_val[0] != '.' ||
1390 		    utfnm->utf8string_val[1] != '.') {
1391 			*cs->statusp = resp->status = status;
1392 			goto out;
1393 		}
1394 	}
1395 
1396 	nm = utf8_to_str(utfnm, &len, NULL);
1397 	if (nm == NULL) {
1398 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1399 		goto out;
1400 	}
1401 
1402 	if (len > MAXNAMELEN) {
1403 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1404 		kmem_free(nm, len);
1405 		goto out;
1406 	}
1407 
1408 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1409 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1410 	    MAXPATHLEN  + 1);
1411 
1412 	if (name == NULL) {
1413 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1414 		kmem_free(nm, len);
1415 		goto out;
1416 	}
1417 
1418 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1419 
1420 	if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1421 		/*
1422 		 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1423 		 *
1424 		 * 2.6.3.1.1.8
1425 		 *	SECINFO and SECINFO_NO_NAME consume the current
1426 		 *	filehandle (note that this is a change from NFSv4.0).
1427 		 *
1428 		 * 18.29.3
1429 		 *	On success, the current filehandle is consumed (see
1430 		 *	Section 2.6.3.1.1.8), and if the next operation after
1431 		 *	SECINFO tries to use the current filehandle, that
1432 		 *	operation will fail with the status
1433 		 *	NFS4ERR_NOFILEHANDLE.
1434 		 */
1435 		VN_RELE(cs->vp);
1436 		cs->vp = NULL;
1437 	}
1438 
1439 	if (name != nm)
1440 		kmem_free(name, MAXPATHLEN + 1);
1441 	kmem_free(nm, len);
1442 
1443 out:
1444 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1445 	    SECINFO4res *, resp);
1446 }
1447 
1448 /*
1449  * Free SECINFO result.
1450  */
1451 /* ARGSUSED */
1452 static void
1453 rfs4_op_secinfo_free(nfs_resop4 *resop)
1454 {
1455 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1456 	int count, i;
1457 	secinfo4 *resok_val;
1458 
1459 	/* If this is not an Ok result, nothing to free. */
1460 	if (resp->status != NFS4_OK) {
1461 		return;
1462 	}
1463 
1464 	count = resp->SECINFO4resok_len;
1465 	resok_val = resp->SECINFO4resok_val;
1466 
1467 	for (i = 0; i < count; i++) {
1468 		if (resok_val[i].flavor == RPCSEC_GSS) {
1469 			rpcsec_gss_info *info;
1470 
1471 			info = &resok_val[i].flavor_info;
1472 			kmem_free(info->oid.sec_oid4_val,
1473 			    info->oid.sec_oid4_len);
1474 		}
1475 	}
1476 	kmem_free(resok_val, count * sizeof (secinfo4));
1477 	resp->SECINFO4resok_len = 0;
1478 	resp->SECINFO4resok_val = NULL;
1479 }
1480 
1481 /* ARGSUSED */
1482 static void
1483 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1484     struct compound_state *cs)
1485 {
1486 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1487 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1488 	int error;
1489 	vnode_t *vp;
1490 	struct vattr va;
1491 	int checkwriteperm;
1492 	cred_t *cr = cs->cr;
1493 	bslabel_t *clabel, *slabel;
1494 	ts_label_t *tslabel;
1495 	boolean_t admin_low_client;
1496 
1497 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1498 	    ACCESS4args *, args);
1499 
1500 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1501 	if (cs->access == CS_ACCESS_DENIED) {
1502 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1503 		goto out;
1504 	}
1505 #endif
1506 	if (cs->vp == NULL) {
1507 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1508 		goto out;
1509 	}
1510 
1511 	ASSERT(cr != NULL);
1512 
1513 	vp = cs->vp;
1514 
1515 	/*
1516 	 * If the file system is exported read only, it is not appropriate
1517 	 * to check write permissions for regular files and directories.
1518 	 * Special files are interpreted by the client, so the underlying
1519 	 * permissions are sent back to the client for interpretation.
1520 	 */
1521 	if (rdonly4(req, cs) &&
1522 	    (vp->v_type == VREG || vp->v_type == VDIR))
1523 		checkwriteperm = 0;
1524 	else
1525 		checkwriteperm = 1;
1526 
1527 	/*
1528 	 * XXX
1529 	 * We need the mode so that we can correctly determine access
1530 	 * permissions relative to a mandatory lock file.  Access to
1531 	 * mandatory lock files is denied on the server, so it might
1532 	 * as well be reflected to the server during the open.
1533 	 */
1534 	va.va_mask = AT_MODE;
1535 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1536 	if (error) {
1537 		*cs->statusp = resp->status = puterrno4(error);
1538 		goto out;
1539 	}
1540 	resp->access = 0;
1541 	resp->supported = 0;
1542 
1543 	if (is_system_labeled()) {
1544 		ASSERT(req->rq_label != NULL);
1545 		clabel = req->rq_label;
1546 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1547 		    "got client label from request(1)",
1548 		    struct svc_req *, req);
1549 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1550 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1551 				*cs->statusp = resp->status = puterrno4(EACCES);
1552 				goto out;
1553 			}
1554 			slabel = label2bslabel(tslabel);
1555 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1556 			    char *, "got server label(1) for vp(2)",
1557 			    bslabel_t *, slabel, vnode_t *, vp);
1558 
1559 			admin_low_client = B_FALSE;
1560 		} else
1561 			admin_low_client = B_TRUE;
1562 	}
1563 
1564 	if (args->access & ACCESS4_READ) {
1565 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1566 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1567 		    (!is_system_labeled() || admin_low_client ||
1568 		    bldominates(clabel, slabel)))
1569 			resp->access |= ACCESS4_READ;
1570 		resp->supported |= ACCESS4_READ;
1571 	}
1572 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1573 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1574 		if (!error && (!is_system_labeled() || admin_low_client ||
1575 		    bldominates(clabel, slabel)))
1576 			resp->access |= ACCESS4_LOOKUP;
1577 		resp->supported |= ACCESS4_LOOKUP;
1578 	}
1579 	if (checkwriteperm &&
1580 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1581 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1582 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1583 		    (!is_system_labeled() || admin_low_client ||
1584 		    blequal(clabel, slabel)))
1585 			resp->access |=
1586 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1587 		resp->supported |=
1588 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1589 	}
1590 
1591 	if (checkwriteperm &&
1592 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1593 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1594 		if (!error && (!is_system_labeled() || admin_low_client ||
1595 		    blequal(clabel, slabel)))
1596 			resp->access |= ACCESS4_DELETE;
1597 		resp->supported |= ACCESS4_DELETE;
1598 	}
1599 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1600 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1601 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1602 		    (!is_system_labeled() || admin_low_client ||
1603 		    bldominates(clabel, slabel)))
1604 			resp->access |= ACCESS4_EXECUTE;
1605 		resp->supported |= ACCESS4_EXECUTE;
1606 	}
1607 
1608 	if (is_system_labeled() && !admin_low_client)
1609 		label_rele(tslabel);
1610 
1611 	*cs->statusp = resp->status = NFS4_OK;
1612 out:
1613 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1614 	    ACCESS4res *, resp);
1615 }
1616 
1617 /* ARGSUSED */
1618 static void
1619 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1620     struct compound_state *cs)
1621 {
1622 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1623 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1624 	int error;
1625 	vnode_t *vp = cs->vp;
1626 	cred_t *cr = cs->cr;
1627 	vattr_t va;
1628 	nfs4_srv_t *nsrv4;
1629 
1630 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1631 	    COMMIT4args *, args);
1632 
1633 	if (vp == NULL) {
1634 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1635 		goto out;
1636 	}
1637 	if (cs->access == CS_ACCESS_DENIED) {
1638 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1639 		goto out;
1640 	}
1641 
1642 	if (args->offset + args->count < args->offset) {
1643 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1644 		goto out;
1645 	}
1646 
1647 	va.va_mask = AT_UID;
1648 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1649 
1650 	/*
1651 	 * If we can't get the attributes, then we can't do the
1652 	 * right access checking.  So, we'll fail the request.
1653 	 */
1654 	if (error) {
1655 		*cs->statusp = resp->status = puterrno4(error);
1656 		goto out;
1657 	}
1658 	if (rdonly4(req, cs)) {
1659 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1660 		goto out;
1661 	}
1662 
1663 	if (vp->v_type != VREG) {
1664 		if (vp->v_type == VDIR)
1665 			resp->status = NFS4ERR_ISDIR;
1666 		else
1667 			resp->status = NFS4ERR_INVAL;
1668 		*cs->statusp = resp->status;
1669 		goto out;
1670 	}
1671 
1672 	if (crgetuid(cr) != va.va_uid &&
1673 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1674 		*cs->statusp = resp->status = puterrno4(error);
1675 		goto out;
1676 	}
1677 
1678 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1679 
1680 	if (error) {
1681 		*cs->statusp = resp->status = puterrno4(error);
1682 		goto out;
1683 	}
1684 
1685 	nsrv4 = nfs4_get_srv();
1686 	*cs->statusp = resp->status = NFS4_OK;
1687 	resp->writeverf = nsrv4->write4verf;
1688 out:
1689 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1690 	    COMMIT4res *, resp);
1691 }
1692 
1693 /*
1694  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1695  * was completed. It does the nfsv4 create for special files.
1696  */
1697 /* ARGSUSED */
1698 static vnode_t *
1699 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1700     struct compound_state *cs, vattr_t *vap, char *nm)
1701 {
1702 	int error;
1703 	cred_t *cr = cs->cr;
1704 	vnode_t *dvp = cs->vp;
1705 	vnode_t *vp = NULL;
1706 	int mode;
1707 	enum vcexcl excl;
1708 
1709 	switch (args->type) {
1710 	case NF4CHR:
1711 	case NF4BLK:
1712 		if (secpolicy_sys_devices(cr) != 0) {
1713 			*cs->statusp = resp->status = NFS4ERR_PERM;
1714 			return (NULL);
1715 		}
1716 		if (args->type == NF4CHR)
1717 			vap->va_type = VCHR;
1718 		else
1719 			vap->va_type = VBLK;
1720 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1721 		    args->ftype4_u.devdata.specdata2);
1722 		vap->va_mask |= AT_RDEV;
1723 		break;
1724 	case NF4SOCK:
1725 		vap->va_type = VSOCK;
1726 		break;
1727 	case NF4FIFO:
1728 		vap->va_type = VFIFO;
1729 		break;
1730 	default:
1731 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1732 		return (NULL);
1733 	}
1734 
1735 	/*
1736 	 * Must specify the mode.
1737 	 */
1738 	if (!(vap->va_mask & AT_MODE)) {
1739 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1740 		return (NULL);
1741 	}
1742 
1743 	excl = EXCL;
1744 
1745 	mode = 0;
1746 
1747 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1748 	if (error) {
1749 		*cs->statusp = resp->status = puterrno4(error);
1750 		return (NULL);
1751 	}
1752 	return (vp);
1753 }
1754 
1755 /*
1756  * nfsv4 create is used to create non-regular files. For regular files,
1757  * use nfsv4 open.
1758  */
1759 /* ARGSUSED */
1760 static void
1761 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1762     struct compound_state *cs)
1763 {
1764 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1765 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1766 	int error;
1767 	struct vattr bva, iva, iva2, ava, *vap;
1768 	cred_t *cr = cs->cr;
1769 	vnode_t *dvp = cs->vp;
1770 	vnode_t *vp = NULL;
1771 	vnode_t *realvp;
1772 	char *nm, *lnm;
1773 	uint_t len, llen;
1774 	int syncval = 0;
1775 	struct nfs4_svgetit_arg sarg;
1776 	struct nfs4_ntov_table ntov;
1777 	struct statvfs64 sb;
1778 	nfsstat4 status;
1779 	struct sockaddr *ca;
1780 	char *name = NULL;
1781 	char *lname = NULL;
1782 
1783 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1784 	    CREATE4args *, args);
1785 
1786 	resp->attrset = 0;
1787 
1788 	if (dvp == NULL) {
1789 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1790 		goto out;
1791 	}
1792 
1793 	/*
1794 	 * If there is an unshared filesystem mounted on this vnode,
1795 	 * do not allow to create an object in this directory.
1796 	 */
1797 	if (vn_ismntpt(dvp)) {
1798 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1799 		goto out;
1800 	}
1801 
1802 	/* Verify that type is correct */
1803 	switch (args->type) {
1804 	case NF4LNK:
1805 	case NF4BLK:
1806 	case NF4CHR:
1807 	case NF4SOCK:
1808 	case NF4FIFO:
1809 	case NF4DIR:
1810 		break;
1811 	default:
1812 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1813 		goto out;
1814 	};
1815 
1816 	if (cs->access == CS_ACCESS_DENIED) {
1817 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1818 		goto out;
1819 	}
1820 	if (dvp->v_type != VDIR) {
1821 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1822 		goto out;
1823 	}
1824 	status = utf8_dir_verify(&args->objname);
1825 	if (status != NFS4_OK) {
1826 		*cs->statusp = resp->status = status;
1827 		goto out;
1828 	}
1829 
1830 	if (rdonly4(req, cs)) {
1831 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1832 		goto out;
1833 	}
1834 
1835 	/*
1836 	 * Name of newly created object
1837 	 */
1838 	nm = utf8_to_fn(&args->objname, &len, NULL);
1839 	if (nm == NULL) {
1840 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1841 		goto out;
1842 	}
1843 
1844 	if (len > MAXNAMELEN) {
1845 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1846 		kmem_free(nm, len);
1847 		goto out;
1848 	}
1849 
1850 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1851 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1852 	    MAXPATHLEN  + 1);
1853 
1854 	if (name == NULL) {
1855 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1856 		kmem_free(nm, len);
1857 		goto out;
1858 	}
1859 
1860 	resp->attrset = 0;
1861 
1862 	sarg.sbp = &sb;
1863 	sarg.is_referral = B_FALSE;
1864 	nfs4_ntov_table_init(&ntov);
1865 
1866 	status = do_rfs4_set_attrs(&resp->attrset,
1867 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1868 
1869 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1870 		status = NFS4ERR_INVAL;
1871 
1872 	if (status != NFS4_OK) {
1873 		*cs->statusp = resp->status = status;
1874 		if (name != nm)
1875 			kmem_free(name, MAXPATHLEN + 1);
1876 		kmem_free(nm, len);
1877 		nfs4_ntov_table_free(&ntov, &sarg);
1878 		resp->attrset = 0;
1879 		goto out;
1880 	}
1881 
1882 	/* Get "before" change value */
1883 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1884 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1885 	if (error) {
1886 		*cs->statusp = resp->status = puterrno4(error);
1887 		if (name != nm)
1888 			kmem_free(name, MAXPATHLEN + 1);
1889 		kmem_free(nm, len);
1890 		nfs4_ntov_table_free(&ntov, &sarg);
1891 		resp->attrset = 0;
1892 		goto out;
1893 	}
1894 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1895 
1896 	vap = sarg.vap;
1897 
1898 	/*
1899 	 * Set the default initial values for attributes when the parent
1900 	 * directory does not have the VSUID/VSGID bit set and they have
1901 	 * not been specified in createattrs.
1902 	 */
1903 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1904 		vap->va_uid = crgetuid(cr);
1905 		vap->va_mask |= AT_UID;
1906 	}
1907 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1908 		vap->va_gid = crgetgid(cr);
1909 		vap->va_mask |= AT_GID;
1910 	}
1911 
1912 	vap->va_mask |= AT_TYPE;
1913 	switch (args->type) {
1914 	case NF4DIR:
1915 		vap->va_type = VDIR;
1916 		if ((vap->va_mask & AT_MODE) == 0) {
1917 			vap->va_mode = 0700;	/* default: owner rwx only */
1918 			vap->va_mask |= AT_MODE;
1919 		}
1920 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1921 		if (error)
1922 			break;
1923 
1924 		/*
1925 		 * Get the initial "after" sequence number, if it fails,
1926 		 * set to zero
1927 		 */
1928 		iva.va_mask = AT_SEQ;
1929 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1930 			iva.va_seq = 0;
1931 		break;
1932 	case NF4LNK:
1933 		vap->va_type = VLNK;
1934 		if ((vap->va_mask & AT_MODE) == 0) {
1935 			vap->va_mode = 0700;	/* default: owner rwx only */
1936 			vap->va_mask |= AT_MODE;
1937 		}
1938 
1939 		/*
1940 		 * symlink names must be treated as data
1941 		 */
1942 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
1943 		    &llen, NULL);
1944 
1945 		if (lnm == NULL) {
1946 			*cs->statusp = resp->status = NFS4ERR_INVAL;
1947 			if (name != nm)
1948 				kmem_free(name, MAXPATHLEN + 1);
1949 			kmem_free(nm, len);
1950 			nfs4_ntov_table_free(&ntov, &sarg);
1951 			resp->attrset = 0;
1952 			goto out;
1953 		}
1954 
1955 		if (llen > MAXPATHLEN) {
1956 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1957 			if (name != nm)
1958 				kmem_free(name, MAXPATHLEN + 1);
1959 			kmem_free(nm, len);
1960 			kmem_free(lnm, llen);
1961 			nfs4_ntov_table_free(&ntov, &sarg);
1962 			resp->attrset = 0;
1963 			goto out;
1964 		}
1965 
1966 		lname = nfscmd_convname(ca, cs->exi, lnm,
1967 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1968 
1969 		if (lname == NULL) {
1970 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1971 			if (name != nm)
1972 				kmem_free(name, MAXPATHLEN + 1);
1973 			kmem_free(nm, len);
1974 			kmem_free(lnm, llen);
1975 			nfs4_ntov_table_free(&ntov, &sarg);
1976 			resp->attrset = 0;
1977 			goto out;
1978 		}
1979 
1980 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1981 		if (lname != lnm)
1982 			kmem_free(lname, MAXPATHLEN + 1);
1983 		kmem_free(lnm, llen);
1984 		if (error)
1985 			break;
1986 
1987 		/*
1988 		 * Get the initial "after" sequence number, if it fails,
1989 		 * set to zero
1990 		 */
1991 		iva.va_mask = AT_SEQ;
1992 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1993 			iva.va_seq = 0;
1994 
1995 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1996 		    NULL, NULL, NULL);
1997 		if (error)
1998 			break;
1999 
2000 		/*
2001 		 * va_seq is not safe over VOP calls, check it again
2002 		 * if it has changed zero out iva to force atomic = FALSE.
2003 		 */
2004 		iva2.va_mask = AT_SEQ;
2005 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
2006 		    iva2.va_seq != iva.va_seq)
2007 			iva.va_seq = 0;
2008 		break;
2009 	default:
2010 		/*
2011 		 * probably a special file.
2012 		 */
2013 		if ((vap->va_mask & AT_MODE) == 0) {
2014 			vap->va_mode = 0600;	/* default: owner rw only */
2015 			vap->va_mask |= AT_MODE;
2016 		}
2017 		syncval = FNODSYNC;
2018 		/*
2019 		 * We know this will only generate one VOP call
2020 		 */
2021 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
2022 
2023 		if (vp == NULL) {
2024 			if (name != nm)
2025 				kmem_free(name, MAXPATHLEN + 1);
2026 			kmem_free(nm, len);
2027 			nfs4_ntov_table_free(&ntov, &sarg);
2028 			resp->attrset = 0;
2029 			goto out;
2030 		}
2031 
2032 		/*
2033 		 * Get the initial "after" sequence number, if it fails,
2034 		 * set to zero
2035 		 */
2036 		iva.va_mask = AT_SEQ;
2037 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2038 			iva.va_seq = 0;
2039 
2040 		break;
2041 	}
2042 	if (name != nm)
2043 		kmem_free(name, MAXPATHLEN + 1);
2044 	kmem_free(nm, len);
2045 
2046 	if (error) {
2047 		*cs->statusp = resp->status = puterrno4(error);
2048 	}
2049 
2050 	/*
2051 	 * Force modified data and metadata out to stable storage.
2052 	 */
2053 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2054 
2055 	if (resp->status != NFS4_OK) {
2056 		if (vp != NULL)
2057 			VN_RELE(vp);
2058 		nfs4_ntov_table_free(&ntov, &sarg);
2059 		resp->attrset = 0;
2060 		goto out;
2061 	}
2062 
2063 	/*
2064 	 * Finish setup of cinfo response, "before" value already set.
2065 	 * Get "after" change value, if it fails, simply return the
2066 	 * before value.
2067 	 */
2068 	ava.va_mask = AT_CTIME|AT_SEQ;
2069 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
2070 		ava.va_ctime = bva.va_ctime;
2071 		ava.va_seq = 0;
2072 	}
2073 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
2074 
2075 	/*
2076 	 * True verification that object was created with correct
2077 	 * attrs is impossible.  The attrs could have been changed
2078 	 * immediately after object creation.  If attributes did
2079 	 * not verify, the only recourse for the server is to
2080 	 * destroy the object.  Maybe if some attrs (like gid)
2081 	 * are set incorrectly, the object should be destroyed;
2082 	 * however, seems bad as a default policy.  Do we really
2083 	 * want to destroy an object over one of the times not
2084 	 * verifying correctly?  For these reasons, the server
2085 	 * currently sets bits in attrset for createattrs
2086 	 * that were set; however, no verification is done.
2087 	 *
2088 	 * vmask_to_nmask accounts for vattr bits set on create
2089 	 *	[do_rfs4_set_attrs() only sets resp bits for
2090 	 *	 non-vattr/vfs bits.]
2091 	 * Mask off any bits set by default so as not to return
2092 	 * more attrset bits than were requested in createattrs
2093 	 */
2094 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
2095 	resp->attrset &= args->createattrs.attrmask;
2096 	nfs4_ntov_table_free(&ntov, &sarg);
2097 
2098 	error = makefh4(&cs->fh, vp, cs->exi);
2099 	if (error) {
2100 		*cs->statusp = resp->status = puterrno4(error);
2101 	}
2102 
2103 	/*
2104 	 * The cinfo.atomic = TRUE only if we got no errors, we have
2105 	 * non-zero va_seq's, and it has incremented by exactly one
2106 	 * during the creation and it didn't change during the VOP_LOOKUP
2107 	 * or VOP_FSYNC.
2108 	 */
2109 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
2110 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
2111 		resp->cinfo.atomic = TRUE;
2112 	else
2113 		resp->cinfo.atomic = FALSE;
2114 
2115 	/*
2116 	 * Force modified metadata out to stable storage.
2117 	 *
2118 	 * if a underlying vp exists, pass it to VOP_FSYNC
2119 	 */
2120 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2121 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
2122 	else
2123 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
2124 
2125 	if (resp->status != NFS4_OK) {
2126 		VN_RELE(vp);
2127 		goto out;
2128 	}
2129 	if (cs->vp)
2130 		VN_RELE(cs->vp);
2131 
2132 	cs->vp = vp;
2133 	*cs->statusp = resp->status = NFS4_OK;
2134 out:
2135 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
2136 	    CREATE4res *, resp);
2137 }
2138 
2139 /*ARGSUSED*/
2140 static void
2141 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2142     struct compound_state *cs)
2143 {
2144 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2145 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2146 
2147 	rfs4_op_inval(argop, resop, req, cs);
2148 
2149 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2150 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2151 }
2152 
2153 /*ARGSUSED*/
2154 static void
2155 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2156     struct compound_state *cs)
2157 {
2158 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2159 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2160 	rfs4_deleg_state_t *dsp;
2161 	nfsstat4 status;
2162 
2163 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2164 	    DELEGRETURN4args *, args);
2165 
2166 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2167 	resp->status = *cs->statusp = status;
2168 	if (status != NFS4_OK)
2169 		goto out;
2170 
2171 	/* Ensure specified filehandle matches */
2172 	if (cs->vp != dsp->rds_finfo->rf_vp) {
2173 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2174 	} else
2175 		rfs4_return_deleg(dsp, FALSE);
2176 
2177 	rfs4_update_lease(dsp->rds_client);
2178 
2179 	rfs4_deleg_state_rele(dsp);
2180 out:
2181 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2182 	    DELEGRETURN4res *, resp);
2183 }
2184 
2185 /*
2186  * Check to see if a given "flavor" is an explicitly shared flavor.
2187  * The assumption of this routine is the "flavor" is already a valid
2188  * flavor in the secinfo list of "exi".
2189  *
2190  *	e.g.
2191  *		# share -o sec=flavor1 /export
2192  *		# share -o sec=flavor2 /export/home
2193  *
2194  *		flavor2 is not an explicitly shared flavor for /export,
2195  *		however it is in the secinfo list for /export thru the
2196  *		server namespace setup.
2197  */
2198 int
2199 is_exported_sec(int flavor, struct exportinfo *exi)
2200 {
2201 	int	i;
2202 	struct secinfo *sp;
2203 
2204 	sp = exi->exi_export.ex_secinfo;
2205 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2206 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2207 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2208 			return (SEC_REF_EXPORTED(&sp[i]));
2209 		}
2210 	}
2211 
2212 	/* Should not reach this point based on the assumption */
2213 	return (0);
2214 }
2215 
2216 /*
2217  * Check if the security flavor used in the request matches what is
2218  * required at the export point or at the root pseudo node (exi_root).
2219  *
2220  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2221  *
2222  */
2223 static int
2224 secinfo_match_or_authnone(struct compound_state *cs)
2225 {
2226 	int	i;
2227 	struct secinfo *sp;
2228 
2229 	/*
2230 	 * Check cs->nfsflavor (from the request) against
2231 	 * the current export data in cs->exi.
2232 	 */
2233 	sp = cs->exi->exi_export.ex_secinfo;
2234 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2235 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2236 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2237 			return (1);
2238 	}
2239 
2240 	return (0);
2241 }
2242 
2243 /*
2244  * Check the access authority for the client and return the correct error.
2245  */
2246 nfsstat4
2247 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2248 {
2249 	int	authres;
2250 
2251 	/*
2252 	 * First, check if the security flavor used in the request
2253 	 * are among the flavors set in the server namespace.
2254 	 */
2255 	if (!secinfo_match_or_authnone(cs)) {
2256 		*cs->statusp = NFS4ERR_WRONGSEC;
2257 		return (*cs->statusp);
2258 	}
2259 
2260 	authres = checkauth4(cs, req);
2261 
2262 	if (authres > 0) {
2263 		*cs->statusp = NFS4_OK;
2264 		if (! (cs->access & CS_ACCESS_LIMITED))
2265 			cs->access = CS_ACCESS_OK;
2266 	} else if (authres == 0) {
2267 		*cs->statusp = NFS4ERR_ACCESS;
2268 	} else if (authres == -2) {
2269 		*cs->statusp = NFS4ERR_WRONGSEC;
2270 	} else {
2271 		*cs->statusp = NFS4ERR_DELAY;
2272 	}
2273 	return (*cs->statusp);
2274 }
2275 
2276 /*
2277  * bitmap4_to_attrmask is called by getattr and readdir.
2278  * It sets up the vattr mask and determines whether vfsstat call is needed
2279  * based on the input bitmap.
2280  * Returns nfsv4 status.
2281  */
2282 static nfsstat4
2283 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2284 {
2285 	int i;
2286 	uint_t	va_mask;
2287 	struct statvfs64 *sbp = sargp->sbp;
2288 
2289 	sargp->sbp = NULL;
2290 	sargp->flag = 0;
2291 	sargp->rdattr_error = NFS4_OK;
2292 	sargp->mntdfid_set = FALSE;
2293 	if (sargp->cs->vp)
2294 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2295 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2296 	else
2297 		sargp->xattr = 0;
2298 
2299 	/*
2300 	 * Set rdattr_error_req to true if return error per
2301 	 * failed entry rather than fail the readdir.
2302 	 */
2303 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2304 		sargp->rdattr_error_req = 1;
2305 	else
2306 		sargp->rdattr_error_req = 0;
2307 
2308 	/*
2309 	 * generate the va_mask
2310 	 * Handle the easy cases first
2311 	 */
2312 	switch (breq) {
2313 	case NFS4_NTOV_ATTR_MASK:
2314 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2315 		return (NFS4_OK);
2316 
2317 	case NFS4_FS_ATTR_MASK:
2318 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2319 		sargp->sbp = sbp;
2320 		return (NFS4_OK);
2321 
2322 	case NFS4_NTOV_ATTR_CACHE_MASK:
2323 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2324 		return (NFS4_OK);
2325 
2326 	case FATTR4_LEASE_TIME_MASK:
2327 		sargp->vap->va_mask = 0;
2328 		return (NFS4_OK);
2329 
2330 	default:
2331 		va_mask = 0;
2332 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2333 			if ((breq & nfs4_ntov_map[i].fbit) &&
2334 			    nfs4_ntov_map[i].vbit)
2335 				va_mask |= nfs4_ntov_map[i].vbit;
2336 		}
2337 
2338 		/*
2339 		 * Check is vfsstat is needed
2340 		 */
2341 		if (breq & NFS4_FS_ATTR_MASK)
2342 			sargp->sbp = sbp;
2343 
2344 		sargp->vap->va_mask = va_mask;
2345 		return (NFS4_OK);
2346 	}
2347 	/* NOTREACHED */
2348 }
2349 
2350 /*
2351  * bitmap4_get_sysattrs is called by getattr and readdir.
2352  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2353  * Returns nfsv4 status.
2354  */
2355 static nfsstat4
2356 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2357 {
2358 	int error;
2359 	struct compound_state *cs = sargp->cs;
2360 	vnode_t *vp = cs->vp;
2361 
2362 	if (sargp->sbp != NULL) {
2363 		error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2364 		if (error != 0) {
2365 			sargp->sbp = NULL;	/* to identify error */
2366 			return (puterrno4(error));
2367 		}
2368 	}
2369 
2370 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2371 }
2372 
2373 static void
2374 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2375 {
2376 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2377 	    KM_SLEEP);
2378 	ntovp->attrcnt = 0;
2379 	ntovp->vfsstat = FALSE;
2380 }
2381 
2382 static void
2383 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2384     struct nfs4_svgetit_arg *sargp)
2385 {
2386 	int i;
2387 	union nfs4_attr_u *na;
2388 	uint8_t *amap;
2389 
2390 	/*
2391 	 * XXX Should do the same checks for whether the bit is set
2392 	 */
2393 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2394 	    i < ntovp->attrcnt; i++, na++, amap++) {
2395 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2396 		    NFS4ATTR_FREEIT, sargp, na);
2397 	}
2398 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2399 		/*
2400 		 * xdr_free for getattr will be done later
2401 		 */
2402 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2403 		    i < ntovp->attrcnt; i++, na++, amap++) {
2404 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2405 		}
2406 	}
2407 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2408 }
2409 
2410 /*
2411  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2412  */
2413 static nfsstat4
2414 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2415     struct nfs4_svgetit_arg *sargp)
2416 {
2417 	int error = 0;
2418 	int i, k;
2419 	struct nfs4_ntov_table ntov;
2420 	XDR xdr;
2421 	ulong_t xdr_size;
2422 	char *xdr_attrs;
2423 	nfsstat4 status = NFS4_OK;
2424 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2425 	union nfs4_attr_u *na;
2426 	uint8_t *amap;
2427 
2428 	sargp->op = NFS4ATTR_GETIT;
2429 	sargp->flag = 0;
2430 
2431 	fattrp->attrmask = 0;
2432 	/* if no bits requested, then return empty fattr4 */
2433 	if (breq == 0) {
2434 		fattrp->attrlist4_len = 0;
2435 		fattrp->attrlist4 = NULL;
2436 		return (NFS4_OK);
2437 	}
2438 
2439 	/*
2440 	 * return NFS4ERR_INVAL when client requests write-only attrs
2441 	 */
2442 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2443 		return (NFS4ERR_INVAL);
2444 
2445 	nfs4_ntov_table_init(&ntov);
2446 	na = ntov.na;
2447 	amap = ntov.amap;
2448 
2449 	/*
2450 	 * Now loop to get or verify the attrs
2451 	 */
2452 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2453 		if (breq & nfs4_ntov_map[i].fbit) {
2454 			if ((*nfs4_ntov_map[i].sv_getit)(
2455 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2456 
2457 				error = (*nfs4_ntov_map[i].sv_getit)(
2458 				    NFS4ATTR_GETIT, sargp, na);
2459 
2460 				/*
2461 				 * Possible error values:
2462 				 * >0 if sv_getit failed to
2463 				 * get the attr; 0 if succeeded;
2464 				 * <0 if rdattr_error and the
2465 				 * attribute cannot be returned.
2466 				 */
2467 				if (error && !(sargp->rdattr_error_req))
2468 					goto done;
2469 				/*
2470 				 * If error then just for entry
2471 				 */
2472 				if (error == 0) {
2473 					fattrp->attrmask |=
2474 					    nfs4_ntov_map[i].fbit;
2475 					*amap++ =
2476 					    (uint8_t)nfs4_ntov_map[i].nval;
2477 					na++;
2478 					(ntov.attrcnt)++;
2479 				} else if ((error > 0) &&
2480 				    (sargp->rdattr_error == NFS4_OK)) {
2481 					sargp->rdattr_error = puterrno4(error);
2482 				}
2483 				error = 0;
2484 			}
2485 		}
2486 	}
2487 
2488 	/*
2489 	 * If rdattr_error was set after the return value for it was assigned,
2490 	 * update it.
2491 	 */
2492 	if (prev_rdattr_error != sargp->rdattr_error) {
2493 		na = ntov.na;
2494 		amap = ntov.amap;
2495 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2496 			k = *amap;
2497 			if (k < FATTR4_RDATTR_ERROR) {
2498 				continue;
2499 			}
2500 			if ((k == FATTR4_RDATTR_ERROR) &&
2501 			    ((*nfs4_ntov_map[k].sv_getit)(
2502 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2503 
2504 				(void) (*nfs4_ntov_map[k].sv_getit)(
2505 				    NFS4ATTR_GETIT, sargp, na);
2506 			}
2507 			break;
2508 		}
2509 	}
2510 
2511 	xdr_size = 0;
2512 	na = ntov.na;
2513 	amap = ntov.amap;
2514 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2515 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2516 	}
2517 
2518 	fattrp->attrlist4_len = xdr_size;
2519 	if (xdr_size) {
2520 		/* freed by rfs4_op_getattr_free() */
2521 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2522 
2523 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2524 
2525 		na = ntov.na;
2526 		amap = ntov.amap;
2527 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2528 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2529 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
2530 				    int, *amap);
2531 				status = NFS4ERR_SERVERFAULT;
2532 				break;
2533 			}
2534 		}
2535 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2536 	} else {
2537 		fattrp->attrlist4 = NULL;
2538 	}
2539 done:
2540 
2541 	nfs4_ntov_table_free(&ntov, sargp);
2542 
2543 	if (error != 0)
2544 		status = puterrno4(error);
2545 
2546 	return (status);
2547 }
2548 
2549 /* ARGSUSED */
2550 static void
2551 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2552     struct compound_state *cs)
2553 {
2554 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2555 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2556 	struct nfs4_svgetit_arg sarg;
2557 	struct statvfs64 sb;
2558 	nfsstat4 status;
2559 
2560 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2561 	    GETATTR4args *, args);
2562 
2563 	if (cs->vp == NULL) {
2564 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2565 		goto out;
2566 	}
2567 
2568 	if (cs->access == CS_ACCESS_DENIED) {
2569 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2570 		goto out;
2571 	}
2572 
2573 	sarg.sbp = &sb;
2574 	sarg.cs = cs;
2575 	sarg.is_referral = B_FALSE;
2576 
2577 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2578 	if (status == NFS4_OK) {
2579 
2580 		status = bitmap4_get_sysattrs(&sarg);
2581 		if (status == NFS4_OK) {
2582 
2583 			/* Is this a referral? */
2584 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2585 				/* Older V4 Solaris client sees a link */
2586 				if (client_is_downrev(req))
2587 					sarg.vap->va_type = VLNK;
2588 				else
2589 					sarg.is_referral = B_TRUE;
2590 			}
2591 
2592 			status = do_rfs4_op_getattr(args->attr_request,
2593 			    &resp->obj_attributes, &sarg);
2594 		}
2595 	}
2596 	*cs->statusp = resp->status = status;
2597 out:
2598 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2599 	    GETATTR4res *, resp);
2600 }
2601 
2602 static void
2603 rfs4_op_getattr_free(nfs_resop4 *resop)
2604 {
2605 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2606 
2607 	nfs4_fattr4_free(&resp->obj_attributes);
2608 }
2609 
2610 /* ARGSUSED */
2611 static void
2612 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2613     struct compound_state *cs)
2614 {
2615 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2616 
2617 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2618 
2619 	if (cs->vp == NULL) {
2620 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2621 		goto out;
2622 	}
2623 	if (cs->access == CS_ACCESS_DENIED) {
2624 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2625 		goto out;
2626 	}
2627 
2628 	/* check for reparse point at the share point */
2629 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2630 		/* it's all bad */
2631 		cs->exi->exi_moved = 1;
2632 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2633 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2634 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2635 		return;
2636 	}
2637 
2638 	/* check for reparse point at vp */
2639 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2640 		/* it's not all bad */
2641 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2642 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2643 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2644 		return;
2645 	}
2646 
2647 	resp->object.nfs_fh4_val =
2648 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2649 	nfs_fh4_copy(&cs->fh, &resp->object);
2650 	*cs->statusp = resp->status = NFS4_OK;
2651 out:
2652 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2653 	    GETFH4res *, resp);
2654 }
2655 
2656 static void
2657 rfs4_op_getfh_free(nfs_resop4 *resop)
2658 {
2659 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2660 
2661 	if (resp->status == NFS4_OK &&
2662 	    resp->object.nfs_fh4_val != NULL) {
2663 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2664 		resp->object.nfs_fh4_val = NULL;
2665 		resp->object.nfs_fh4_len = 0;
2666 	}
2667 }
2668 
2669 /*
2670  * illegal: args: void
2671  *	    res : status (NFS4ERR_OP_ILLEGAL)
2672  */
2673 /* ARGSUSED */
2674 static void
2675 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2676     struct svc_req *req, struct compound_state *cs)
2677 {
2678 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2679 
2680 	resop->resop = OP_ILLEGAL;
2681 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2682 }
2683 
2684 /* ARGSUSED */
2685 static void
2686 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2687     struct compound_state *cs)
2688 {
2689 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2690 }
2691 
2692 /*
2693  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2694  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2695  */
2696 /* ARGSUSED */
2697 static void
2698 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2699     struct compound_state *cs)
2700 {
2701 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2702 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2703 	int error;
2704 	vnode_t *vp;
2705 	vnode_t *dvp;
2706 	struct vattr bdva, idva, adva;
2707 	char *nm;
2708 	uint_t  len;
2709 	struct sockaddr *ca;
2710 	char *name = NULL;
2711 	nfsstat4 status;
2712 
2713 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2714 	    LINK4args *, args);
2715 
2716 	/* SAVED_FH: source object */
2717 	vp = cs->saved_vp;
2718 	if (vp == NULL) {
2719 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2720 		goto out;
2721 	}
2722 
2723 	/* CURRENT_FH: target directory */
2724 	dvp = cs->vp;
2725 	if (dvp == NULL) {
2726 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2727 		goto out;
2728 	}
2729 
2730 	/*
2731 	 * If there is a non-shared filesystem mounted on this vnode,
2732 	 * do not allow to link any file in this directory.
2733 	 */
2734 	if (vn_ismntpt(dvp)) {
2735 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2736 		goto out;
2737 	}
2738 
2739 	if (cs->access == CS_ACCESS_DENIED) {
2740 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2741 		goto out;
2742 	}
2743 
2744 	/* Check source object's type validity */
2745 	if (vp->v_type == VDIR) {
2746 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2747 		goto out;
2748 	}
2749 
2750 	/* Check target directory's type */
2751 	if (dvp->v_type != VDIR) {
2752 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2753 		goto out;
2754 	}
2755 
2756 	if (cs->saved_exi != cs->exi) {
2757 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2758 		goto out;
2759 	}
2760 
2761 	status = utf8_dir_verify(&args->newname);
2762 	if (status != NFS4_OK) {
2763 		*cs->statusp = resp->status = status;
2764 		goto out;
2765 	}
2766 
2767 	nm = utf8_to_fn(&args->newname, &len, NULL);
2768 	if (nm == NULL) {
2769 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2770 		goto out;
2771 	}
2772 
2773 	if (len > MAXNAMELEN) {
2774 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2775 		kmem_free(nm, len);
2776 		goto out;
2777 	}
2778 
2779 	if (rdonly4(req, cs)) {
2780 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2781 		kmem_free(nm, len);
2782 		goto out;
2783 	}
2784 
2785 	/* Get "before" change value */
2786 	bdva.va_mask = AT_CTIME|AT_SEQ;
2787 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2788 	if (error) {
2789 		*cs->statusp = resp->status = puterrno4(error);
2790 		kmem_free(nm, len);
2791 		goto out;
2792 	}
2793 
2794 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2795 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2796 	    MAXPATHLEN  + 1);
2797 
2798 	if (name == NULL) {
2799 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2800 		kmem_free(nm, len);
2801 		goto out;
2802 	}
2803 
2804 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2805 
2806 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2807 
2808 	if (nm != name)
2809 		kmem_free(name, MAXPATHLEN + 1);
2810 	kmem_free(nm, len);
2811 
2812 	/*
2813 	 * Get the initial "after" sequence number, if it fails, set to zero
2814 	 */
2815 	idva.va_mask = AT_SEQ;
2816 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2817 		idva.va_seq = 0;
2818 
2819 	/*
2820 	 * Force modified data and metadata out to stable storage.
2821 	 */
2822 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2823 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2824 
2825 	if (error) {
2826 		*cs->statusp = resp->status = puterrno4(error);
2827 		goto out;
2828 	}
2829 
2830 	/*
2831 	 * Get "after" change value, if it fails, simply return the
2832 	 * before value.
2833 	 */
2834 	adva.va_mask = AT_CTIME|AT_SEQ;
2835 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2836 		adva.va_ctime = bdva.va_ctime;
2837 		adva.va_seq = 0;
2838 	}
2839 
2840 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2841 
2842 	/*
2843 	 * The cinfo.atomic = TRUE only if we have
2844 	 * non-zero va_seq's, and it has incremented by exactly one
2845 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2846 	 */
2847 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2848 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2849 		resp->cinfo.atomic = TRUE;
2850 	else
2851 		resp->cinfo.atomic = FALSE;
2852 
2853 	*cs->statusp = resp->status = NFS4_OK;
2854 out:
2855 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2856 	    LINK4res *, resp);
2857 }
2858 
2859 /*
2860  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2861  */
2862 
2863 /* ARGSUSED */
2864 static nfsstat4
2865 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2866 {
2867 	int error;
2868 	int different_export = 0;
2869 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2870 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2871 	nfsstat4 stat;
2872 	fid_t fid;
2873 	int attrdir, dotdot, walk;
2874 	bool_t is_newvp = FALSE;
2875 
2876 	if (cs->vp->v_flag & V_XATTRDIR) {
2877 		attrdir = 1;
2878 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2879 	} else {
2880 		attrdir = 0;
2881 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2882 	}
2883 
2884 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2885 
2886 	/*
2887 	 * If dotdotting, then need to check whether it's
2888 	 * above the root of a filesystem, or above an
2889 	 * export point.
2890 	 */
2891 	if (dotdot) {
2892 		vnode_t *zone_rootvp;
2893 
2894 		ASSERT(cs->exi != NULL);
2895 		zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2896 		/*
2897 		 * If dotdotting at the root of a filesystem, then
2898 		 * need to traverse back to the mounted-on filesystem
2899 		 * and do the dotdot lookup there.
2900 		 */
2901 		if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2902 
2903 			/*
2904 			 * If at the system root, then can
2905 			 * go up no further.
2906 			 */
2907 			if (VN_CMP(cs->vp, zone_rootvp))
2908 				return (puterrno4(ENOENT));
2909 
2910 			/*
2911 			 * Traverse back to the mounted-on filesystem
2912 			 */
2913 			cs->vp = untraverse(cs->vp, zone_rootvp);
2914 
2915 			/*
2916 			 * Set the different_export flag so we remember
2917 			 * to pick up a new exportinfo entry for
2918 			 * this new filesystem.
2919 			 */
2920 			different_export = 1;
2921 		} else {
2922 
2923 			/*
2924 			 * If dotdotting above an export point then set
2925 			 * the different_export to get new export info.
2926 			 */
2927 			different_export = nfs_exported(cs->exi, cs->vp);
2928 		}
2929 	}
2930 
2931 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2932 	    NULL, NULL, NULL);
2933 	if (error)
2934 		return (puterrno4(error));
2935 
2936 	/*
2937 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
2938 	 *
2939 	 * XXX if the vnode is a symlink and it is not visible in
2940 	 * a pseudo filesystem, return ENOENT (not following symlink).
2941 	 * V4 client can not mount such symlink. This is a regression
2942 	 * from V2/V3.
2943 	 *
2944 	 * In the same exported filesystem, if the security flavor used
2945 	 * is not an explicitly shared flavor, limit the view to the visible
2946 	 * list entries only. This is not a WRONGSEC case because it's already
2947 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2948 	 */
2949 	if (!different_export &&
2950 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2951 	    cs->access & CS_ACCESS_LIMITED)) {
2952 		if (! nfs_visible(cs->exi, vp, &different_export)) {
2953 			VN_RELE(vp);
2954 			return (puterrno4(ENOENT));
2955 		}
2956 	}
2957 
2958 	/*
2959 	 * If it's a mountpoint, then traverse it.
2960 	 */
2961 	if (vn_ismntpt(vp)) {
2962 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
2963 		pre_tvp = vp;		/* save pre-traversed vnode	*/
2964 
2965 		/*
2966 		 * hold pre_tvp to counteract rele by traverse.  We will
2967 		 * need pre_tvp below if checkexport4 fails
2968 		 */
2969 		VN_HOLD(pre_tvp);
2970 		if ((error = traverse(&vp)) != 0) {
2971 			VN_RELE(vp);
2972 			VN_RELE(pre_tvp);
2973 			return (puterrno4(error));
2974 		}
2975 		different_export = 1;
2976 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
2977 		/*
2978 		 * The vfsp comparison is to handle the case where
2979 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
2980 		 * and NFS is unaware of local fs transistions because
2981 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
2982 		 * the dir and the obj returned by lookup will have different
2983 		 * vfs ptrs.
2984 		 */
2985 		different_export = 1;
2986 	}
2987 
2988 	if (different_export) {
2989 
2990 		bzero(&fid, sizeof (fid));
2991 		fid.fid_len = MAXFIDSZ;
2992 		error = vop_fid_pseudo(vp, &fid);
2993 		if (error) {
2994 			VN_RELE(vp);
2995 			if (pre_tvp)
2996 				VN_RELE(pre_tvp);
2997 			return (puterrno4(error));
2998 		}
2999 
3000 		if (dotdot)
3001 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3002 		else
3003 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3004 
3005 		if (exi == NULL) {
3006 			if (pre_tvp) {
3007 				/*
3008 				 * If this vnode is a mounted-on vnode,
3009 				 * but the mounted-on file system is not
3010 				 * exported, send back the filehandle for
3011 				 * the mounted-on vnode, not the root of
3012 				 * the mounted-on file system.
3013 				 */
3014 				VN_RELE(vp);
3015 				vp = pre_tvp;
3016 				exi = pre_exi;
3017 			} else {
3018 				VN_RELE(vp);
3019 				return (puterrno4(EACCES));
3020 			}
3021 		} else if (pre_tvp) {
3022 			/* we're done with pre_tvp now. release extra hold */
3023 			VN_RELE(pre_tvp);
3024 		}
3025 
3026 		cs->exi = exi;
3027 
3028 		/*
3029 		 * Now we do a checkauth4. The reason is that
3030 		 * this client/user may not have access to the new
3031 		 * exported file system, and if they do,
3032 		 * the client/user may be mapped to a different uid.
3033 		 *
3034 		 * We start with a new cr, because the checkauth4 done
3035 		 * in the PUT*FH operation over wrote the cred's uid,
3036 		 * gid, etc, and we want the real thing before calling
3037 		 * checkauth4()
3038 		 */
3039 		crfree(cs->cr);
3040 		cs->cr = crdup(cs->basecr);
3041 
3042 		oldvp = cs->vp;
3043 		cs->vp = vp;
3044 		is_newvp = TRUE;
3045 
3046 		stat = call_checkauth4(cs, req);
3047 		if (stat != NFS4_OK) {
3048 			VN_RELE(cs->vp);
3049 			cs->vp = oldvp;
3050 			return (stat);
3051 		}
3052 	}
3053 
3054 	/*
3055 	 * After various NFS checks, do a label check on the path
3056 	 * component. The label on this path should either be the
3057 	 * global zone's label or a zone's label. We are only
3058 	 * interested in the zone's label because exported files
3059 	 * in global zone is accessible (though read-only) to
3060 	 * clients. The exportability/visibility check is already
3061 	 * done before reaching this code.
3062 	 */
3063 	if (is_system_labeled()) {
3064 		bslabel_t *clabel;
3065 
3066 		ASSERT(req->rq_label != NULL);
3067 		clabel = req->rq_label;
3068 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3069 		    "got client label from request(1)", struct svc_req *, req);
3070 
3071 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3072 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3073 			    cs->exi)) {
3074 				error = EACCES;
3075 				goto err_out;
3076 			}
3077 		} else {
3078 			/*
3079 			 * We grant access to admin_low label clients
3080 			 * only if the client is trusted, i.e. also
3081 			 * running Solaris Trusted Extension.
3082 			 */
3083 			struct sockaddr	*ca;
3084 			int		addr_type;
3085 			void		*ipaddr;
3086 			tsol_tpc_t	*tp;
3087 
3088 			ca = (struct sockaddr *)svc_getrpccaller(
3089 			    req->rq_xprt)->buf;
3090 			if (ca->sa_family == AF_INET) {
3091 				addr_type = IPV4_VERSION;
3092 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3093 			} else if (ca->sa_family == AF_INET6) {
3094 				addr_type = IPV6_VERSION;
3095 				ipaddr = &((struct sockaddr_in6 *)
3096 				    ca)->sin6_addr;
3097 			}
3098 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
3099 			if (tp == NULL || tp->tpc_tp.tp_doi !=
3100 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3101 			    SUN_CIPSO) {
3102 				if (tp != NULL)
3103 					TPC_RELE(tp);
3104 				error = EACCES;
3105 				goto err_out;
3106 			}
3107 			TPC_RELE(tp);
3108 		}
3109 	}
3110 
3111 	error = makefh4(&cs->fh, vp, cs->exi);
3112 
3113 err_out:
3114 	if (error) {
3115 		if (is_newvp) {
3116 			VN_RELE(cs->vp);
3117 			cs->vp = oldvp;
3118 		} else
3119 			VN_RELE(vp);
3120 		return (puterrno4(error));
3121 	}
3122 
3123 	if (!is_newvp) {
3124 		if (cs->vp)
3125 			VN_RELE(cs->vp);
3126 		cs->vp = vp;
3127 	} else if (oldvp)
3128 		VN_RELE(oldvp);
3129 
3130 	/*
3131 	 * if did lookup on attrdir and didn't lookup .., set named
3132 	 * attr fh flag
3133 	 */
3134 	if (attrdir && ! dotdot)
3135 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3136 
3137 	/* Assume false for now, open proc will set this */
3138 	cs->mandlock = FALSE;
3139 
3140 	return (NFS4_OK);
3141 }
3142 
3143 /* ARGSUSED */
3144 static void
3145 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3146     struct compound_state *cs)
3147 {
3148 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3149 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3150 	char *nm;
3151 	uint_t len;
3152 	struct sockaddr *ca;
3153 	char *name = NULL;
3154 	nfsstat4 status;
3155 
3156 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3157 	    LOOKUP4args *, args);
3158 
3159 	if (cs->vp == NULL) {
3160 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3161 		goto out;
3162 	}
3163 
3164 	if (cs->vp->v_type == VLNK) {
3165 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3166 		goto out;
3167 	}
3168 
3169 	if (cs->vp->v_type != VDIR) {
3170 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3171 		goto out;
3172 	}
3173 
3174 	status = utf8_dir_verify(&args->objname);
3175 	if (status != NFS4_OK) {
3176 		*cs->statusp = resp->status = status;
3177 		goto out;
3178 	}
3179 
3180 	nm = utf8_to_str(&args->objname, &len, NULL);
3181 	if (nm == NULL) {
3182 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3183 		goto out;
3184 	}
3185 
3186 	if (len > MAXNAMELEN) {
3187 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3188 		kmem_free(nm, len);
3189 		goto out;
3190 	}
3191 
3192 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3193 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3194 	    MAXPATHLEN  + 1);
3195 
3196 	if (name == NULL) {
3197 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3198 		kmem_free(nm, len);
3199 		goto out;
3200 	}
3201 
3202 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3203 
3204 	if (name != nm)
3205 		kmem_free(name, MAXPATHLEN + 1);
3206 	kmem_free(nm, len);
3207 
3208 out:
3209 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3210 	    LOOKUP4res *, resp);
3211 }
3212 
3213 /* ARGSUSED */
3214 static void
3215 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3216     struct compound_state *cs)
3217 {
3218 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3219 
3220 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3221 
3222 	if (cs->vp == NULL) {
3223 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3224 		goto out;
3225 	}
3226 
3227 	if (cs->vp->v_type != VDIR) {
3228 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3229 		goto out;
3230 	}
3231 
3232 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3233 
3234 	/*
3235 	 * From NFSV4 Specification, LOOKUPP should not check for
3236 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3237 	 */
3238 	if (resp->status == NFS4ERR_WRONGSEC) {
3239 		*cs->statusp = resp->status = NFS4_OK;
3240 	}
3241 
3242 out:
3243 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3244 	    LOOKUPP4res *, resp);
3245 }
3246 
3247 
3248 /*ARGSUSED2*/
3249 static void
3250 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3251     struct compound_state *cs)
3252 {
3253 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3254 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3255 	vnode_t		*avp = NULL;
3256 	int		lookup_flags = LOOKUP_XATTR, error;
3257 	int		exp_ro = 0;
3258 
3259 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3260 	    OPENATTR4args *, args);
3261 
3262 	if (cs->vp == NULL) {
3263 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3264 		goto out;
3265 	}
3266 
3267 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3268 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3269 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3270 		goto out;
3271 	}
3272 
3273 	/*
3274 	 * If file system supports passing ACE mask to VOP_ACCESS then
3275 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3276 	 */
3277 
3278 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3279 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3280 		    V_ACE_MASK, cs->cr, NULL);
3281 	else
3282 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3283 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3284 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3285 
3286 	if (error) {
3287 		*cs->statusp = resp->status = puterrno4(EACCES);
3288 		goto out;
3289 	}
3290 
3291 	/*
3292 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3293 	 * the file system is exported read-only -- regardless of
3294 	 * createdir flag.  Otherwise the attrdir would be created
3295 	 * (assuming server fs isn't mounted readonly locally).  If
3296 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3297 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3298 	 * because specfs has no VOP_LOOKUP op, so the macro would
3299 	 * return ENOSYS.  EINVAL is returned by all (current)
3300 	 * Solaris file system implementations when any of their
3301 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3302 	 * Returning NOTSUPP is more appropriate in this case
3303 	 * because the object will never be able to have an attrdir.
3304 	 */
3305 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3306 		lookup_flags |= CREATE_XATTR_DIR;
3307 
3308 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3309 	    NULL, NULL, NULL);
3310 
3311 	if (error) {
3312 		if (error == ENOENT && args->createdir && exp_ro)
3313 			*cs->statusp = resp->status = puterrno4(EROFS);
3314 		else if (error == EINVAL || error == ENOSYS)
3315 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3316 		else
3317 			*cs->statusp = resp->status = puterrno4(error);
3318 		goto out;
3319 	}
3320 
3321 	ASSERT(avp->v_flag & V_XATTRDIR);
3322 
3323 	error = makefh4(&cs->fh, avp, cs->exi);
3324 
3325 	if (error) {
3326 		VN_RELE(avp);
3327 		*cs->statusp = resp->status = puterrno4(error);
3328 		goto out;
3329 	}
3330 
3331 	VN_RELE(cs->vp);
3332 	cs->vp = avp;
3333 
3334 	/*
3335 	 * There is no requirement for an attrdir fh flag
3336 	 * because the attrdir has a vnode flag to distinguish
3337 	 * it from regular (non-xattr) directories.  The
3338 	 * FH4_ATTRDIR flag is set for future sanity checks.
3339 	 */
3340 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3341 	*cs->statusp = resp->status = NFS4_OK;
3342 
3343 out:
3344 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3345 	    OPENATTR4res *, resp);
3346 }
3347 
3348 static int
3349 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3350     caller_context_t *ct)
3351 {
3352 	int error;
3353 	int i;
3354 	clock_t delaytime;
3355 
3356 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3357 
3358 	/*
3359 	 * Don't block on mandatory locks. If this routine returns
3360 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3361 	 */
3362 	uio->uio_fmode = FNONBLOCK;
3363 
3364 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3365 
3366 
3367 		if (direction == FREAD) {
3368 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3369 			error = VOP_READ(vp, uio, ioflag, cred, ct);
3370 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3371 		} else {
3372 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3373 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3374 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3375 		}
3376 
3377 		if (error != EAGAIN)
3378 			break;
3379 
3380 		if (i < rfs4_maxlock_tries - 1) {
3381 			delay(delaytime);
3382 			delaytime *= 2;
3383 		}
3384 	}
3385 
3386 	return (error);
3387 }
3388 
/*
 * read: args: CURRENT_FH, stateid, offset, count.
 *	res: status.  On success eof, data_len, data_val and mblk are
 *	filled in (plus wlist/wlist_len when the request carried an RDMA
 *	write list).  CURRENT_FH is not modified here.
 *
 * The data is read into one of three kinds of buffer: loaned (zero-copy)
 * buffers obtained with VOP_REQZCBUF(), an RDMA write chunk obtained
 * with rdma_get_wchunk(), or an mblk chain from rfs_read_alloc().
 */
/* ARGSUSED */
static void
rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
	READ4args *args = &argop->nfs_argop4_u.opread;
	READ4res *resp = &resop->nfs_resop4_u.opread;
	int error;
	int verror;
	vnode_t *vp;
	struct vattr va;
	struct iovec iov, *iovp = NULL;
	int iovcnt;
	struct uio uio;
	u_offset_t offset;
	bool_t *deleg = &cs->deleg;
	nfsstat4 stat;
	int in_crit = 0;
	mblk_t *mp = NULL;
	/* Never assigned after this initialization; only ASSERTed below. */
	int alloc_err = 0;
	int rdma_used = 0;
	int loaned_buffers;
	caller_context_t ct;
	struct uio *uiop;

	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
	    READ4args, args);

	vp = cs->vp;
	if (vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto out;
	}
	if (cs->access == CS_ACCESS_DENIED) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * Validate the stateid for reading.  ct is handed in by address
	 * and used by every VOP below -- presumably it is filled in here;
	 * confirm against rfs4_check_stateid().
	 */
	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
		*cs->statusp = resp->status = stat;
		goto out;
	}

	/*
	 * Enter the critical region before calling VOP_RWLOCK
	 * to avoid a deadlock with write requests.
	 */
	if (nbl_need_check(vp)) {
		nbl_start_crit(vp, RW_READER);
		in_crit = 1;
		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
		    &ct)) {
			*cs->statusp = resp->status = NFS4ERR_LOCKED;
			goto out;
		}
	}

	/*
	 * A write list in the request selects the RDMA reply path; a
	 * count larger than clist_len() of that list is rejected.
	 */
	if (args->wlist) {
		if (args->count > clist_len(args->wlist)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
			goto out;
		}
		rdma_used = 1;
	}

	/* use loaned buffers for TCP */
	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;

	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	/*
	 * If we can't get the attributes, then we can't do the
	 * right access checking.  So, we'll fail the request.
	 */
	if (verror) {
		*cs->statusp = resp->status = puterrno4(verror);
		goto out;
	}

	if (vp->v_type != VREG) {
		*cs->statusp = resp->status =
		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
		goto out;
	}

	/*
	 * The file's owner is not access-checked.  Anyone else needs
	 * either read or execute permission; the error reported is the
	 * one from the VEXEC check.
	 */
	if (crgetuid(cs->cr) != va.va_uid &&
	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/* A read at or past the end of file succeeds with no data and eof. */
	offset = args->offset;
	if (offset >= va.va_size) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = TRUE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		*cs->statusp = resp->status = NFS4_OK;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/* A zero-length read succeeds with no data and eof clear. */
	if (args->count == 0) {
		*cs->statusp = resp->status = NFS4_OK;
		resp->eof = FALSE;
		resp->data_len = 0;
		resp->data_val = NULL;
		resp->mblk = NULL;
		/* RDMA */
		resp->wlist = args->wlist;
		resp->wlist_len = resp->data_len;
		if (resp->wlist)
			clist_zero_len(resp->wlist);
		goto out;
	}

	/*
	 * Do not allocate memory more than maximum allowed
	 * transfer size
	 */
	if (args->count > rfs4_tsize(req))
		args->count = rfs4_tsize(req);

	/*
	 * Zero-copy attempt.  On success control jumps straight to
	 * doio_read with uiop pointing at the xuio; on failure the xuio
	 * is freed and the code falls through to the copying paths.
	 */
	if (loaned_buffers) {
		uiop = (uio_t *)rfs_setup_xuio(vp);
		ASSERT(uiop != NULL);
		uiop->uio_segflg = UIO_SYSSPACE;
		uiop->uio_loffset = args->offset;
		uiop->uio_resid = args->count;

		/* Jump to do the read if successful */
		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
			/*
			 * Need to hold the vnode until after VOP_RETZCBUF()
			 * is called.
			 */
			VN_HOLD(vp);
			goto doio_read;
		}

		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
		    uiop->uio_loffset, int, uiop->uio_resid);

		uiop->uio_extflg = 0;

		/* failure to setup for zero copy */
		rfs_free_xuio((void *)uiop);
		loaned_buffers = 0;
	}

	/*
	 * If returning data via RDMA Write, then grab the chunk list. If we
	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
	 */
	if (rdma_used) {
		mp = NULL;
		(void) rdma_get_wchunk(req, &iov, args->wlist);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
	} else {
		/*
		 * mp will contain the data to be sent out in the read reply.
		 * It will be freed after the reply has been sent.
		 */
		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
		ASSERT(mp != NULL);
		ASSERT(alloc_err == 0);
		uio.uio_iov = iovp;
		uio.uio_iovcnt = iovcnt;
	}

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = args->offset;
	uio.uio_resid = args->count;
	uiop = &uio;

doio_read:
	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);

	/*
	 * Fetch the size again after the read; it is used for the eof
	 * computation below.  A failure here only forces eof to FALSE.
	 */
	va.va_mask = AT_SIZE;
	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);

	if (error) {
		if (mp)
			freemsg(mp);
		*cs->statusp = resp->status = puterrno4(error);
		goto out;
	}

	/* make mblk using zc buffers */
	if (loaned_buffers) {
		mp = uio_to_mblk(uiop);
		ASSERT(mp != NULL);
	}

	*cs->statusp = resp->status = NFS4_OK;

	/* Bytes actually read = bytes requested - bytes left in the uio. */
	ASSERT(uiop->uio_resid >= 0);
	resp->data_len = args->count - uiop->uio_resid;
	if (mp) {
		resp->data_val = (char *)mp->b_datap->db_base;
		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
	} else {
		/* RDMA path: the data sits in the write chunk. */
		resp->data_val = (caddr_t)iov.iov_base;
	}

	resp->mblk = mp;

	if (!verror && offset + resp->data_len == va.va_size)
		resp->eof = TRUE;
	else
		resp->eof = FALSE;

	if (rdma_used) {
		if (!rdma_setup_read_data4(args, resp)) {
			*cs->statusp = resp->status = NFS4ERR_INVAL;
		}
	} else {
		resp->wlist = NULL;
	}

out:
	if (in_crit)
		nbl_end_crit(vp);

	/* iovp is only non-NULL if rfs_read_alloc() set it above. */
	if (iovp != NULL)
		kmem_free(iovp, iovcnt * sizeof (struct iovec));

	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
	    READ4res *, resp);
}
3634 
3635 static void
3636 rfs4_op_read_free(nfs_resop4 *resop)
3637 {
3638 	READ4res	*resp = &resop->nfs_resop4_u.opread;
3639 
3640 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3641 		freemsg(resp->mblk);
3642 		resp->mblk = NULL;
3643 		resp->data_val = NULL;
3644 		resp->data_len = 0;
3645 	}
3646 }
3647 
3648 static void
3649 rfs4_op_readdir_free(nfs_resop4 * resop)
3650 {
3651 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3652 
3653 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3654 		freeb(resp->mblk);
3655 		resp->mblk = NULL;
3656 		resp->data_len = 0;
3657 	}
3658 }
3659 
3660 
3661 /* ARGSUSED */
3662 static void
3663 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3664     struct compound_state *cs)
3665 {
3666 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
3667 	int		error;
3668 	vnode_t		*vp;
3669 	struct exportinfo *exi, *sav_exi;
3670 	nfs_fh4_fmt_t	*fh_fmtp;
3671 	nfs_export_t *ne = nfs_get_export();
3672 
3673 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3674 
3675 	if (cs->vp) {
3676 		VN_RELE(cs->vp);
3677 		cs->vp = NULL;
3678 	}
3679 
3680 	if (cs->cr)
3681 		crfree(cs->cr);
3682 
3683 	cs->cr = crdup(cs->basecr);
3684 
3685 	vp = ne->exi_public->exi_vp;
3686 	if (vp == NULL) {
3687 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3688 		goto out;
3689 	}
3690 
3691 	error = makefh4(&cs->fh, vp, ne->exi_public);
3692 	if (error != 0) {
3693 		*cs->statusp = resp->status = puterrno4(error);
3694 		goto out;
3695 	}
3696 	sav_exi = cs->exi;
3697 	if (ne->exi_public == ne->exi_root) {
3698 		/*
3699 		 * No filesystem is actually shared public, so we default
3700 		 * to exi_root. In this case, we must check whether root
3701 		 * is exported.
3702 		 */
3703 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3704 
3705 		/*
3706 		 * if root filesystem is exported, the exportinfo struct that we
3707 		 * should use is what checkexport4 returns, because root_exi is
3708 		 * actually a mostly empty struct.
3709 		 */
3710 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3711 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3712 		cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3713 	} else {
3714 		/*
3715 		 * it's a properly shared filesystem
3716 		 */
3717 		cs->exi = ne->exi_public;
3718 	}
3719 
3720 	if (is_system_labeled()) {
3721 		bslabel_t *clabel;
3722 
3723 		ASSERT(req->rq_label != NULL);
3724 		clabel = req->rq_label;
3725 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3726 		    "got client label from request(1)",
3727 		    struct svc_req *, req);
3728 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3729 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3730 			    cs->exi)) {
3731 				*cs->statusp = resp->status =
3732 				    NFS4ERR_SERVERFAULT;
3733 				goto out;
3734 			}
3735 		}
3736 	}
3737 
3738 	VN_HOLD(vp);
3739 	cs->vp = vp;
3740 
3741 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3742 		VN_RELE(cs->vp);
3743 		cs->vp = NULL;
3744 		cs->exi = sav_exi;
3745 		goto out;
3746 	}
3747 
3748 	*cs->statusp = resp->status = NFS4_OK;
3749 out:
3750 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3751 	    PUTPUBFH4res *, resp);
3752 }
3753 
3754 /*
3755  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3756  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3757  * or joe have restrictive search permissions, then we shouldn't let
3758  * the client get a file handle. This is easy to enforce. However, we
3759  * don't know what security flavor should be used until we resolve the
3760  * path name. Another complication is uid mapping. If root is
3761  * the user, then it will be mapped to the anonymous user by default,
3762  * but we won't know that till we've resolved the path name. And we won't
3763  * know what the anonymous user is.
3764  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor
 * of the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
3769  */
3770 /* ARGSUSED */
3771 static void
3772 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3773     struct compound_state *cs)
3774 {
3775 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3776 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3777 	nfs_fh4_fmt_t *fh_fmtp;
3778 
3779 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3780 	    PUTFH4args *, args);
3781 
3782 	if (cs->vp) {
3783 		VN_RELE(cs->vp);
3784 		cs->vp = NULL;
3785 	}
3786 
3787 	if (cs->cr) {
3788 		crfree(cs->cr);
3789 		cs->cr = NULL;
3790 	}
3791 
3792 
3793 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3794 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3795 		goto out;
3796 	}
3797 
3798 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3799 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3800 	    NULL);
3801 
3802 	if (cs->exi == NULL) {
3803 		*cs->statusp = resp->status = NFS4ERR_STALE;
3804 		goto out;
3805 	}
3806 
3807 	cs->cr = crdup(cs->basecr);
3808 
3809 	ASSERT(cs->cr != NULL);
3810 
3811 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3812 		*cs->statusp = resp->status;
3813 		goto out;
3814 	}
3815 
3816 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3817 		VN_RELE(cs->vp);
3818 		cs->vp = NULL;
3819 		goto out;
3820 	}
3821 
3822 	nfs_fh4_copy(&args->object, &cs->fh);
3823 	*cs->statusp = resp->status = NFS4_OK;
3824 	cs->deleg = FALSE;
3825 
3826 out:
3827 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3828 	    PUTFH4res *, resp);
3829 }
3830 
3831 /* ARGSUSED */
3832 static void
3833 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3834     struct compound_state *cs)
3835 {
3836 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3837 	int error;
3838 	fid_t fid;
3839 	struct exportinfo *exi, *sav_exi;
3840 
3841 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3842 
3843 	if (cs->vp) {
3844 		VN_RELE(cs->vp);
3845 		cs->vp = NULL;
3846 	}
3847 
3848 	if (cs->cr)
3849 		crfree(cs->cr);
3850 
3851 	cs->cr = crdup(cs->basecr);
3852 
3853 	/*
3854 	 * Using rootdir, the system root vnode,
3855 	 * get its fid.
3856 	 */
3857 	bzero(&fid, sizeof (fid));
3858 	fid.fid_len = MAXFIDSZ;
3859 	error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3860 	if (error != 0) {
3861 		*cs->statusp = resp->status = puterrno4(error);
3862 		goto out;
3863 	}
3864 
3865 	/*
3866 	 * Then use the root fsid & fid it to find out if it's exported
3867 	 *
3868 	 * If the server root isn't exported directly, then
3869 	 * it should at least be a pseudo export based on
3870 	 * one or more exports further down in the server's
3871 	 * file tree.
3872 	 */
3873 	exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3874 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3875 		NFS4_DEBUG(rfs4_debug,
3876 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3877 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3878 		goto out;
3879 	}
3880 
3881 	/*
3882 	 * Now make a filehandle based on the root
3883 	 * export and root vnode.
3884 	 */
3885 	error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3886 	if (error != 0) {
3887 		*cs->statusp = resp->status = puterrno4(error);
3888 		goto out;
3889 	}
3890 
3891 	sav_exi = cs->exi;
3892 	cs->exi = exi;
3893 
3894 	VN_HOLD(ZONE_ROOTVP());
3895 	cs->vp = ZONE_ROOTVP();
3896 
3897 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3898 		VN_RELE(cs->vp);
3899 		cs->vp = NULL;
3900 		cs->exi = sav_exi;
3901 		goto out;
3902 	}
3903 
3904 	*cs->statusp = resp->status = NFS4_OK;
3905 	cs->deleg = FALSE;
3906 out:
3907 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3908 	    PUTROOTFH4res *, resp);
3909 }
3910 
3911 /*
3912  * readlink: args: CURRENT_FH.
3913  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3914  */
3915 
3916 /* ARGSUSED */
3917 static void
3918 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3919     struct compound_state *cs)
3920 {
3921 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3922 	int error;
3923 	vnode_t *vp;
3924 	struct iovec iov;
3925 	struct vattr va;
3926 	struct uio uio;
3927 	char *data;
3928 	struct sockaddr *ca;
3929 	char *name = NULL;
3930 	int is_referral;
3931 
3932 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3933 
3934 	/* CURRENT_FH: directory */
3935 	vp = cs->vp;
3936 	if (vp == NULL) {
3937 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3938 		goto out;
3939 	}
3940 
3941 	if (cs->access == CS_ACCESS_DENIED) {
3942 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3943 		goto out;
3944 	}
3945 
3946 	/* Is it a referral? */
3947 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3948 
3949 		is_referral = 1;
3950 
3951 	} else {
3952 
3953 		is_referral = 0;
3954 
3955 		if (vp->v_type == VDIR) {
3956 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
3957 			goto out;
3958 		}
3959 
3960 		if (vp->v_type != VLNK) {
3961 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3962 			goto out;
3963 		}
3964 
3965 	}
3966 
3967 	va.va_mask = AT_MODE;
3968 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3969 	if (error) {
3970 		*cs->statusp = resp->status = puterrno4(error);
3971 		goto out;
3972 	}
3973 
3974 	if (MANDLOCK(vp, va.va_mode)) {
3975 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3976 		goto out;
3977 	}
3978 
3979 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3980 
3981 	if (is_referral) {
3982 		char *s;
3983 		size_t strsz;
3984 		kstat_named_t *stat =
3985 		    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
3986 
3987 		/* Get an artificial symlink based on a referral */
3988 		s = build_symlink(vp, cs->cr, &strsz);
3989 		stat[NFS_REFERLINKS].value.ui64++;
3990 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3991 		    vnode_t *, vp, char *, s);
3992 		if (s == NULL)
3993 			error = EINVAL;
3994 		else {
3995 			error = 0;
3996 			(void) strlcpy(data, s, MAXPATHLEN + 1);
3997 			kmem_free(s, strsz);
3998 		}
3999 
4000 	} else {
4001 
4002 		iov.iov_base = data;
4003 		iov.iov_len = MAXPATHLEN;
4004 		uio.uio_iov = &iov;
4005 		uio.uio_iovcnt = 1;
4006 		uio.uio_segflg = UIO_SYSSPACE;
4007 		uio.uio_extflg = UIO_COPY_CACHED;
4008 		uio.uio_loffset = 0;
4009 		uio.uio_resid = MAXPATHLEN;
4010 
4011 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4012 
4013 		if (!error)
4014 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
4015 	}
4016 
4017 	if (error) {
4018 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4019 		*cs->statusp = resp->status = puterrno4(error);
4020 		goto out;
4021 	}
4022 
4023 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4024 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4025 	    MAXPATHLEN  + 1);
4026 
4027 	if (name == NULL) {
4028 		/*
4029 		 * Even though the conversion failed, we return
4030 		 * something. We just don't translate it.
4031 		 */
4032 		name = data;
4033 	}
4034 
4035 	/*
4036 	 * treat link name as data
4037 	 */
4038 	(void) str_to_utf8(name, (utf8string *)&resp->link);
4039 
4040 	if (name != data)
4041 		kmem_free(name, MAXPATHLEN + 1);
4042 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4043 	*cs->statusp = resp->status = NFS4_OK;
4044 
4045 out:
4046 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4047 	    READLINK4res *, resp);
4048 }
4049 
4050 static void
4051 rfs4_op_readlink_free(nfs_resop4 *resop)
4052 {
4053 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4054 	utf8string *symlink = (utf8string *)&resp->link;
4055 
4056 	if (symlink->utf8string_val) {
4057 		UTF8STRING_FREE(*symlink)
4058 	}
4059 }
4060 
4061 /*
4062  * release_lockowner:
4063  *	Release any state associated with the supplied
4064  *	lockowner. Note if any lo_state is holding locks we will not
4065  *	rele that lo_state and thus the lockowner will not be destroyed.
4066  *	A client using lock after the lock owner stateid has been released
4067  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4068  *	to reissue the lock with new_lock_owner set to TRUE.
4069  *	args: lock_owner
4070  *	res:  status
4071  */
4072 /* ARGSUSED */
4073 static void
4074 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4075     struct svc_req *req, struct compound_state *cs)
4076 {
4077 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4078 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4079 	rfs4_lockowner_t *lo;
4080 	rfs4_openowner_t *oo;
4081 	rfs4_state_t *sp;
4082 	rfs4_lo_state_t *lsp;
4083 	rfs4_client_t *cp;
4084 	bool_t create = FALSE;
4085 	locklist_t *llist;
4086 	sysid_t sysid;
4087 
4088 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4089 	    cs, RELEASE_LOCKOWNER4args *, ap);
4090 
4091 	/* Make sure there is a clientid around for this request */
4092 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4093 
4094 	if (cp == NULL) {
4095 		*cs->statusp = resp->status =
4096 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4097 		goto out;
4098 	}
4099 	rfs4_client_rele(cp);
4100 
4101 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
4102 	if (lo == NULL) {
4103 		*cs->statusp = resp->status = NFS4_OK;
4104 		goto out;
4105 	}
4106 	ASSERT(lo->rl_client != NULL);
4107 
4108 	/*
4109 	 * Check for EXPIRED client. If so will reap state with in a lease
4110 	 * period or on next set_clientid_confirm step
4111 	 */
4112 	if (rfs4_lease_expired(lo->rl_client)) {
4113 		rfs4_lockowner_rele(lo);
4114 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4115 		goto out;
4116 	}
4117 
4118 	/*
4119 	 * If no sysid has been assigned, then no locks exist; just return.
4120 	 */
4121 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4122 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4123 		rfs4_lockowner_rele(lo);
4124 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4125 		goto out;
4126 	}
4127 
4128 	sysid = lo->rl_client->rc_sysidt;
4129 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4130 
4131 	/*
4132 	 * Mark the lockowner invalid.
4133 	 */
4134 	rfs4_dbe_hide(lo->rl_dbe);
4135 
4136 	/*
4137 	 * sysid-pid pair should now not be used since the lockowner is
4138 	 * invalid. If the client were to instantiate the lockowner again
4139 	 * it would be assigned a new pid. Thus we can get the list of
4140 	 * current locks.
4141 	 */
4142 
4143 	llist = flk_get_active_locks(sysid, lo->rl_pid);
4144 	/* If we are still holding locks fail */
4145 	if (llist != NULL) {
4146 
4147 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4148 
4149 		flk_free_locklist(llist);
4150 		/*
4151 		 * We need to unhide the lockowner so the client can
4152 		 * try it again. The bad thing here is if the client
4153 		 * has a logic error that took it here in the first place
4154 		 * they probably have lost accounting of the locks that it
4155 		 * is holding. So we may have dangling state until the
4156 		 * open owner state is reaped via close. One scenario
4157 		 * that could possibly occur is that the client has
4158 		 * sent the unlock request(s) in separate threads
4159 		 * and has not waited for the replies before sending the
4160 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
4161 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4162 		 * reissuing the request.
4163 		 */
4164 		rfs4_dbe_unhide(lo->rl_dbe);
4165 		rfs4_lockowner_rele(lo);
4166 		goto out;
4167 	}
4168 
4169 	/*
4170 	 * For the corresponding client we need to check each open
4171 	 * owner for any opens that have lockowner state associated
4172 	 * with this lockowner.
4173 	 */
4174 
4175 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4176 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4177 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4178 
4179 		rfs4_dbe_lock(oo->ro_dbe);
4180 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
4181 		    sp = list_next(&oo->ro_statelist, sp)) {
4182 
4183 			rfs4_dbe_lock(sp->rs_dbe);
4184 			for (lsp = list_head(&sp->rs_lostatelist);
4185 			    lsp != NULL;
4186 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
4187 				if (lsp->rls_locker == lo) {
4188 					rfs4_dbe_lock(lsp->rls_dbe);
4189 					rfs4_dbe_invalidate(lsp->rls_dbe);
4190 					rfs4_dbe_unlock(lsp->rls_dbe);
4191 				}
4192 			}
4193 			rfs4_dbe_unlock(sp->rs_dbe);
4194 		}
4195 		rfs4_dbe_unlock(oo->ro_dbe);
4196 	}
4197 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4198 
4199 	rfs4_lockowner_rele(lo);
4200 
4201 	*cs->statusp = resp->status = NFS4_OK;
4202 
4203 out:
4204 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4205 	    cs, RELEASE_LOCKOWNER4res *, resp);
4206 }
4207 
4208 /*
4209  * short utility function to lookup a file and recall the delegation
4210  */
4211 static rfs4_file_t *
4212 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4213     int *lkup_error, cred_t *cr)
4214 {
4215 	vnode_t *vp;
4216 	rfs4_file_t *fp = NULL;
4217 	bool_t fcreate = FALSE;
4218 	int error;
4219 
4220 	if (vpp)
4221 		*vpp = NULL;
4222 
4223 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4224 	    NULL)) == 0) {
4225 		if (vp->v_type == VREG)
4226 			fp = rfs4_findfile(vp, NULL, &fcreate);
4227 		if (vpp)
4228 			*vpp = vp;
4229 		else
4230 			VN_RELE(vp);
4231 	}
4232 
4233 	if (lkup_error)
4234 		*lkup_error = error;
4235 
4236 	return (fp);
4237 }
4238 
4239 /*
4240  * remove: args: CURRENT_FH: directory; name.
4241  *	res: status. If success - CURRENT_FH unchanged, return change_info
4242  *		for directory.
4243  */
4244 /* ARGSUSED */
4245 static void
4246 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4247     struct compound_state *cs)
4248 {
4249 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4250 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4251 	int error;
4252 	vnode_t *dvp, *vp;
4253 	struct vattr bdva, idva, adva;
4254 	char *nm;
4255 	uint_t len;
4256 	rfs4_file_t *fp;
4257 	int in_crit = 0;
4258 	bslabel_t *clabel;
4259 	struct sockaddr *ca;
4260 	char *name = NULL;
4261 	nfsstat4 status;
4262 
4263 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4264 	    REMOVE4args *, args);
4265 
4266 	/* CURRENT_FH: directory */
4267 	dvp = cs->vp;
4268 	if (dvp == NULL) {
4269 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4270 		goto out;
4271 	}
4272 
4273 	if (cs->access == CS_ACCESS_DENIED) {
4274 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4275 		goto out;
4276 	}
4277 
4278 	/*
4279 	 * If there is an unshared filesystem mounted on this vnode,
4280 	 * Do not allow to remove anything in this directory.
4281 	 */
4282 	if (vn_ismntpt(dvp)) {
4283 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4284 		goto out;
4285 	}
4286 
4287 	if (dvp->v_type != VDIR) {
4288 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4289 		goto out;
4290 	}
4291 
4292 	status = utf8_dir_verify(&args->target);
4293 	if (status != NFS4_OK) {
4294 		*cs->statusp = resp->status = status;
4295 		goto out;
4296 	}
4297 
4298 	/*
4299 	 * Lookup the file so that we can check if it's a directory
4300 	 */
4301 	nm = utf8_to_fn(&args->target, &len, NULL);
4302 	if (nm == NULL) {
4303 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4304 		goto out;
4305 	}
4306 
4307 	if (len > MAXNAMELEN) {
4308 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4309 		kmem_free(nm, len);
4310 		goto out;
4311 	}
4312 
4313 	if (rdonly4(req, cs)) {
4314 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4315 		kmem_free(nm, len);
4316 		goto out;
4317 	}
4318 
4319 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4320 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4321 	    MAXPATHLEN  + 1);
4322 
4323 	if (name == NULL) {
4324 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4325 		kmem_free(nm, len);
4326 		goto out;
4327 	}
4328 
4329 	/*
4330 	 * Lookup the file to determine type and while we are see if
4331 	 * there is a file struct around and check for delegation.
4332 	 * We don't need to acquire va_seq before this lookup, if
4333 	 * it causes an update, cinfo.before will not match, which will
4334 	 * trigger a cache flush even if atomic is TRUE.
4335 	 */
4336 	fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4337 	if (fp != NULL) {
4338 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4339 		    NULL)) {
4340 			VN_RELE(vp);
4341 			rfs4_file_rele(fp);
4342 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4343 			if (nm != name)
4344 				kmem_free(name, MAXPATHLEN + 1);
4345 			kmem_free(nm, len);
4346 			goto out;
4347 		}
4348 	}
4349 
4350 	/* Didn't find anything to remove */
4351 	if (vp == NULL) {
4352 		*cs->statusp = resp->status = error;
4353 		if (nm != name)
4354 			kmem_free(name, MAXPATHLEN + 1);
4355 		kmem_free(nm, len);
4356 		goto out;
4357 	}
4358 
4359 	if (nbl_need_check(vp)) {
4360 		nbl_start_crit(vp, RW_READER);
4361 		in_crit = 1;
4362 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4363 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4364 			if (nm != name)
4365 				kmem_free(name, MAXPATHLEN + 1);
4366 			kmem_free(nm, len);
4367 			nbl_end_crit(vp);
4368 			VN_RELE(vp);
4369 			if (fp) {
4370 				rfs4_clear_dont_grant(fp);
4371 				rfs4_file_rele(fp);
4372 			}
4373 			goto out;
4374 		}
4375 	}
4376 
4377 	/* check label before allowing removal */
4378 	if (is_system_labeled()) {
4379 		ASSERT(req->rq_label != NULL);
4380 		clabel = req->rq_label;
4381 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4382 		    "got client label from request(1)",
4383 		    struct svc_req *, req);
4384 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4385 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4386 			    cs->exi)) {
4387 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4388 				if (name != nm)
4389 					kmem_free(name, MAXPATHLEN + 1);
4390 				kmem_free(nm, len);
4391 				if (in_crit)
4392 					nbl_end_crit(vp);
4393 				VN_RELE(vp);
4394 				if (fp) {
4395 					rfs4_clear_dont_grant(fp);
4396 					rfs4_file_rele(fp);
4397 				}
4398 				goto out;
4399 			}
4400 		}
4401 	}
4402 
4403 	/* Get dir "before" change value */
4404 	bdva.va_mask = AT_CTIME|AT_SEQ;
4405 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4406 	if (error) {
4407 		*cs->statusp = resp->status = puterrno4(error);
4408 		if (nm != name)
4409 			kmem_free(name, MAXPATHLEN + 1);
4410 		kmem_free(nm, len);
4411 		if (in_crit)
4412 			nbl_end_crit(vp);
4413 		VN_RELE(vp);
4414 		if (fp) {
4415 			rfs4_clear_dont_grant(fp);
4416 			rfs4_file_rele(fp);
4417 		}
4418 		goto out;
4419 	}
4420 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4421 
4422 	/* Actually do the REMOVE operation */
4423 	if (vp->v_type == VDIR) {
4424 		/*
4425 		 * Can't remove a directory that has a mounted-on filesystem.
4426 		 */
4427 		if (vn_ismntpt(vp)) {
4428 			error = EACCES;
4429 		} else {
4430 			/*
4431 			 * System V defines rmdir to return EEXIST,
4432 			 * not ENOTEMPTY, if the directory is not
4433 			 * empty.  A System V NFS server needs to map
4434 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4435 			 * transmit over the wire.
4436 			 */
4437 			if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4438 			    NULL, 0)) == EEXIST)
4439 				error = ENOTEMPTY;
4440 		}
4441 	} else {
4442 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4443 		    fp != NULL) {
4444 			struct vattr va;
4445 			vnode_t *tvp;
4446 
4447 			rfs4_dbe_lock(fp->rf_dbe);
4448 			tvp = fp->rf_vp;
4449 			if (tvp)
4450 				VN_HOLD(tvp);
4451 			rfs4_dbe_unlock(fp->rf_dbe);
4452 
4453 			if (tvp) {
4454 				/*
4455 				 * This is va_seq safe because we are not
4456 				 * manipulating dvp.
4457 				 */
4458 				va.va_mask = AT_NLINK;
4459 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4460 				    va.va_nlink == 0) {
4461 					/* Remove state on file remove */
4462 					if (in_crit) {
4463 						nbl_end_crit(vp);
4464 						in_crit = 0;
4465 					}
4466 					rfs4_close_all_state(fp);
4467 				}
4468 				VN_RELE(tvp);
4469 			}
4470 		}
4471 	}
4472 
4473 	if (in_crit)
4474 		nbl_end_crit(vp);
4475 	VN_RELE(vp);
4476 
4477 	if (fp) {
4478 		rfs4_clear_dont_grant(fp);
4479 		rfs4_file_rele(fp);
4480 	}
4481 	if (nm != name)
4482 		kmem_free(name, MAXPATHLEN + 1);
4483 	kmem_free(nm, len);
4484 
4485 	if (error) {
4486 		*cs->statusp = resp->status = puterrno4(error);
4487 		goto out;
4488 	}
4489 
4490 	/*
4491 	 * Get the initial "after" sequence number, if it fails, set to zero
4492 	 */
4493 	idva.va_mask = AT_SEQ;
4494 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4495 		idva.va_seq = 0;
4496 
4497 	/*
4498 	 * Force modified data and metadata out to stable storage.
4499 	 */
4500 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4501 
4502 	/*
4503 	 * Get "after" change value, if it fails, simply return the
4504 	 * before value.
4505 	 */
4506 	adva.va_mask = AT_CTIME|AT_SEQ;
4507 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4508 		adva.va_ctime = bdva.va_ctime;
4509 		adva.va_seq = 0;
4510 	}
4511 
4512 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4513 
4514 	/*
4515 	 * The cinfo.atomic = TRUE only if we have
4516 	 * non-zero va_seq's, and it has incremented by exactly one
4517 	 * during the VOP_REMOVE/RMDIR and it didn't change during
4518 	 * the VOP_FSYNC.
4519 	 */
4520 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4521 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4522 		resp->cinfo.atomic = TRUE;
4523 	else
4524 		resp->cinfo.atomic = FALSE;
4525 
4526 	*cs->statusp = resp->status = NFS4_OK;
4527 
4528 out:
4529 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4530 	    REMOVE4res *, resp);
4531 }
4532 
4533 /*
4534  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4535  *		oldname and newname.
4536  *	res: status. If success - CURRENT_FH unchanged, return change_info
4537  *		for both from and target directories.
4538  */
4539 /* ARGSUSED */
4540 static void
4541 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4542     struct compound_state *cs)
4543 {
4544 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
4545 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4546 	int error;
4547 	vnode_t *odvp;
4548 	vnode_t *ndvp;
4549 	vnode_t *srcvp, *targvp, *tvp;
4550 	struct vattr obdva, oidva, oadva;
4551 	struct vattr nbdva, nidva, nadva;
4552 	char *onm, *nnm;
4553 	uint_t olen, nlen;
4554 	rfs4_file_t *fp, *sfp;
4555 	int in_crit_src, in_crit_targ;
4556 	int fp_rele_grant_hold, sfp_rele_grant_hold;
4557 	int unlinked;
4558 	bslabel_t *clabel;
4559 	struct sockaddr *ca;
4560 	char *converted_onm = NULL;
4561 	char *converted_nnm = NULL;
4562 	nfsstat4 status;
4563 
4564 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4565 	    RENAME4args *, args);
4566 
4567 	fp = sfp = NULL;
4568 	srcvp = targvp = tvp = NULL;
4569 	in_crit_src = in_crit_targ = 0;
4570 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4571 	unlinked = 0;
4572 
4573 	/* CURRENT_FH: target directory */
4574 	ndvp = cs->vp;
4575 	if (ndvp == NULL) {
4576 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4577 		goto out;
4578 	}
4579 
4580 	/* SAVED_FH: from directory */
4581 	odvp = cs->saved_vp;
4582 	if (odvp == NULL) {
4583 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4584 		goto out;
4585 	}
4586 
4587 	if (cs->access == CS_ACCESS_DENIED) {
4588 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4589 		goto out;
4590 	}
4591 
4592 	/*
4593 	 * If there is an unshared filesystem mounted on this vnode,
4594 	 * do not allow to rename objects in this directory.
4595 	 */
4596 	if (vn_ismntpt(odvp)) {
4597 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4598 		goto out;
4599 	}
4600 
4601 	/*
4602 	 * If there is an unshared filesystem mounted on this vnode,
4603 	 * do not allow to rename to this directory.
4604 	 */
4605 	if (vn_ismntpt(ndvp)) {
4606 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4607 		goto out;
4608 	}
4609 
4610 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4611 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4612 		goto out;
4613 	}
4614 
4615 	if (cs->saved_exi != cs->exi) {
4616 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4617 		goto out;
4618 	}
4619 
4620 	status = utf8_dir_verify(&args->oldname);
4621 	if (status != NFS4_OK) {
4622 		*cs->statusp = resp->status = status;
4623 		goto out;
4624 	}
4625 
4626 	status = utf8_dir_verify(&args->newname);
4627 	if (status != NFS4_OK) {
4628 		*cs->statusp = resp->status = status;
4629 		goto out;
4630 	}
4631 
4632 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4633 	if (onm == NULL) {
4634 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4635 		goto out;
4636 	}
4637 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4638 	nlen = MAXPATHLEN + 1;
4639 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4640 	    nlen);
4641 
4642 	if (converted_onm == NULL) {
4643 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4644 		kmem_free(onm, olen);
4645 		goto out;
4646 	}
4647 
4648 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4649 	if (nnm == NULL) {
4650 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4651 		if (onm != converted_onm)
4652 			kmem_free(converted_onm, MAXPATHLEN + 1);
4653 		kmem_free(onm, olen);
4654 		goto out;
4655 	}
4656 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4657 	    MAXPATHLEN  + 1);
4658 
4659 	if (converted_nnm == NULL) {
4660 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4661 		kmem_free(nnm, nlen);
4662 		nnm = NULL;
4663 		if (onm != converted_onm)
4664 			kmem_free(converted_onm, MAXPATHLEN + 1);
4665 		kmem_free(onm, olen);
4666 		goto out;
4667 	}
4668 
4669 
4670 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4671 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4672 		kmem_free(onm, olen);
4673 		kmem_free(nnm, nlen);
4674 		goto out;
4675 	}
4676 
4677 
4678 	if (rdonly4(req, cs)) {
4679 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4680 		if (onm != converted_onm)
4681 			kmem_free(converted_onm, MAXPATHLEN + 1);
4682 		kmem_free(onm, olen);
4683 		if (nnm != converted_nnm)
4684 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4685 		kmem_free(nnm, nlen);
4686 		goto out;
4687 	}
4688 
4689 	/* check label of the target dir */
4690 	if (is_system_labeled()) {
4691 		ASSERT(req->rq_label != NULL);
4692 		clabel = req->rq_label;
4693 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4694 		    "got client label from request(1)",
4695 		    struct svc_req *, req);
4696 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4697 			if (!do_rfs_label_check(clabel, ndvp,
4698 			    EQUALITY_CHECK, cs->exi)) {
4699 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4700 				goto err_out;
4701 			}
4702 		}
4703 	}
4704 
4705 	/*
4706 	 * Is the source a file and have a delegation?
4707 	 * We don't need to acquire va_seq before these lookups, if
4708 	 * it causes an update, cinfo.before will not match, which will
4709 	 * trigger a cache flush even if atomic is TRUE.
4710 	 */
4711 	sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4712 	    &error, cs->cr);
4713 	if (sfp != NULL) {
4714 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4715 		    NULL)) {
4716 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4717 			goto err_out;
4718 		}
4719 	}
4720 
4721 	if (srcvp == NULL) {
4722 		*cs->statusp = resp->status = puterrno4(error);
4723 		if (onm != converted_onm)
4724 			kmem_free(converted_onm, MAXPATHLEN + 1);
4725 		kmem_free(onm, olen);
4726 		if (nnm != converted_nnm)
4727 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4728 		kmem_free(nnm, nlen);
4729 		goto out;
4730 	}
4731 
4732 	sfp_rele_grant_hold = 1;
4733 
4734 	/* Does the destination exist and a file and have a delegation? */
4735 	fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4736 	    cs->cr);
4737 	if (fp != NULL) {
4738 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4739 		    NULL)) {
4740 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4741 			goto err_out;
4742 		}
4743 	}
4744 	fp_rele_grant_hold = 1;
4745 
4746 	/* Check for NBMAND lock on both source and target */
4747 	if (nbl_need_check(srcvp)) {
4748 		nbl_start_crit(srcvp, RW_READER);
4749 		in_crit_src = 1;
4750 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4751 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4752 			goto err_out;
4753 		}
4754 	}
4755 
4756 	if (targvp && nbl_need_check(targvp)) {
4757 		nbl_start_crit(targvp, RW_READER);
4758 		in_crit_targ = 1;
4759 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4760 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4761 			goto err_out;
4762 		}
4763 	}
4764 
4765 	/* Get source "before" change value */
4766 	obdva.va_mask = AT_CTIME|AT_SEQ;
4767 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4768 	if (!error) {
4769 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4770 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4771 	}
4772 	if (error) {
4773 		*cs->statusp = resp->status = puterrno4(error);
4774 		goto err_out;
4775 	}
4776 
4777 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4778 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4779 
4780 	error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4781 	    NULL, 0);
4782 
4783 	/*
4784 	 * If target existed and was unlinked by VOP_RENAME, state will need
4785 	 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4786 	 * any necessary nbl_end_crit on srcvp and tgtvp.
4787 	 */
4788 	if (error == 0 && fp != NULL) {
4789 		rfs4_dbe_lock(fp->rf_dbe);
4790 		tvp = fp->rf_vp;
4791 		if (tvp)
4792 			VN_HOLD(tvp);
4793 		rfs4_dbe_unlock(fp->rf_dbe);
4794 
4795 		if (tvp) {
4796 			struct vattr va;
4797 			va.va_mask = AT_NLINK;
4798 
4799 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4800 			    va.va_nlink == 0) {
4801 				unlinked = 1;
4802 
4803 				/* DEBUG data */
4804 				if ((srcvp == targvp) || (tvp != targvp)) {
4805 					cmn_err(CE_WARN, "rfs4_op_rename: "
4806 					    "srcvp %p, targvp: %p, tvp: %p",
4807 					    (void *)srcvp, (void *)targvp,
4808 					    (void *)tvp);
4809 				}
4810 			} else {
4811 				VN_RELE(tvp);
4812 			}
4813 		}
4814 	}
4815 	if (error == 0)
4816 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4817 
4818 	if (in_crit_src)
4819 		nbl_end_crit(srcvp);
4820 	if (srcvp)
4821 		VN_RELE(srcvp);
4822 	if (in_crit_targ)
4823 		nbl_end_crit(targvp);
4824 	if (targvp)
4825 		VN_RELE(targvp);
4826 
4827 	if (unlinked) {
4828 		ASSERT(fp != NULL);
4829 		ASSERT(tvp != NULL);
4830 
4831 		/* DEBUG data */
4832 		if (RW_READ_HELD(&tvp->v_nbllock)) {
4833 			cmn_err(CE_WARN, "rfs4_op_rename: "
4834 			    "RW_READ_HELD(%p)", (void *)tvp);
4835 		}
4836 
4837 		/* The file is gone and so should the state */
4838 		rfs4_close_all_state(fp);
4839 		VN_RELE(tvp);
4840 	}
4841 
4842 	if (sfp) {
4843 		rfs4_clear_dont_grant(sfp);
4844 		rfs4_file_rele(sfp);
4845 	}
4846 	if (fp) {
4847 		rfs4_clear_dont_grant(fp);
4848 		rfs4_file_rele(fp);
4849 	}
4850 
4851 	if (converted_onm != onm)
4852 		kmem_free(converted_onm, MAXPATHLEN + 1);
4853 	kmem_free(onm, olen);
4854 	if (converted_nnm != nnm)
4855 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4856 	kmem_free(nnm, nlen);
4857 
4858 	/*
4859 	 * Get the initial "after" sequence number, if it fails, set to zero
4860 	 */
4861 	oidva.va_mask = AT_SEQ;
4862 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4863 		oidva.va_seq = 0;
4864 
4865 	nidva.va_mask = AT_SEQ;
4866 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4867 		nidva.va_seq = 0;
4868 
4869 	/*
4870 	 * Force modified data and metadata out to stable storage.
4871 	 */
4872 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4873 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4874 
4875 	if (error) {
4876 		*cs->statusp = resp->status = puterrno4(error);
4877 		goto out;
4878 	}
4879 
4880 	/*
4881 	 * Get "after" change values, if it fails, simply return the
4882 	 * before value.
4883 	 */
4884 	oadva.va_mask = AT_CTIME|AT_SEQ;
4885 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4886 		oadva.va_ctime = obdva.va_ctime;
4887 		oadva.va_seq = 0;
4888 	}
4889 
4890 	nadva.va_mask = AT_CTIME|AT_SEQ;
4891 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4892 		nadva.va_ctime = nbdva.va_ctime;
4893 		nadva.va_seq = 0;
4894 	}
4895 
4896 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4897 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4898 
4899 	/*
4900 	 * The cinfo.atomic = TRUE only if we have
4901 	 * non-zero va_seq's, and it has incremented by exactly one
4902 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4903 	 */
4904 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4905 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4906 		resp->source_cinfo.atomic = TRUE;
4907 	else
4908 		resp->source_cinfo.atomic = FALSE;
4909 
4910 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4911 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4912 		resp->target_cinfo.atomic = TRUE;
4913 	else
4914 		resp->target_cinfo.atomic = FALSE;
4915 
4916 #ifdef	VOLATILE_FH_TEST
4917 	{
4918 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4919 
4920 	/*
4921 	 * Add the renamed file handle to the volatile rename list
4922 	 */
4923 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4924 		/* file handles may expire on rename */
4925 		vnode_t *vp;
4926 
4927 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4928 		/*
4929 		 * Already know that nnm will be a valid string
4930 		 */
4931 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4932 		    NULL, NULL, NULL);
4933 		kmem_free(nnm, nlen);
4934 		if (!error) {
4935 			add_volrnm_fh(cs->exi, vp);
4936 			VN_RELE(vp);
4937 		}
4938 	}
4939 	}
4940 #endif	/* VOLATILE_FH_TEST */
4941 
4942 	*cs->statusp = resp->status = NFS4_OK;
4943 out:
4944 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4945 	    RENAME4res *, resp);
4946 	return;
4947 
4948 err_out:
4949 	if (onm != converted_onm)
4950 		kmem_free(converted_onm, MAXPATHLEN + 1);
4951 	if (onm != NULL)
4952 		kmem_free(onm, olen);
4953 	if (nnm != converted_nnm)
4954 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4955 	if (nnm != NULL)
4956 		kmem_free(nnm, nlen);
4957 
4958 	if (in_crit_src) nbl_end_crit(srcvp);
4959 	if (in_crit_targ) nbl_end_crit(targvp);
4960 	if (targvp) VN_RELE(targvp);
4961 	if (srcvp) VN_RELE(srcvp);
4962 	if (sfp) {
4963 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4964 		rfs4_file_rele(sfp);
4965 	}
4966 	if (fp) {
4967 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4968 		rfs4_file_rele(fp);
4969 	}
4970 
4971 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4972 	    RENAME4res *, resp);
4973 }
4974 
4975 /* ARGSUSED */
4976 static void
4977 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4978     struct compound_state *cs)
4979 {
4980 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4981 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4982 	rfs4_client_t *cp;
4983 
4984 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4985 	    RENEW4args *, args);
4986 
4987 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4988 		*cs->statusp = resp->status =
4989 		    rfs4_check_clientid(&args->clientid, 0);
4990 		goto out;
4991 	}
4992 
4993 	if (rfs4_lease_expired(cp)) {
4994 		rfs4_client_rele(cp);
4995 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4996 		goto out;
4997 	}
4998 
4999 	rfs4_update_lease(cp);
5000 
5001 	mutex_enter(cp->rc_cbinfo.cb_lock);
5002 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5003 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5004 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5005 	} else {
5006 		*cs->statusp = resp->status = NFS4_OK;
5007 	}
5008 	mutex_exit(cp->rc_cbinfo.cb_lock);
5009 
5010 	rfs4_client_rele(cp);
5011 
5012 out:
5013 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5014 	    RENEW4res *, resp);
5015 }
5016 
5017 /* ARGSUSED */
5018 static void
5019 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5020     struct compound_state *cs)
5021 {
5022 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5023 
5024 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5025 
5026 	/* No need to check cs->access - we are not accessing any object */
5027 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5028 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5029 		goto out;
5030 	}
5031 	if (cs->vp != NULL) {
5032 		VN_RELE(cs->vp);
5033 	}
5034 	cs->vp = cs->saved_vp;
5035 	cs->saved_vp = NULL;
5036 	cs->exi = cs->saved_exi;
5037 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5038 	*cs->statusp = resp->status = NFS4_OK;
5039 	cs->deleg = FALSE;
5040 
5041 out:
5042 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5043 	    RESTOREFH4res *, resp);
5044 }
5045 
5046 /* ARGSUSED */
5047 static void
5048 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5049     struct compound_state *cs)
5050 {
5051 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5052 
5053 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5054 
5055 	/* No need to check cs->access - we are not accessing any object */
5056 	if (cs->vp == NULL) {
5057 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5058 		goto out;
5059 	}
5060 	if (cs->saved_vp != NULL) {
5061 		VN_RELE(cs->saved_vp);
5062 	}
5063 	cs->saved_vp = cs->vp;
5064 	VN_HOLD(cs->saved_vp);
5065 	cs->saved_exi = cs->exi;
5066 	/*
5067 	 * since SAVEFH is fairly rare, don't alloc space for its fh
5068 	 * unless necessary.
5069 	 */
5070 	if (cs->saved_fh.nfs_fh4_val == NULL) {
5071 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5072 	}
5073 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5074 	*cs->statusp = resp->status = NFS4_OK;
5075 
5076 out:
5077 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5078 	    SAVEFH4res *, resp);
5079 }
5080 
5081 /*
5082  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5083  * return the bitmap of attrs that were set successfully. It is also
5084  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5085  * always be called only after rfs4_do_set_attrs().
5086  *
5087  * Verify that the attributes are same as the expected ones. sargp->vap
5088  * and sargp->sbp contain the input attributes as translated from fattr4.
5089  *
5090  * This function verifies only the attrs that correspond to a vattr or
5091  * vfsstat struct. That is because of the extra step needed to get the
5092  * corresponding system structs. Other attributes have already been set or
5093  * verified by do_rfs4_set_attrs.
5094  *
5095  * Return 0 if all attrs match, -1 if some don't, error if error processing.
5096  */
5097 static int
5098 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5099     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5100 {
5101 	int error, ret_error = 0;
5102 	int i, k;
5103 	uint_t sva_mask = sargp->vap->va_mask;
5104 	uint_t vbit;
5105 	union nfs4_attr_u *na;
5106 	uint8_t *amap;
5107 	bool_t getsb = ntovp->vfsstat;
5108 
5109 	if (sva_mask != 0) {
5110 		/*
5111 		 * Okay to overwrite sargp->vap because we verify based
5112 		 * on the incoming values.
5113 		 */
5114 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5115 		    sargp->cs->cr, NULL);
5116 		if (ret_error) {
5117 			if (resp == NULL)
5118 				return (ret_error);
5119 			/*
5120 			 * Must return bitmap of successful attrs
5121 			 */
5122 			sva_mask = 0;	/* to prevent checking vap later */
5123 		} else {
5124 			/*
5125 			 * Some file systems clobber va_mask. it is probably
5126 			 * wrong of them to do so, nonethless we practice
5127 			 * defensive coding.
5128 			 * See bug id 4276830.
5129 			 */
5130 			sargp->vap->va_mask = sva_mask;
5131 		}
5132 	}
5133 
5134 	if (getsb) {
5135 		/*
5136 		 * Now get the superblock and loop on the bitmap, as there is
5137 		 * no simple way of translating from superblock to bitmap4.
5138 		 */
5139 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5140 		if (ret_error) {
5141 			if (resp == NULL)
5142 				goto errout;
5143 			getsb = FALSE;
5144 		}
5145 	}
5146 
5147 	/*
5148 	 * Now loop and verify each attribute which getattr returned
5149 	 * whether it's the same as the input.
5150 	 */
5151 	if (resp == NULL && !getsb && (sva_mask == 0))
5152 		goto errout;
5153 
5154 	na = ntovp->na;
5155 	amap = ntovp->amap;
5156 	k = 0;
5157 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5158 		k = *amap;
5159 		ASSERT(nfs4_ntov_map[k].nval == k);
5160 		vbit = nfs4_ntov_map[k].vbit;
5161 
5162 		/*
5163 		 * If vattr attribute but VOP_GETATTR failed, or it's
5164 		 * superblock attribute but VFS_STATVFS failed, skip
5165 		 */
5166 		if (vbit) {
5167 			if ((vbit & sva_mask) == 0)
5168 				continue;
5169 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5170 			continue;
5171 		}
5172 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5173 		if (resp != NULL) {
5174 			if (error)
5175 				ret_error = -1;	/* not all match */
5176 			else	/* update response bitmap */
5177 				*resp |= nfs4_ntov_map[k].fbit;
5178 			continue;
5179 		}
5180 		if (error) {
5181 			ret_error = -1;	/* not all match */
5182 			break;
5183 		}
5184 	}
5185 errout:
5186 	return (ret_error);
5187 }
5188 
5189 /*
5190  * Decode the attribute to be set/verified. If the attr requires a sys op
5191  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5192  * call the sv_getit function for it, because the sys op hasn't yet been done.
5193  * Return 0 for success, error code if failed.
5194  *
5195  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5196  */
5197 static int
5198 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5199     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5200 {
5201 	int error = 0;
5202 	bool_t set_later;
5203 
5204 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5205 
5206 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5207 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5208 		/*
5209 		 * don't verify yet if a vattr or sb dependent attr,
5210 		 * because we don't have their sys values yet.
5211 		 * Will be done later.
5212 		 */
5213 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5214 			/*
5215 			 * ACLs are a special case, since setting the MODE
5216 			 * conflicts with setting the ACL.  We delay setting
5217 			 * the ACL until all other attributes have been set.
5218 			 * The ACL gets set in do_rfs4_op_setattr().
5219 			 */
5220 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5221 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5222 				    sargp, nap);
5223 				if (error) {
5224 					xdr_free(nfs4_ntov_map[k].xfunc,
5225 					    (caddr_t)nap);
5226 				}
5227 			}
5228 		}
5229 	} else {
5230 #ifdef  DEBUG
5231 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5232 		    "decoding attribute %d\n", k);
5233 #endif
5234 		error = EINVAL;
5235 	}
5236 	if (!error && resp_bval && !set_later) {
5237 		*resp_bval |= nfs4_ntov_map[k].fbit;
5238 	}
5239 
5240 	return (error);
5241 }
5242 
5243 /*
5244  * Set vattr based on incoming fattr4 attrs - used by setattr.
5245  * Set response mask. Ignore any values that are not writable vattr attrs.
5246  */
5247 static nfsstat4
5248 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5249     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5250     nfs4_attr_cmd_t cmd)
5251 {
5252 	int error = 0;
5253 	int i;
5254 	char *attrs = fattrp->attrlist4;
5255 	uint32_t attrslen = fattrp->attrlist4_len;
5256 	XDR xdr;
5257 	nfsstat4 status = NFS4_OK;
5258 	vnode_t *vp = cs->vp;
5259 	union nfs4_attr_u *na;
5260 	uint8_t *amap;
5261 
5262 #ifndef lint
5263 	/*
5264 	 * Make sure that maximum attribute number can be expressed as an
5265 	 * 8 bit quantity.
5266 	 */
5267 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5268 #endif
5269 
5270 	if (vp == NULL) {
5271 		if (resp)
5272 			*resp = 0;
5273 		return (NFS4ERR_NOFILEHANDLE);
5274 	}
5275 	if (cs->access == CS_ACCESS_DENIED) {
5276 		if (resp)
5277 			*resp = 0;
5278 		return (NFS4ERR_ACCESS);
5279 	}
5280 
5281 	sargp->op = cmd;
5282 	sargp->cs = cs;
5283 	sargp->flag = 0;	/* may be set later */
5284 	sargp->vap->va_mask = 0;
5285 	sargp->rdattr_error = NFS4_OK;
5286 	sargp->rdattr_error_req = FALSE;
5287 	/* sargp->sbp is set by the caller */
5288 
5289 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5290 
5291 	na = ntovp->na;
5292 	amap = ntovp->amap;
5293 
5294 	/*
5295 	 * The following loop iterates on the nfs4_ntov_map checking
5296 	 * if the fbit is set in the requested bitmap.
5297 	 * If set then we process the arguments using the
5298 	 * rfs4_fattr4 conversion functions to populate the setattr
5299 	 * vattr and va_mask. Any settable attrs that are not using vattr
5300 	 * will be set in this loop.
5301 	 */
5302 	for (i = 0; i < nfs4_ntov_map_size; i++) {
5303 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5304 			continue;
5305 		}
5306 		/*
5307 		 * If setattr, must be a writable attr.
5308 		 * If verify/nverify, must be a readable attr.
5309 		 */
5310 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
5311 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5312 			/*
5313 			 * Client tries to set/verify an
5314 			 * unsupported attribute, tries to set
5315 			 * a read only attr or verify a write
5316 			 * only one - error!
5317 			 */
5318 			break;
5319 		}
5320 		/*
5321 		 * Decode the attribute to set/verify
5322 		 */
5323 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5324 		    &xdr, resp ? resp : NULL, na);
5325 		if (error)
5326 			break;
5327 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5328 		na++;
5329 		(ntovp->attrcnt)++;
5330 		if (nfs4_ntov_map[i].vfsstat)
5331 			ntovp->vfsstat = TRUE;
5332 	}
5333 
5334 	if (error != 0)
5335 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5336 		    puterrno4(error));
5337 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
5338 	return (status);
5339 }
5340 
/*
 * Worker for the SETATTR operation: apply the attributes in fattrp to the
 * current filehandle's vnode (cs->vp).
 *
 * The attributes are first decoded by do_rfs4_set_attrs().  If a size was
 * supplied, the stateid is checked with rfs4_check_stateid(); for regular
 * files the change is also checked against NBMAND locks and, when the
 * caller's uid matches the file's owner, carried out with VOP_SPACE()
 * rather than VOP_SETATTR().  Whatever is left in the vattr mask is applied
 * with VOP_SETATTR(), and a requested ACL is applied last.
 *
 * On return *resp holds the bitmap of attributes reported as set, masked
 * down to the attributes the client actually asked for.  The return value
 * is an NFSv4 status code.
 */
static nfsstat4
do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
    stateid4 *stateid)
{
	int error = 0;
	struct nfs4_svgetit_arg sarg;
	bool_t trunc;

	nfsstat4 status = NFS4_OK;
	cred_t *cr = cs->cr;
	vnode_t *vp = cs->vp;
	struct nfs4_ntov_table ntov;
	struct statvfs64 sb;
	struct vattr bva;
	struct flock64 bf;
	int in_crit = 0;
	uint_t saved_mask = 0;
	caller_context_t ct;

	*resp = 0;
	sarg.sbp = &sb;
	sarg.is_referral = B_FALSE;
	nfs4_ntov_table_init(&ntov);
	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
	    NFS4ATTR_SETIT);
	if (status != NFS4_OK) {
		/*
		 * failed set attrs
		 */
		goto done;
	}

	if ((sarg.vap->va_mask == 0) &&
	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
		/*
		 * no further work to be done
		 */
		goto done;
	}

	/*
	 * If we got a request to set the ACL and the MODE, only
	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
	 * to change any other bits, along with setting an ACL,
	 * gives NFS4ERR_INVAL.
	 */
	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
		vattr_t va;

		va.va_mask = AT_MODE;
		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
		if (error) {
			status = puterrno4(error);
			goto done;
		}
		/* Any differing bit outside setuid/setgid/sticky is refused */
		if ((sarg.vap->va_mode ^ va.va_mode) &
		    ~(VSUID | VSGID | VSVTX)) {
			status = NFS4ERR_INVAL;
			goto done;
		}
	}

	/* Check stateid only if size has been set */
	if (sarg.vap->va_mask & AT_SIZE) {
		trunc = (sarg.vap->va_size == 0);
		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
		if (status != NFS4_OK)
			goto done;
	} else {
		/*
		 * No stateid check was made, so fill in the caller context
		 * here; CC_DONTBLOCK is requested for the VOP calls below.
		 */
		ct.cc_sysid = 0;
		ct.cc_pid = 0;
		ct.cc_caller_id = nfs4_srv_caller_id;
		ct.cc_flags = CC_DONTBLOCK;
	}

	/* XXX start of possible race with delegations */

	/*
	 * We need to specially handle size changes because it is
	 * possible for the client to create a file with read-only
	 * modes, but with the file opened for writing. If the client
	 * then tries to set the file size, e.g. ftruncate(3C),
	 * fcntl(F_FREESP), the normal access checking done in
	 * VOP_SETATTR would prevent the client from doing it even though
	 * it should be allowed to do so.  To get around this, we do the
	 * access checking for ourselves and use VOP_SPACE which doesn't
	 * do the access checking.
	 * Also the client should not be allowed to change the file
	 * size if there is a conflicting non-blocking mandatory lock in
	 * the region of the change.
	 */
	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
		u_offset_t offset;
		ssize_t length;

		/*
		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
		 * before returning, sarg.vap->va_mask is used to
		 * generate the setattr reply bitmap.  We also clear
		 * AT_SIZE below before calling VOP_SPACE.  For both
		 * of these cases, the va_mask needs to be saved here
		 * and restored after calling VOP_SETATTR.
		 */
		saved_mask = sarg.vap->va_mask;

		/*
		 * Check any possible conflict due to NBMAND locks.
		 * Get into critical region before VOP_GETATTR, so the
		 * size attribute is valid when checking conflicts.
		 */
		if (nbl_need_check(vp)) {
			nbl_start_crit(vp, RW_READER);
			in_crit = 1;
		}

		bva.va_mask = AT_UID|AT_SIZE;
		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
		if (error != 0) {
			status = puterrno4(error);
			goto done;
		}

		if (in_crit) {
			/*
			 * The region to test is the span between the old
			 * and new end of file, whichever direction the
			 * size is moving.
			 */
			if (sarg.vap->va_size < bva.va_size) {
				offset = sarg.vap->va_size;
				length = bva.va_size - sarg.vap->va_size;
			} else {
				offset = bva.va_size;
				length = sarg.vap->va_size - bva.va_size;
			}
			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
			    &ct)) {
				status = NFS4ERR_LOCKED;
				goto done;
			}
		}

		/*
		 * Caller's uid matches the file owner: drop AT_SIZE from
		 * the VOP_SETATTR mask and resize with VOP_SPACE instead.
		 */
		if (crgetuid(cr) == bva.va_uid) {
			sarg.vap->va_mask &= ~AT_SIZE;
			bf.l_type = F_WRLCK;
			bf.l_whence = 0;
			bf.l_start = (off64_t)sarg.vap->va_size;
			bf.l_len = 0;
			bf.l_sysid = 0;
			bf.l_pid = 0;
			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
			    (offset_t)sarg.vap->va_size, cr, &ct);
		}
	}

	/* Apply whatever attributes are still in the mask */
	if (!error && sarg.vap->va_mask != 0)
		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);

	/* restore va_mask -- ufs_setattr clears AT_SIZE */
	if (saved_mask & AT_SIZE)
		sarg.vap->va_mask |= AT_SIZE;

	/*
	 * If an ACL was being set, it has been delayed until now,
	 * in order to set the mode (via the VOP_SETATTR() above) first.
	 */
	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
		int i;

		/*
		 * Find the decoded ACL argument in the ntov table.
		 * NOTE(review): this scans all NFS4_MAXNUM_ATTRS amap slots
		 * rather than only the ntov.attrcnt entries that were
		 * filled in -- confirm slots past attrcnt cannot hold
		 * FATTR4_ACL by accident.
		 */
		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
			if (ntov.amap[i] == FATTR4_ACL)
				break;
		if (i < NFS4_MAXNUM_ATTRS) {
			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
			if (error == 0) {
				*resp |= FATTR4_ACL_MASK;
			} else if (error == ENOTSUP) {
				(void) rfs4_verify_attr(&sarg, resp, &ntov);
				status = NFS4ERR_ATTRNOTSUPP;
				goto done;
			}
			/* any other error falls through to the check below */
		} else {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "do_rfs4_op_setattr: "
			    "unable to find ACL in fattr4"));
			error = EINVAL;
		}
	}

	if (error) {
		/* check if a monitor detected a delegation conflict */
		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
			status = NFS4ERR_DELAY;
		else
			status = puterrno4(error);

		/*
		 * Set the response bitmap when setattr failed.
		 * If VOP_SETATTR partially succeeded, test by doing a
		 * VOP_GETATTR on the object and comparing the data
		 * to the setattr arguments.
		 */
		(void) rfs4_verify_attr(&sarg, resp, &ntov);
	} else {
		/*
		 * Force modified metadata out to stable storage.
		 */
		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
		/*
		 * Set response bitmap
		 */
		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
	}

/* Return early and already have a NFSv4 error */
done:
	/*
	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
	 * conversion sets both readable and writeable NFS4 attrs
	 * for AT_MTIME and AT_ATIME.  The line below masks out
	 * unrequested attrs from the setattr result bitmap.  This
	 * is placed after the done: label to catch the ATTRNOTSUPP
	 * case.
	 */
	*resp &= fattrp->attrmask;

	/* Leave the NBMAND critical region if it was entered above */
	if (in_crit)
		nbl_end_crit(vp);

	nfs4_ntov_table_free(&ntov, &sarg);

	return (status);
}
5572 
5573 /* ARGSUSED */
5574 static void
5575 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5576     struct compound_state *cs)
5577 {
5578 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5579 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5580 	bslabel_t *clabel;
5581 
5582 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5583 	    SETATTR4args *, args);
5584 
5585 	if (cs->vp == NULL) {
5586 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5587 		goto out;
5588 	}
5589 
5590 	/*
5591 	 * If there is an unshared filesystem mounted on this vnode,
5592 	 * do not allow to setattr on this vnode.
5593 	 */
5594 	if (vn_ismntpt(cs->vp)) {
5595 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5596 		goto out;
5597 	}
5598 
5599 	resp->attrsset = 0;
5600 
5601 	if (rdonly4(req, cs)) {
5602 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5603 		goto out;
5604 	}
5605 
5606 	/* check label before setting attributes */
5607 	if (is_system_labeled()) {
5608 		ASSERT(req->rq_label != NULL);
5609 		clabel = req->rq_label;
5610 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5611 		    "got client label from request(1)",
5612 		    struct svc_req *, req);
5613 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5614 			if (!do_rfs_label_check(clabel, cs->vp,
5615 			    EQUALITY_CHECK, cs->exi)) {
5616 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
5617 				goto out;
5618 			}
5619 		}
5620 	}
5621 
5622 	*cs->statusp = resp->status =
5623 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5624 	    &args->stateid);
5625 
5626 out:
5627 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5628 	    SETATTR4res *, resp);
5629 }
5630 
5631 /* ARGSUSED */
5632 static void
5633 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5634     struct compound_state *cs)
5635 {
5636 	/*
5637 	 * verify and nverify are exactly the same, except that nverify
5638 	 * succeeds when some argument changed, and verify succeeds when
5639 	 * when none changed.
5640 	 */
5641 
5642 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5643 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5644 
5645 	int error;
5646 	struct nfs4_svgetit_arg sarg;
5647 	struct statvfs64 sb;
5648 	struct nfs4_ntov_table ntov;
5649 
5650 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5651 	    VERIFY4args *, args);
5652 
5653 	if (cs->vp == NULL) {
5654 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5655 		goto out;
5656 	}
5657 
5658 	sarg.sbp = &sb;
5659 	sarg.is_referral = B_FALSE;
5660 	nfs4_ntov_table_init(&ntov);
5661 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5662 	    &sarg, &ntov, NFS4ATTR_VERIT);
5663 	if (resp->status != NFS4_OK) {
5664 		/*
5665 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5666 		 * so could return -1 for "no match".
5667 		 */
5668 		if (resp->status == -1)
5669 			resp->status = NFS4ERR_NOT_SAME;
5670 		goto done;
5671 	}
5672 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5673 	switch (error) {
5674 	case 0:
5675 		resp->status = NFS4_OK;
5676 		break;
5677 	case -1:
5678 		resp->status = NFS4ERR_NOT_SAME;
5679 		break;
5680 	default:
5681 		resp->status = puterrno4(error);
5682 		break;
5683 	}
5684 done:
5685 	*cs->statusp = resp->status;
5686 	nfs4_ntov_table_free(&ntov, &sarg);
5687 out:
5688 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5689 	    VERIFY4res *, resp);
5690 }
5691 
5692 /* ARGSUSED */
5693 static void
5694 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5695     struct compound_state *cs)
5696 {
5697 	/*
5698 	 * verify and nverify are exactly the same, except that nverify
5699 	 * succeeds when some argument changed, and verify succeeds when
5700 	 * when none changed.
5701 	 */
5702 
5703 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5704 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5705 
5706 	int error;
5707 	struct nfs4_svgetit_arg sarg;
5708 	struct statvfs64 sb;
5709 	struct nfs4_ntov_table ntov;
5710 
5711 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5712 	    NVERIFY4args *, args);
5713 
5714 	if (cs->vp == NULL) {
5715 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5716 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5717 		    NVERIFY4res *, resp);
5718 		return;
5719 	}
5720 	sarg.sbp = &sb;
5721 	sarg.is_referral = B_FALSE;
5722 	nfs4_ntov_table_init(&ntov);
5723 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5724 	    &sarg, &ntov, NFS4ATTR_VERIT);
5725 	if (resp->status != NFS4_OK) {
5726 		/*
5727 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5728 		 * so could return -1 for "no match".
5729 		 */
5730 		if (resp->status == -1)
5731 			resp->status = NFS4_OK;
5732 		goto done;
5733 	}
5734 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5735 	switch (error) {
5736 	case 0:
5737 		resp->status = NFS4ERR_SAME;
5738 		break;
5739 	case -1:
5740 		resp->status = NFS4_OK;
5741 		break;
5742 	default:
5743 		resp->status = puterrno4(error);
5744 		break;
5745 	}
5746 done:
5747 	*cs->statusp = resp->status;
5748 	nfs4_ntov_table_free(&ntov, &sarg);
5749 
5750 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5751 	    NVERIFY4res *, resp);
5752 }
5753 
5754 /*
5755  * XXX - This should live in an NFS header file.
5756  */
5757 #define	MAX_IOVECS	12
5758 
5759 /* ARGSUSED */
5760 static void
5761 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5762     struct compound_state *cs)
5763 {
5764 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5765 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5766 	int error;
5767 	vnode_t *vp;
5768 	struct vattr bva;
5769 	u_offset_t rlimit;
5770 	struct uio uio;
5771 	struct iovec iov[MAX_IOVECS];
5772 	struct iovec *iovp;
5773 	int iovcnt;
5774 	int ioflag;
5775 	cred_t *savecred, *cr;
5776 	bool_t *deleg = &cs->deleg;
5777 	nfsstat4 stat;
5778 	int in_crit = 0;
5779 	caller_context_t ct;
5780 	nfs4_srv_t *nsrv4;
5781 
5782 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5783 	    WRITE4args *, args);
5784 
5785 	vp = cs->vp;
5786 	if (vp == NULL) {
5787 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5788 		goto out;
5789 	}
5790 	if (cs->access == CS_ACCESS_DENIED) {
5791 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5792 		goto out;
5793 	}
5794 
5795 	cr = cs->cr;
5796 
5797 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5798 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
5799 		*cs->statusp = resp->status = stat;
5800 		goto out;
5801 	}
5802 
5803 	/*
5804 	 * We have to enter the critical region before calling VOP_RWLOCK
5805 	 * to avoid a deadlock with ufs.
5806 	 */
5807 	if (nbl_need_check(vp)) {
5808 		nbl_start_crit(vp, RW_READER);
5809 		in_crit = 1;
5810 		if (nbl_conflict(vp, NBL_WRITE,
5811 		    args->offset, args->data_len, 0, &ct)) {
5812 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5813 			goto out;
5814 		}
5815 	}
5816 
5817 	bva.va_mask = AT_MODE | AT_UID;
5818 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5819 
5820 	/*
5821 	 * If we can't get the attributes, then we can't do the
5822 	 * right access checking.  So, we'll fail the request.
5823 	 */
5824 	if (error) {
5825 		*cs->statusp = resp->status = puterrno4(error);
5826 		goto out;
5827 	}
5828 
5829 	if (rdonly4(req, cs)) {
5830 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5831 		goto out;
5832 	}
5833 
5834 	if (vp->v_type != VREG) {
5835 		*cs->statusp = resp->status =
5836 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5837 		goto out;
5838 	}
5839 
5840 	if (crgetuid(cr) != bva.va_uid &&
5841 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5842 		*cs->statusp = resp->status = puterrno4(error);
5843 		goto out;
5844 	}
5845 
5846 	if (MANDLOCK(vp, bva.va_mode)) {
5847 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5848 		goto out;
5849 	}
5850 
5851 	nsrv4 = nfs4_get_srv();
5852 	if (args->data_len == 0) {
5853 		*cs->statusp = resp->status = NFS4_OK;
5854 		resp->count = 0;
5855 		resp->committed = args->stable;
5856 		resp->writeverf = nsrv4->write4verf;
5857 		goto out;
5858 	}
5859 
5860 	if (args->mblk != NULL) {
5861 		mblk_t *m;
5862 		uint_t bytes, round_len;
5863 
5864 		iovcnt = 0;
5865 		bytes = 0;
5866 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5867 		for (m = args->mblk;
5868 		    m != NULL && bytes < round_len;
5869 		    m = m->b_cont) {
5870 			iovcnt++;
5871 			bytes += MBLKL(m);
5872 		}
5873 #ifdef DEBUG
5874 		/* should have ended on an mblk boundary */
5875 		if (bytes != round_len) {
5876 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5877 			    bytes, round_len, args->data_len);
5878 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5879 			    (void *)args->mblk, (void *)m);
5880 			ASSERT(bytes == round_len);
5881 		}
5882 #endif
5883 		if (iovcnt <= MAX_IOVECS) {
5884 			iovp = iov;
5885 		} else {
5886 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5887 		}
5888 		mblk_to_iov(args->mblk, iovcnt, iovp);
5889 	} else if (args->rlist != NULL) {
5890 		iovcnt = 1;
5891 		iovp = iov;
5892 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5893 		iovp->iov_len = args->data_len;
5894 	} else {
5895 		iovcnt = 1;
5896 		iovp = iov;
5897 		iovp->iov_base = args->data_val;
5898 		iovp->iov_len = args->data_len;
5899 	}
5900 
5901 	uio.uio_iov = iovp;
5902 	uio.uio_iovcnt = iovcnt;
5903 
5904 	uio.uio_segflg = UIO_SYSSPACE;
5905 	uio.uio_extflg = UIO_COPY_DEFAULT;
5906 	uio.uio_loffset = args->offset;
5907 	uio.uio_resid = args->data_len;
5908 	uio.uio_llimit = curproc->p_fsz_ctl;
5909 	rlimit = uio.uio_llimit - args->offset;
5910 	if (rlimit < (u_offset_t)uio.uio_resid)
5911 		uio.uio_resid = (int)rlimit;
5912 
5913 	if (args->stable == UNSTABLE4)
5914 		ioflag = 0;
5915 	else if (args->stable == FILE_SYNC4)
5916 		ioflag = FSYNC;
5917 	else if (args->stable == DATA_SYNC4)
5918 		ioflag = FDSYNC;
5919 	else {
5920 		if (iovp != iov)
5921 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
5922 		*cs->statusp = resp->status = NFS4ERR_INVAL;
5923 		goto out;
5924 	}
5925 
5926 	/*
5927 	 * We're changing creds because VM may fault and we need
5928 	 * the cred of the current thread to be used if quota
5929 	 * checking is enabled.
5930 	 */
5931 	savecred = curthread->t_cred;
5932 	curthread->t_cred = cr;
5933 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5934 	curthread->t_cred = savecred;
5935 
5936 	if (iovp != iov)
5937 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
5938 
5939 	if (error) {
5940 		*cs->statusp = resp->status = puterrno4(error);
5941 		goto out;
5942 	}
5943 
5944 	*cs->statusp = resp->status = NFS4_OK;
5945 	resp->count = args->data_len - uio.uio_resid;
5946 
5947 	if (ioflag == 0)
5948 		resp->committed = UNSTABLE4;
5949 	else
5950 		resp->committed = FILE_SYNC4;
5951 
5952 	resp->writeverf = nsrv4->write4verf;
5953 
5954 out:
5955 	if (in_crit)
5956 		nbl_end_crit(vp);
5957 
5958 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5959 	    WRITE4res *, resp);
5960 }
5961 
5962 static inline int
5963 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
5964 {
5965 	if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
5966 		return (0);
5967 	else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
5968 		return (0);
5969 	else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
5970 		return (0);
5971 	else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
5972 		return (0);
5973 	return (1);
5974 }
5975 
/*
 * Execute an NFSv4 COMPOUND request.
 *
 * The request tag is copied into the reply, the caller's credentials are
 * fetched with sec_svc_getcred(), and a results array with one slot per
 * argument op is allocated.  The ops are then dispatched in order through
 * rfsv4disptab[] while exported_lock is held as reader.  Processing stops
 * once cs->cont is cleared -- which this function does as soon as an op
 * leaves a status other than NFS4_OK or an op number is not valid for the
 * request's minor version -- and the results array is then shrunk so that
 * it ends at the last op executed.
 *
 * If rv is non-NULL it is set to 1 when the credential lookup fails (after
 * svcerr_badcred() has been called) and to 0 otherwise.  When the ops have
 * been run, req->rq_label is freed before returning.
 */
void
rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
    struct svc_req *req, int *rv)
{
	uint_t i;
	cred_t *cr;
	nfs4_srv_t *nsrv4;
	nfs_export_t *ne = nfs_get_export();

	if (rv != NULL)
		*rv = 0;
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = args->tag.utf8string_len;
	if (args->tag.utf8string_len != 0) {
		resp->tag.utf8string_val =
		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
		    resp->tag.utf8string_len);
	} else {
		resp->tag.utf8string_val = NULL;
	}

	cs->statusp = &resp->status;
	cs->req = req;
	cs->minorversion = args->minorversion;
	resp->array = NULL;
	resp->array_len = 0;

	/* An empty compound is trivially successful */
	if (args->array_len == 0) {
		resp->status = NFS4_OK;
		return;
	}

	cr = svc_xprt_cred(req->rq_xprt);
	ASSERT(cr != NULL);

	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
		/* Fire both probes so start/done stay paired for tracing */
		DTRACE_NFSV4_2(compound__start, struct compound_state *,
		    cs, COMPOUND4args *, args);
		DTRACE_NFSV4_2(compound__done, struct compound_state *,
		    cs, COMPOUND4res *, resp);
		svcerr_badcred(req->rq_xprt);
		if (rv != NULL)
			*rv = 1;
		return;
	}

	/* One (zeroed) result slot per requested op */
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
	    KM_SLEEP);

	cs->op_len = args->array_len;
	cs->basecr = cr;
	nsrv4 = nfs4_get_srv();

	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
	    COMPOUND4args *, args);

	/*
	 * For now, NFS4 compound processing must be protected by
	 * exported_lock because it can access more than one exportinfo
	 * per compound and share/unshare can now change multiple
	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
	 * per proc (excluding public exinfo), and exi_count design
	 * is sufficient to protect concurrent execution of NFS2/3
	 * ops along with unexport.  This lock will be removed as
	 * part of the NFSv4 phase 2 namespace redesign work.
	 */
	rw_enter(&ne->exported_lock, RW_READER);

	/*
	 * If this is the first compound we've seen, we need to start all
	 * new instances' grace periods.
	 */
	if (nsrv4->seen_first_compound == 0) {
		rfs4_grace_start_new(nsrv4);
		/*
		 * This must be set after rfs4_grace_start_new(), otherwise
		 * another thread could proceed past here before the former
		 * is finished.
		 */
		nsrv4->seen_first_compound = 1;
	}

	for (i = 0; i < args->array_len && cs->cont; i++) {
		nfs_argop4 *argop;
		nfs_resop4 *resop;
		uint_t op;
		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];

		argop = &args->array[i];
		resop = &resp->array[i];
		resop->resop = argop->argop;
		op = (uint_t)resop->resop;

		cs->op_pos = i;
		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
			/*
			 * Count the individual ops here; NULL and COMPOUND
			 * are counted in common_dispatch()
			 */
			stat[op].value.ui64++;

			NFS4_DEBUG(rfs4_debug > 1,
			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
			    rfs4_op_string[op], *cs->statusp));
			/* The first failing op ends the compound */
			if (*cs->statusp != NFS4_OK)
				cs->cont = FALSE;
		} else {
			/*
			 * This is effectively dead code since XDR code
			 * will have already returned BADXDR if op doesn't
			 * decode to legal value.  This only done for a
			 * day when XDR code doesn't verify v4 opcodes.
			 */
			op = OP_ILLEGAL;
			stat[OP_ILLEGAL_IDX].value.ui64++;

			rfs4_op_illegal(argop, resop, req, cs);
			cs->cont = FALSE;
		}

		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs->cont) {
			nfs_resop4 *new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_resop4));

			resp->array_len =  i + 1;
			resp->array = new_res;
		}
	}

	rw_exit(&ne->exported_lock);

	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
	    COMPOUND4res *, resp);

	/*
	 * done with this compound request, free the label
	 */

	if (req->rq_label != NULL) {
		kmem_free(req->rq_label, sizeof (bslabel_t));
		req->rq_label = NULL;
	}
}
6133 
6134 /*
6135  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6136  * XXX zero out the tag and array values. Need to investigate why the
6137  * XXX calls occur, but at least prevent the panic for now.
6138  */
6139 void
6140 rfs4_compound_free(COMPOUND4res *resp)
6141 {
6142 	uint_t i;
6143 
6144 	if (resp->tag.utf8string_val) {
6145 		UTF8STRING_FREE(resp->tag)
6146 	}
6147 
6148 	for (i = 0; i < resp->array_len; i++) {
6149 		nfs_resop4 *resop;
6150 		uint_t op;
6151 
6152 		resop = &resp->array[i];
6153 		op = (uint_t)resop->resop;
6154 		if (op < rfsv4disp_cnt) {
6155 			(*rfsv4disptab[op].dis_resfree)(resop);
6156 		}
6157 	}
6158 	if (resp->array != NULL) {
6159 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6160 	}
6161 }
6162 
6163 /*
6164  * Process the value of the compound request rpc flags, as a bit-AND
6165  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
6166  */
6167 void
6168 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
6169 {
6170 	int i;
6171 	int flag = RPC_ALL;
6172 
6173 	for (i = 0; flag && i < args->array_len; i++) {
6174 		uint_t op;
6175 
6176 		op = (uint_t)args->array[i].argop;
6177 
6178 		if (op < rfsv4disp_cnt)
6179 			flag &= rfsv4disptab[op].dis_flags;
6180 		else
6181 			flag = 0;
6182 	}
6183 	*flagp = flag;
6184 }
6185 
6186 nfsstat4
6187 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6188 {
6189 	nfsstat4 e;
6190 
6191 	rfs4_dbe_lock(cp->rc_dbe);
6192 
6193 	if (cp->rc_sysidt != LM_NOSYSID) {
6194 		*sp = cp->rc_sysidt;
6195 		e = NFS4_OK;
6196 
6197 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6198 		*sp = cp->rc_sysidt;
6199 		e = NFS4_OK;
6200 
6201 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6202 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
6203 	} else
6204 		e = NFS4ERR_DELAY;
6205 
6206 	rfs4_dbe_unlock(cp->rc_dbe);
6207 	return (e);
6208 }
6209 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a lock request.
 *
 * str is a caller-supplied prefix, operation is the fcntl-style command
 * (F_GETLK, F_SETLK or F_SETLK_NBMAND) and flk is the lock being acted
 * on.  A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op, *type;

	switch (operation) {
	case F_GETLK:
		op = "F_GETLK";
		break;
	case F_SETLK:
		op = "F_SETLK";
		break;
	case F_SETLK_NBMAND:
		op = "F_SETLK_NBMAND";
		break;
	default:
		op = "F_UNKNOWN";
		break;
	}

	switch (flk->l_type) {
	case F_UNLCK:
		type = "F_UNLCK";
		break;
	case F_RDLCK:
		type = "F_RDLCK";
		break;
	case F_WRLCK:
		type = "F_WRLCK";
		break;
	default:
		type = "F_UNKNOWN";
		break;
	}

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op, type, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do { } while (0) so the macro behaves as a single statement
 * and cannot capture an "else" that follows it at the call site.
 */
#define	LOCK_PRINT(d, s, t, f)			\
	do {					\
		if (d)				\
			lock_print(s, t, f);	\
	} while (0)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6246 
/*
 * Credential check hook.  As written this accepts every request: none of
 * the arguments are examined and TRUE is always returned.
 */
/*ARGSUSED*/
static bool_t
creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
{
	return (TRUE);
}
6253 
6254 /*
6255  * Look up the pathname using the vp in cs as the directory vnode.
6256  * cs->vp will be the vnode for the file on success
6257  */
6258 
6259 static nfsstat4
6260 rfs4_lookup(component4 *component, struct svc_req *req,
6261     struct compound_state *cs)
6262 {
6263 	char *nm;
6264 	uint32_t len;
6265 	nfsstat4 status;
6266 	struct sockaddr *ca;
6267 	char *name;
6268 
6269 	if (cs->vp == NULL) {
6270 		return (NFS4ERR_NOFILEHANDLE);
6271 	}
6272 	if (cs->vp->v_type != VDIR) {
6273 		return (NFS4ERR_NOTDIR);
6274 	}
6275 
6276 	status = utf8_dir_verify(component);
6277 	if (status != NFS4_OK)
6278 		return (status);
6279 
6280 	nm = utf8_to_fn(component, &len, NULL);
6281 	if (nm == NULL) {
6282 		return (NFS4ERR_INVAL);
6283 	}
6284 
6285 	if (len > MAXNAMELEN) {
6286 		kmem_free(nm, len);
6287 		return (NFS4ERR_NAMETOOLONG);
6288 	}
6289 
6290 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6291 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6292 	    MAXPATHLEN + 1);
6293 
6294 	if (name == NULL) {
6295 		kmem_free(nm, len);
6296 		return (NFS4ERR_INVAL);
6297 	}
6298 
6299 	status = do_rfs4_op_lookup(name, req, cs);
6300 
6301 	if (name != nm)
6302 		kmem_free(name, MAXPATHLEN + 1);
6303 
6304 	kmem_free(nm, len);
6305 
6306 	return (status);
6307 }
6308 
6309 static nfsstat4
6310 rfs4_lookupfile(component4 *component, struct svc_req *req,
6311     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6312 {
6313 	nfsstat4 status;
6314 	vnode_t *dvp = cs->vp;
6315 	vattr_t bva, ava, fva;
6316 	int error;
6317 
6318 	/* Get "before" change value */
6319 	bva.va_mask = AT_CTIME|AT_SEQ;
6320 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6321 	if (error)
6322 		return (puterrno4(error));
6323 
6324 	/* rfs4_lookup may VN_RELE directory */
6325 	VN_HOLD(dvp);
6326 
6327 	status = rfs4_lookup(component, req, cs);
6328 	if (status != NFS4_OK) {
6329 		VN_RELE(dvp);
6330 		return (status);
6331 	}
6332 
6333 	/*
6334 	 * Get "after" change value, if it fails, simply return the
6335 	 * before value.
6336 	 */
6337 	ava.va_mask = AT_CTIME|AT_SEQ;
6338 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6339 		ava.va_ctime = bva.va_ctime;
6340 		ava.va_seq = 0;
6341 	}
6342 	VN_RELE(dvp);
6343 
6344 	/*
6345 	 * Validate the file is a file
6346 	 */
6347 	fva.va_mask = AT_TYPE|AT_MODE;
6348 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6349 	if (error)
6350 		return (puterrno4(error));
6351 
6352 	if (fva.va_type != VREG) {
6353 		if (fva.va_type == VDIR)
6354 			return (NFS4ERR_ISDIR);
6355 		if (fva.va_type == VLNK)
6356 			return (NFS4ERR_SYMLINK);
6357 		return (NFS4ERR_INVAL);
6358 	}
6359 
6360 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6361 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6362 
6363 	/*
6364 	 * It is undefined if VOP_LOOKUP will change va_seq, so
6365 	 * cinfo.atomic = TRUE only if we have
6366 	 * non-zero va_seq's, and they have not changed.
6367 	 */
6368 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6369 		cinfo->atomic = TRUE;
6370 	else
6371 		cinfo->atomic = FALSE;
6372 
6373 	/* Check for mandatory locking */
6374 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6375 	return (check_open_access(access, cs, req));
6376 }
6377 
6378 static nfsstat4
6379 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6380     cred_t *cr, vnode_t **vpp, bool_t *created)
6381 {
6382 	int error;
6383 	nfsstat4 status = NFS4_OK;
6384 	vattr_t va;
6385 
6386 tryagain:
6387 
6388 	/*
6389 	 * The file open mode used is VWRITE.  If the client needs
6390 	 * some other semantic, then it should do the access checking
6391 	 * itself.  It would have been nice to have the file open mode
6392 	 * passed as part of the arguments.
6393 	 */
6394 
6395 	*created = TRUE;
6396 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6397 
6398 	if (error) {
6399 		*created = FALSE;
6400 
6401 		/*
6402 		 * If we got something other than file already exists
6403 		 * then just return this error.  Otherwise, we got
6404 		 * EEXIST.  If we were doing a GUARDED create, then
6405 		 * just return this error.  Otherwise, we need to
6406 		 * make sure that this wasn't a duplicate of an
6407 		 * exclusive create request.
6408 		 *
6409 		 * The assumption is made that a non-exclusive create
6410 		 * request will never return EEXIST.
6411 		 */
6412 
6413 		if (error != EEXIST || mode == GUARDED4) {
6414 			status = puterrno4(error);
6415 			return (status);
6416 		}
6417 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6418 		    NULL, NULL, NULL);
6419 
6420 		if (error) {
6421 			/*
6422 			 * We couldn't find the file that we thought that
6423 			 * we just created.  So, we'll just try creating
6424 			 * it again.
6425 			 */
6426 			if (error == ENOENT)
6427 				goto tryagain;
6428 
6429 			status = puterrno4(error);
6430 			return (status);
6431 		}
6432 
6433 		if (mode == UNCHECKED4) {
6434 			/* existing object must be regular file */
6435 			if ((*vpp)->v_type != VREG) {
6436 				if ((*vpp)->v_type == VDIR)
6437 					status = NFS4ERR_ISDIR;
6438 				else if ((*vpp)->v_type == VLNK)
6439 					status = NFS4ERR_SYMLINK;
6440 				else
6441 					status = NFS4ERR_INVAL;
6442 				VN_RELE(*vpp);
6443 				return (status);
6444 			}
6445 
6446 			return (NFS4_OK);
6447 		}
6448 
6449 		/* Check for duplicate request */
6450 		va.va_mask = AT_MTIME;
6451 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6452 		if (!error) {
6453 			/* We found the file */
6454 			const timestruc_t *mtime = &vap->va_mtime;
6455 
6456 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
6457 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
6458 				/* but its not our creation */
6459 				VN_RELE(*vpp);
6460 				return (NFS4ERR_EXIST);
6461 			}
6462 			*created = TRUE; /* retrans of create == created */
6463 			return (NFS4_OK);
6464 		}
6465 		VN_RELE(*vpp);
6466 		return (NFS4ERR_EXIST);
6467 	}
6468 
6469 	return (NFS4_OK);
6470 }
6471 
6472 static nfsstat4
6473 check_open_access(uint32_t access, struct compound_state *cs,
6474     struct svc_req *req)
6475 {
6476 	int error;
6477 	vnode_t *vp;
6478 	bool_t readonly;
6479 	cred_t *cr = cs->cr;
6480 
6481 	/* For now we don't allow mandatory locking as per V2/V3 */
6482 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6483 		return (NFS4ERR_ACCESS);
6484 	}
6485 
6486 	vp = cs->vp;
6487 	ASSERT(cr != NULL && vp->v_type == VREG);
6488 
6489 	/*
6490 	 * If the file system is exported read only and we are trying
6491 	 * to open for write, then return NFS4ERR_ROFS
6492 	 */
6493 
6494 	readonly = rdonly4(req, cs);
6495 
6496 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6497 		return (NFS4ERR_ROFS);
6498 
6499 	if (access & OPEN4_SHARE_ACCESS_READ) {
6500 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6501 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6502 			return (NFS4ERR_ACCESS);
6503 		}
6504 	}
6505 
6506 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
6507 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6508 		if (error)
6509 			return (NFS4ERR_ACCESS);
6510 	}
6511 
6512 	return (NFS4_OK);
6513 }
6514 
6515 static void
6516 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6517 {
6518 	timespec32_t *time = (timespec32_t *)&v;
6519 
6520 	/*
6521 	 * Ensure no time overflows. Assumes underlying
6522 	 * filesystem supports at least 32 bits.
6523 	 * Truncate nsec to usec resolution to allow valid
6524 	 * compares even if the underlying filesystem truncates.
6525 	 */
6526 	mtime->tv_sec = time->tv_sec % TIME32_MAX;
6527 	mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6528 }
6529 
6530 static nfsstat4
6531 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6532     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6533 {
6534 	struct nfs4_svgetit_arg sarg;
6535 	struct nfs4_ntov_table ntov;
6536 
6537 	bool_t ntov_table_init = FALSE;
6538 	struct statvfs64 sb;
6539 	nfsstat4 status;
6540 	vnode_t *vp;
6541 	vattr_t bva, ava, iva, cva, *vap;
6542 	vnode_t *dvp;
6543 	char *nm = NULL;
6544 	uint_t buflen;
6545 	bool_t created;
6546 	bool_t setsize = FALSE;
6547 	len_t reqsize;
6548 	int error;
6549 	bool_t trunc;
6550 	caller_context_t ct;
6551 	component4 *component;
6552 	bslabel_t *clabel;
6553 	struct sockaddr *ca;
6554 	char *name = NULL;
6555 	fattr4 *fattr = NULL;
6556 
6557 	ASSERT(*attrset == 0);
6558 
6559 	sarg.sbp = &sb;
6560 	sarg.is_referral = B_FALSE;
6561 
6562 	dvp = cs->vp;
6563 
6564 	/* Check if the file system is read only */
6565 	if (rdonly4(req, cs))
6566 		return (NFS4ERR_ROFS);
6567 
6568 	/* check the label of including directory */
6569 	if (is_system_labeled()) {
6570 		ASSERT(req->rq_label != NULL);
6571 		clabel = req->rq_label;
6572 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6573 		    "got client label from request(1)",
6574 		    struct svc_req *, req);
6575 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
6576 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6577 			    cs->exi)) {
6578 				return (NFS4ERR_ACCESS);
6579 			}
6580 		}
6581 	}
6582 
6583 	if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6584 	    dvp->v_flag & V_XATTRDIR) {
6585 		/* prohibit EXCL create of named attributes */
6586 		return (NFS4ERR_INVAL);
6587 	}
6588 
6589 	/*
6590 	 * Get the last component of path name in nm. cs will reference
6591 	 * the including directory on success.
6592 	 */
6593 	component = &args->claim.open_claim4_u.file;
6594 	status = utf8_dir_verify(component);
6595 	if (status != NFS4_OK)
6596 		return (status);
6597 
6598 	nm = utf8_to_fn(component, &buflen, NULL);
6599 
6600 	if (nm == NULL)
6601 		return (NFS4ERR_RESOURCE);
6602 
6603 	if (buflen > MAXNAMELEN) {
6604 		kmem_free(nm, buflen);
6605 		return (NFS4ERR_NAMETOOLONG);
6606 	}
6607 
6608 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6609 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6610 	if (error) {
6611 		kmem_free(nm, buflen);
6612 		return (puterrno4(error));
6613 	}
6614 
6615 	if (bva.va_type != VDIR) {
6616 		kmem_free(nm, buflen);
6617 		return (NFS4ERR_NOTDIR);
6618 	}
6619 
6620 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6621 
6622 	switch (args->mode) {
6623 	case GUARDED4:
6624 		/*FALLTHROUGH*/
6625 	case UNCHECKED4:
6626 	case EXCLUSIVE4_1:
6627 		nfs4_ntov_table_init(&ntov);
6628 		ntov_table_init = TRUE;
6629 
6630 		if (args->mode == EXCLUSIVE4_1)
6631 			fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6632 		else
6633 			fattr = &args->createhow4_u.createattrs;
6634 
6635 		status = do_rfs4_set_attrs(attrset,
6636 		    fattr,
6637 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
6638 
6639 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6640 		    sarg.vap->va_type != VREG) {
6641 			if (sarg.vap->va_type == VDIR)
6642 				status = NFS4ERR_ISDIR;
6643 			else if (sarg.vap->va_type == VLNK)
6644 				status = NFS4ERR_SYMLINK;
6645 			else
6646 				status = NFS4ERR_INVAL;
6647 		}
6648 
6649 		if (status != NFS4_OK) {
6650 			kmem_free(nm, buflen);
6651 			nfs4_ntov_table_free(&ntov, &sarg);
6652 			*attrset = 0;
6653 			return (status);
6654 		}
6655 
6656 		vap = sarg.vap;
6657 		vap->va_type = VREG;
6658 		vap->va_mask |= AT_TYPE;
6659 
6660 		if ((vap->va_mask & AT_MODE) == 0) {
6661 			vap->va_mask |= AT_MODE;
6662 			vap->va_mode = (mode_t)0600;
6663 		}
6664 
6665 		if (vap->va_mask & AT_SIZE) {
6666 
6667 			/* Disallow create with a non-zero size */
6668 
6669 			if ((reqsize = sarg.vap->va_size) != 0) {
6670 				kmem_free(nm, buflen);
6671 				nfs4_ntov_table_free(&ntov, &sarg);
6672 				*attrset = 0;
6673 				return (NFS4ERR_INVAL);
6674 			}
6675 			setsize = TRUE;
6676 		}
6677 		if (args->mode == EXCLUSIVE4_1) {
6678 			rfs4_verifier_to_mtime(
6679 			    args->createhow4_u.ch_createboth.cva_verf,
6680 			    &vap->va_mtime);
6681 			/* attrset will be set later */
6682 			fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6683 			vap->va_mask |= AT_MTIME;
6684 		}
6685 		break;
6686 
6687 	case EXCLUSIVE4:
6688 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6689 		cva.va_type = VREG;
6690 		cva.va_mode = (mode_t)0;
6691 
6692 		rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6693 		    &cva.va_mtime);
6694 
6695 		vap = &cva;
6696 
6697 		/*
6698 		 * For EXCL create, attrset is set to the server attr
6699 		 * used to cache the client's verifier.
6700 		 */
6701 		*attrset = FATTR4_TIME_MODIFY_MASK;
6702 		break;
6703 	}
6704 
6705 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6706 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6707 	    MAXPATHLEN  + 1);
6708 
6709 	if (name == NULL) {
6710 		kmem_free(nm, buflen);
6711 		return (NFS4ERR_SERVERFAULT);
6712 	}
6713 
6714 	status = create_vnode(dvp, name, vap, args->mode,
6715 	    cs->cr, &vp, &created);
6716 	if (nm != name)
6717 		kmem_free(name, MAXPATHLEN + 1);
6718 	kmem_free(nm, buflen);
6719 
6720 	if (status != NFS4_OK) {
6721 		if (ntov_table_init)
6722 			nfs4_ntov_table_free(&ntov, &sarg);
6723 		*attrset = 0;
6724 		return (status);
6725 	}
6726 
6727 	trunc = (setsize && !created);
6728 
6729 	if (args->mode != EXCLUSIVE4) {
6730 		bitmap4 createmask = fattr->attrmask;
6731 
6732 		/*
6733 		 * True verification that object was created with correct
6734 		 * attrs is impossible.  The attrs could have been changed
6735 		 * immediately after object creation.  If attributes did
6736 		 * not verify, the only recourse for the server is to
6737 		 * destroy the object.  Maybe if some attrs (like gid)
6738 		 * are set incorrectly, the object should be destroyed;
6739 		 * however, seems bad as a default policy.  Do we really
6740 		 * want to destroy an object over one of the times not
6741 		 * verifying correctly?  For these reasons, the server
6742 		 * currently sets bits in attrset for createattrs
6743 		 * that were set; however, no verification is done.
6744 		 *
6745 		 * vmask_to_nmask accounts for vattr bits set on create
6746 		 *	[do_rfs4_set_attrs() only sets resp bits for
6747 		 *	 non-vattr/vfs bits.]
6748 		 * Mask off any bits we set by default so as not to return
6749 		 * more attrset bits than were requested in createattrs
6750 		 */
6751 		if (created) {
6752 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6753 			*attrset &= createmask;
6754 		} else {
6755 			/*
6756 			 * We did not create the vnode (we tried but it
6757 			 * already existed).  In this case, the only createattr
6758 			 * that the spec allows the server to set is size,
6759 			 * and even then, it can only be set if it is 0.
6760 			 */
6761 			*attrset = 0;
6762 			if (trunc)
6763 				*attrset = FATTR4_SIZE_MASK;
6764 		}
6765 	}
6766 	if (ntov_table_init)
6767 		nfs4_ntov_table_free(&ntov, &sarg);
6768 
6769 	/*
6770 	 * Get the initial "after" sequence number, if it fails,
6771 	 * set to zero, time to before.
6772 	 */
6773 	iva.va_mask = AT_CTIME|AT_SEQ;
6774 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6775 		iva.va_seq = 0;
6776 		iva.va_ctime = bva.va_ctime;
6777 	}
6778 
6779 	/*
6780 	 * create_vnode attempts to create the file exclusive,
6781 	 * if it already exists the VOP_CREATE will fail and
6782 	 * may not increase va_seq. It is atomic if
6783 	 * we haven't changed the directory, but if it has changed
6784 	 * we don't know what changed it.
6785 	 */
6786 	if (!created) {
6787 		if (bva.va_seq && iva.va_seq &&
6788 		    bva.va_seq == iva.va_seq)
6789 			cinfo->atomic = TRUE;
6790 		else
6791 			cinfo->atomic = FALSE;
6792 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6793 	} else {
6794 		/*
6795 		 * The entry was created, we need to sync the
6796 		 * directory metadata.
6797 		 */
6798 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6799 
6800 		/*
6801 		 * Get "after" change value, if it fails, simply return the
6802 		 * before value.
6803 		 */
6804 		ava.va_mask = AT_CTIME|AT_SEQ;
6805 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6806 			ava.va_ctime = bva.va_ctime;
6807 			ava.va_seq = 0;
6808 		}
6809 
6810 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6811 
6812 		/*
6813 		 * The cinfo->atomic = TRUE only if we have
6814 		 * non-zero va_seq's, and it has incremented by exactly one
6815 		 * during the create_vnode and it didn't
6816 		 * change during the VOP_FSYNC.
6817 		 */
6818 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6819 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6820 			cinfo->atomic = TRUE;
6821 		else
6822 			cinfo->atomic = FALSE;
6823 	}
6824 
6825 	/* Check for mandatory locking and that the size gets set. */
6826 	cva.va_mask = AT_MODE;
6827 	if (setsize)
6828 		cva.va_mask |= AT_SIZE;
6829 
6830 	/* Assume the worst */
6831 	cs->mandlock = TRUE;
6832 
6833 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6834 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6835 
6836 		/*
6837 		 * Truncate the file if necessary; this would be
6838 		 * the case for create over an existing file.
6839 		 */
6840 
6841 		if (trunc) {
6842 			int in_crit = 0;
6843 			rfs4_file_t *fp;
6844 			nfs4_srv_t *nsrv4;
6845 			bool_t create = FALSE;
6846 
6847 			/*
6848 			 * We are writing over an existing file.
6849 			 * Check to see if we need to recall a delegation.
6850 			 */
6851 			nsrv4 = nfs4_get_srv();
6852 			rfs4_hold_deleg_policy(nsrv4);
6853 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6854 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6855 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
6856 					rfs4_file_rele(fp);
6857 					rfs4_rele_deleg_policy(nsrv4);
6858 					VN_RELE(vp);
6859 					*attrset = 0;
6860 					return (NFS4ERR_DELAY);
6861 				}
6862 				rfs4_file_rele(fp);
6863 			}
6864 			rfs4_rele_deleg_policy(nsrv4);
6865 
6866 			if (nbl_need_check(vp)) {
6867 				in_crit = 1;
6868 
6869 				ASSERT(reqsize == 0);
6870 
6871 				nbl_start_crit(vp, RW_READER);
6872 				if (nbl_conflict(vp, NBL_WRITE, 0,
6873 				    cva.va_size, 0, NULL)) {
6874 					in_crit = 0;
6875 					nbl_end_crit(vp);
6876 					VN_RELE(vp);
6877 					*attrset = 0;
6878 					return (NFS4ERR_ACCESS);
6879 				}
6880 			}
6881 			ct.cc_sysid = 0;
6882 			ct.cc_pid = 0;
6883 			ct.cc_caller_id = nfs4_srv_caller_id;
6884 			ct.cc_flags = CC_DONTBLOCK;
6885 
6886 			cva.va_mask = AT_SIZE;
6887 			cva.va_size = reqsize;
6888 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6889 			if (in_crit)
6890 				nbl_end_crit(vp);
6891 		}
6892 	}
6893 
6894 	error = makefh4(&cs->fh, vp, cs->exi);
6895 
6896 	/*
6897 	 * Force modified data and metadata out to stable storage.
6898 	 */
6899 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6900 
6901 	if (error) {
6902 		VN_RELE(vp);
6903 		*attrset = 0;
6904 		return (puterrno4(error));
6905 	}
6906 
6907 	/* if parent dir is attrdir, set namedattr fh flag */
6908 	if (dvp->v_flag & V_XATTRDIR)
6909 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6910 
6911 	if (cs->vp)
6912 		VN_RELE(cs->vp);
6913 
6914 	cs->vp = vp;
6915 
6916 	/*
6917 	 * if we did not create the file, we will need to check
6918 	 * the access bits on the file
6919 	 */
6920 
6921 	if (!created) {
6922 		if (setsize)
6923 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6924 		status = check_open_access(args->share_access, cs, req);
6925 		if (status != NFS4_OK)
6926 			*attrset = 0;
6927 	}
6928 	return (status);
6929 }
6930 
6931 /*ARGSUSED*/
6932 static void
6933 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6934     rfs4_openowner_t *oo, delegreq_t deleg,
6935     uint32_t access, uint32_t deny,
6936     OPEN4res *resp, int deleg_cur)
6937 {
6938 	/* XXX Currently not using req  */
6939 	rfs4_state_t *sp;
6940 	rfs4_file_t *fp;
6941 	bool_t screate = TRUE;
6942 	bool_t fcreate = TRUE;
6943 	uint32_t open_a, share_a;
6944 	uint32_t open_d, share_d;
6945 	rfs4_deleg_state_t *dsp;
6946 	sysid_t sysid;
6947 	nfsstat4 status;
6948 	caller_context_t ct;
6949 	int fflags = 0;
6950 	int recall = 0;
6951 	int err;
6952 	int first_open;
6953 
6954 	/* get the file struct and hold a lock on it during initial open */
6955 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6956 	if (fp == NULL) {
6957 		resp->status = NFS4ERR_RESOURCE;
6958 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6959 		return;
6960 	}
6961 
6962 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6963 	if (sp == NULL) {
6964 		resp->status = NFS4ERR_RESOURCE;
6965 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6966 		/* No need to keep any reference */
6967 		rw_exit(&fp->rf_file_rwlock);
6968 		rfs4_file_rele(fp);
6969 		return;
6970 	}
6971 
6972 	/* try to get the sysid before continuing */
6973 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6974 		resp->status = status;
6975 		rfs4_file_rele(fp);
6976 		/* Not a fully formed open; "close" it */
6977 		if (screate == TRUE)
6978 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6979 		rfs4_state_rele(sp);
6980 		return;
6981 	}
6982 
6983 	/* Calculate the fflags for this OPEN. */
6984 	if (access & OPEN4_SHARE_ACCESS_READ)
6985 		fflags |= FREAD;
6986 	if (access & OPEN4_SHARE_ACCESS_WRITE)
6987 		fflags |= FWRITE;
6988 
6989 	rfs4_dbe_lock(sp->rs_dbe);
6990 
6991 	/*
6992 	 * Calculate the new deny and access mode that this open is adding to
6993 	 * the file for this open owner;
6994 	 */
6995 	open_d = (deny & ~sp->rs_open_deny);
6996 	open_a = (access & ~sp->rs_open_access);
6997 
6998 	/*
6999 	 * Calculate the new share access and share deny modes that this open
7000 	 * is adding to the file for this open owner;
7001 	 */
7002 	share_a = (access & ~sp->rs_share_access);
7003 	share_d = (deny & ~sp->rs_share_deny);
7004 
7005 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
7006 
7007 	/*
7008 	 * Check to see the client has already sent an open for this
7009 	 * open owner on this file with the same share/deny modes.
7010 	 * If so, we don't need to check for a conflict and we don't
7011 	 * need to add another shrlock.  If not, then we need to
7012 	 * check for conflicts in deny and access before checking for
7013 	 * conflicts in delegation.  We don't want to recall a
7014 	 * delegation based on an open that will eventually fail based
7015 	 * on shares modes.
7016 	 */
7017 
7018 	if (share_a || share_d) {
7019 		if ((err = rfs4_share(sp, access, deny)) != 0) {
7020 			rfs4_dbe_unlock(sp->rs_dbe);
7021 			resp->status = err;
7022 
7023 			rfs4_file_rele(fp);
7024 			/* Not a fully formed open; "close" it */
7025 			if (screate == TRUE)
7026 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7027 			rfs4_state_rele(sp);
7028 			return;
7029 		}
7030 	}
7031 
7032 	rfs4_dbe_lock(fp->rf_dbe);
7033 
7034 	/*
7035 	 * Check to see if this file is delegated and if so, if a
7036 	 * recall needs to be done.
7037 	 */
7038 	if (rfs4_check_recall(sp, access)) {
7039 		rfs4_dbe_unlock(fp->rf_dbe);
7040 		rfs4_dbe_unlock(sp->rs_dbe);
7041 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
7042 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
7043 		rfs4_dbe_lock(sp->rs_dbe);
7044 
7045 		/* if state closed while lock was dropped */
7046 		if (sp->rs_closed) {
7047 			if (share_a || share_d)
7048 				(void) rfs4_unshare(sp);
7049 			rfs4_dbe_unlock(sp->rs_dbe);
7050 			rfs4_file_rele(fp);
7051 			/* Not a fully formed open; "close" it */
7052 			if (screate == TRUE)
7053 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7054 			rfs4_state_rele(sp);
7055 			resp->status = NFS4ERR_OLD_STATEID;
7056 			return;
7057 		}
7058 
7059 		rfs4_dbe_lock(fp->rf_dbe);
7060 		/* Let's see if the delegation was returned */
7061 		if (rfs4_check_recall(sp, access)) {
7062 			rfs4_dbe_unlock(fp->rf_dbe);
7063 			if (share_a || share_d)
7064 				(void) rfs4_unshare(sp);
7065 			rfs4_dbe_unlock(sp->rs_dbe);
7066 			rfs4_file_rele(fp);
7067 			rfs4_update_lease(sp->rs_owner->ro_client);
7068 
7069 			/* Not a fully formed open; "close" it */
7070 			if (screate == TRUE)
7071 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7072 			rfs4_state_rele(sp);
7073 			resp->status = NFS4ERR_DELAY;
7074 			return;
7075 		}
7076 	}
7077 	/*
7078 	 * the share check passed and any delegation conflict has been
7079 	 * taken care of, now call vop_open.
7080 	 * if this is the first open then call vop_open with fflags.
7081 	 * if not, call vn_open_upgrade with just the upgrade flags.
7082 	 *
7083 	 * if the file has been opened already, it will have the current
7084 	 * access mode in the state struct.  if it has no share access, then
7085 	 * this is a new open.
7086 	 *
7087 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7088 	 * call VOP_OPEN(), just do the open upgrade.
7089 	 */
7090 	if (first_open && !deleg_cur) {
7091 		ct.cc_sysid = sysid;
7092 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7093 		ct.cc_caller_id = nfs4_srv_caller_id;
7094 		ct.cc_flags = CC_DONTBLOCK;
7095 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7096 		if (err) {
7097 			rfs4_dbe_unlock(fp->rf_dbe);
7098 			if (share_a || share_d)
7099 				(void) rfs4_unshare(sp);
7100 			rfs4_dbe_unlock(sp->rs_dbe);
7101 			rfs4_file_rele(fp);
7102 
7103 			/* Not a fully formed open; "close" it */
7104 			if (screate == TRUE)
7105 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7106 			rfs4_state_rele(sp);
7107 			/* check if a monitor detected a delegation conflict */
7108 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7109 				resp->status = NFS4ERR_DELAY;
7110 			else
7111 				resp->status = NFS4ERR_SERVERFAULT;
7112 			return;
7113 		}
7114 	} else { /* open upgrade */
7115 		/*
7116 		 * calculate the fflags for the new mode that is being added
7117 		 * by this upgrade.
7118 		 */
7119 		fflags = 0;
7120 		if (open_a & OPEN4_SHARE_ACCESS_READ)
7121 			fflags |= FREAD;
7122 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7123 			fflags |= FWRITE;
7124 		vn_open_upgrade(cs->vp, fflags);
7125 	}
7126 	sp->rs_open_access |= access;
7127 	sp->rs_open_deny |= deny;
7128 
7129 	if (open_d & OPEN4_SHARE_DENY_READ)
7130 		fp->rf_deny_read++;
7131 	if (open_d & OPEN4_SHARE_DENY_WRITE)
7132 		fp->rf_deny_write++;
7133 	fp->rf_share_deny |= deny;
7134 
7135 	if (open_a & OPEN4_SHARE_ACCESS_READ)
7136 		fp->rf_access_read++;
7137 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7138 		fp->rf_access_write++;
7139 	fp->rf_share_access |= access;
7140 
7141 	/*
7142 	 * Check for delegation here. if the deleg argument is not
7143 	 * DELEG_ANY, then this is a reclaim from a client and
7144 	 * we must honor the delegation requested. If necessary we can
7145 	 * set the recall flag.
7146 	 */
7147 
7148 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
7149 
7150 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7151 
7152 	next_stateid(&sp->rs_stateid);
7153 
7154 	resp->stateid = sp->rs_stateid.stateid;
7155 
7156 	rfs4_dbe_unlock(fp->rf_dbe);
7157 	rfs4_dbe_unlock(sp->rs_dbe);
7158 
7159 	if (dsp) {
7160 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7161 		rfs4_deleg_state_rele(dsp);
7162 	}
7163 
7164 	rfs4_file_rele(fp);
7165 	rfs4_state_rele(sp);
7166 
7167 	resp->status = NFS4_OK;
7168 }
7169 
7170 /*ARGSUSED*/
7171 static void
7172 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7173     rfs4_openowner_t *oo, OPEN4res *resp)
7174 {
7175 	/* cs->vp and cs->fh have been updated by putfh. */
7176 	rfs4_do_open(cs, req, oo, DELEG_ANY,
7177 	    (args->share_access & 0xff), args->share_deny, resp, 0);
7178 }
7179 
7180 /*ARGSUSED*/
7181 static void
7182 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7183     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7184 {
7185 	change_info4 *cinfo = &resp->cinfo;
7186 	bitmap4 *attrset = &resp->attrset;
7187 
7188 	if (args->opentype == OPEN4_NOCREATE)
7189 		resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7190 		    req, cs, args->share_access, cinfo);
7191 	else {
7192 		/* inhibit delegation grants during exclusive create */
7193 
7194 		if (args->mode == EXCLUSIVE4)
7195 			rfs4_disable_delegation();
7196 
7197 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7198 		    oo->ro_client->rc_clientid);
7199 	}
7200 
7201 	if (resp->status == NFS4_OK) {
7202 
7203 		/* cs->vp cs->fh now reference the desired file */
7204 
7205 		rfs4_do_open(cs, req, oo,
7206 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7207 		    args->share_access, args->share_deny, resp, 0);
7208 
7209 		/*
7210 		 * If rfs4_createfile set attrset, we must
7211 		 * clear this attrset before the response is copied.
7212 		 */
7213 		if (resp->status != NFS4_OK && resp->attrset) {
7214 			resp->attrset = 0;
7215 		}
7216 	}
7217 	else
7218 		*cs->statusp = resp->status;
7219 
7220 	if (args->mode == EXCLUSIVE4)
7221 		rfs4_enable_delegation();
7222 }
7223 
7224 /*ARGSUSED*/
7225 static void
7226 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7227     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7228 {
7229 	change_info4 *cinfo = &resp->cinfo;
7230 	vattr_t va;
7231 	vtype_t v_type = cs->vp->v_type;
7232 	int error = 0;
7233 
7234 	/* Verify that we have a regular file */
7235 	if (v_type != VREG) {
7236 		if (v_type == VDIR)
7237 			resp->status = NFS4ERR_ISDIR;
7238 		else if (v_type == VLNK)
7239 			resp->status = NFS4ERR_SYMLINK;
7240 		else
7241 			resp->status = NFS4ERR_INVAL;
7242 		return;
7243 	}
7244 
7245 	va.va_mask = AT_MODE|AT_UID;
7246 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7247 	if (error) {
7248 		resp->status = puterrno4(error);
7249 		return;
7250 	}
7251 
7252 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7253 
7254 	/*
7255 	 * Check if we have access to the file, Note the the file
7256 	 * could have originally been open UNCHECKED or GUARDED
7257 	 * with mode bits that will now fail, but there is nothing
7258 	 * we can really do about that except in the case that the
7259 	 * owner of the file is the one requesting the open.
7260 	 */
7261 	if (crgetuid(cs->cr) != va.va_uid) {
7262 		resp->status = check_open_access(args->share_access, cs, req);
7263 		if (resp->status != NFS4_OK) {
7264 			return;
7265 		}
7266 	}
7267 
7268 	/*
7269 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7270 	 */
7271 	cinfo->before = 0;
7272 	cinfo->after = 0;
7273 	cinfo->atomic = FALSE;
7274 
7275 	rfs4_do_open(cs, req, oo,
7276 	    NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7277 	    args->share_access, args->share_deny, resp, 0);
7278 }
7279 
7280 static void
7281 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7282     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7283 {
7284 	int error;
7285 	nfsstat4 status;
7286 	stateid4 stateid =
7287 	    args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7288 	rfs4_deleg_state_t *dsp;
7289 
7290 	/*
7291 	 * Find the state info from the stateid and confirm that the
7292 	 * file is delegated.  If the state openowner is the same as
7293 	 * the supplied openowner we're done. If not, get the file
7294 	 * info from the found state info. Use that file info to
7295 	 * create the state for this lock owner. Note solaris doen't
7296 	 * really need the pathname to find the file. We may want to
7297 	 * lookup the pathname and make sure that the vp exist and
7298 	 * matches the vp in the file structure. However it is
7299 	 * possible that the pathname nolonger exists (local process
7300 	 * unlinks the file), so this may not be that useful.
7301 	 */
7302 
7303 	status = rfs4_get_deleg_state(&stateid, &dsp);
7304 	if (status != NFS4_OK) {
7305 		resp->status = status;
7306 		return;
7307 	}
7308 
7309 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7310 
7311 	/*
7312 	 * New lock owner, create state. Since this was probably called
7313 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
7314 	 */
7315 
7316 	ASSERT(cs->vp != NULL);
7317 	VN_RELE(cs->vp);
7318 	VN_HOLD(dsp->rds_finfo->rf_vp);
7319 	cs->vp = dsp->rds_finfo->rf_vp;
7320 
7321 	error = makefh4(&cs->fh, cs->vp, cs->exi);
7322 	if (error != 0) {
7323 		rfs4_deleg_state_rele(dsp);
7324 		*cs->statusp = resp->status = puterrno4(error);
7325 		return;
7326 	}
7327 
7328 	/* Mark progress for delegation returns */
7329 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7330 	rfs4_deleg_state_rele(dsp);
7331 	rfs4_do_open(cs, req, oo, DELEG_NONE,
7332 	    args->share_access, args->share_deny, resp, 1);
7333 }
7334 
7335 /*ARGSUSED*/
7336 static void
7337 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7338     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7339 {
7340 	/*
7341 	 * Lookup the pathname, it must already exist since this file
7342 	 * was delegated.
7343 	 *
7344 	 * Find the file and state info for this vp and open owner pair.
7345 	 *	check that they are in fact delegated.
7346 	 *	check that the state access and deny modes are the same.
7347 	 *
7348 	 * Return the delgation possibly seting the recall flag.
7349 	 */
7350 	rfs4_file_t *fp;
7351 	rfs4_state_t *sp;
7352 	bool_t create = FALSE;
7353 	bool_t dcreate = FALSE;
7354 	rfs4_deleg_state_t *dsp;
7355 	nfsace4 *ace;
7356 
7357 	/* Note we ignore oflags */
7358 	resp->status = rfs4_lookupfile(
7359 	    &args->claim.open_claim4_u.file_delegate_prev,
7360 	    req, cs, args->share_access, &resp->cinfo);
7361 
7362 	if (resp->status != NFS4_OK) {
7363 		return;
7364 	}
7365 
7366 	/* get the file struct and hold a lock on it during initial open */
7367 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7368 	if (fp == NULL) {
7369 		resp->status = NFS4ERR_RESOURCE;
7370 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7371 		return;
7372 	}
7373 
7374 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7375 	if (sp == NULL) {
7376 		resp->status = NFS4ERR_SERVERFAULT;
7377 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7378 		rw_exit(&fp->rf_file_rwlock);
7379 		rfs4_file_rele(fp);
7380 		return;
7381 	}
7382 
7383 	rfs4_dbe_lock(sp->rs_dbe);
7384 	rfs4_dbe_lock(fp->rf_dbe);
7385 	if (args->share_access != sp->rs_share_access ||
7386 	    args->share_deny != sp->rs_share_deny ||
7387 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7388 		NFS4_DEBUG(rfs4_debug,
7389 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7390 		rfs4_dbe_unlock(fp->rf_dbe);
7391 		rfs4_dbe_unlock(sp->rs_dbe);
7392 		rfs4_file_rele(fp);
7393 		rfs4_state_rele(sp);
7394 		resp->status = NFS4ERR_SERVERFAULT;
7395 		return;
7396 	}
7397 	rfs4_dbe_unlock(fp->rf_dbe);
7398 	rfs4_dbe_unlock(sp->rs_dbe);
7399 
7400 	dsp = rfs4_finddeleg(sp, &dcreate);
7401 	if (dsp == NULL) {
7402 		rfs4_state_rele(sp);
7403 		rfs4_file_rele(fp);
7404 		resp->status = NFS4ERR_SERVERFAULT;
7405 		return;
7406 	}
7407 
7408 	next_stateid(&sp->rs_stateid);
7409 
7410 	resp->stateid = sp->rs_stateid.stateid;
7411 
7412 	resp->delegation.delegation_type = dsp->rds_dtype;
7413 
7414 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7415 		open_read_delegation4 *rv =
7416 		    &resp->delegation.open_delegation4_u.read;
7417 
7418 		rv->stateid = dsp->rds_delegid.stateid;
7419 		rv->recall = FALSE; /* no policy in place to set to TRUE */
7420 		ace = &rv->permissions;
7421 	} else {
7422 		open_write_delegation4 *rv =
7423 		    &resp->delegation.open_delegation4_u.write;
7424 
7425 		rv->stateid = dsp->rds_delegid.stateid;
7426 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
7427 		ace = &rv->permissions;
7428 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
7429 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7430 	}
7431 
7432 	/* XXX For now */
7433 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7434 	ace->flag = 0;
7435 	ace->access_mask = 0;
7436 	ace->who.utf8string_len = 0;
7437 	ace->who.utf8string_val = 0;
7438 
7439 	rfs4_deleg_state_rele(dsp);
7440 	rfs4_state_rele(sp);
7441 	rfs4_file_rele(fp);
7442 }
7443 
/*
 * Outcome of a sequence id check.
 */
typedef enum {
	/* the request carries the next expected sequence id */
	NFS4_CHKSEQ_OKAY = 0,
	/* same sequence id and same operation as the last request */
	NFS4_CHKSEQ_REPLAY = 1,
	/* any other sequence id, or a repeated id with a different op */
	NFS4_CHKSEQ_BAD = 2
} rfs4_chkseq_t;
7449 
7450 /*
7451  * Generic function for sequence number checks.
7452  */
7453 static rfs4_chkseq_t
7454 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7455     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7456 {
7457 	/* Same sequence ids and matching operations? */
7458 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
7459 		if (copyres == TRUE) {
7460 			rfs4_free_reply(resop);
7461 			rfs4_copy_reply(resop, lastop);
7462 		}
7463 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7464 		    "Replayed SEQID %d\n", seqid));
7465 		return (NFS4_CHKSEQ_REPLAY);
7466 	}
7467 
7468 	/* If the incoming sequence is not the next expected then it is bad */
7469 	if (rqst_seq != seqid + 1) {
7470 		if (rqst_seq == seqid) {
7471 			NFS4_DEBUG(rfs4_debug,
7472 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
7473 			    "but last op was %d current op is %d\n",
7474 			    lastop->resop, resop->resop));
7475 			return (NFS4_CHKSEQ_BAD);
7476 		}
7477 		NFS4_DEBUG(rfs4_debug,
7478 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7479 		    rqst_seq, seqid));
7480 		return (NFS4_CHKSEQ_BAD);
7481 	}
7482 
7483 	/* Everything okay -- next expected */
7484 	return (NFS4_CHKSEQ_OKAY);
7485 }
7486 
7487 
7488 static rfs4_chkseq_t
7489 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7490     const compound_state_t *cs)
7491 {
7492 	rfs4_chkseq_t rc;
7493 
7494 	if (rfs4_has_session(cs))
7495 		return (NFS4_CHKSEQ_OKAY);
7496 
7497 	rfs4_dbe_lock(op->ro_dbe);
7498 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7499 	    TRUE);
7500 	rfs4_dbe_unlock(op->ro_dbe);
7501 
7502 	if (rc == NFS4_CHKSEQ_OKAY)
7503 		rfs4_update_lease(op->ro_client);
7504 
7505 	return (rc);
7506 }
7507 
7508 static rfs4_chkseq_t
7509 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7510 {
7511 	rfs4_chkseq_t rc;
7512 
7513 	rfs4_dbe_lock(op->ro_dbe);
7514 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7515 	    olo_seqid, resop, FALSE);
7516 	rfs4_dbe_unlock(op->ro_dbe);
7517 
7518 	return (rc);
7519 }
7520 
7521 static rfs4_chkseq_t
7522 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7523 {
7524 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7525 
7526 	rfs4_dbe_lock(lsp->rls_dbe);
7527 	if (!lsp->rls_skip_seqid_check)
7528 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7529 		    resop, TRUE);
7530 	rfs4_dbe_unlock(lsp->rls_dbe);
7531 
7532 	return (rc);
7533 }
7534 
/*
 * OPEN operation handler.
 *
 * In order, this function:
 *  - requires a current filehandle (cs->vp);
 *  - looks up the client named by the open owner's clientid (overridden
 *    with the session's clientid when the compound has a session) and
 *    rejects an expired lease;
 *  - finds or creates the open owner and, for a pre-existing owner whose
 *    confirmation is not postponed, checks the request's sequence id;
 *  - applies the grace-period rules for the claim type;
 *  - validates the share access/deny bits;
 *  - dispatches to the rfs4_do_open*() routine for the claim type;
 *  - finally performs the sequence id / cached reply bookkeeping, and on
 *    a replay restores the current filehandle from the cached reply.
 *
 * Exit labels:
 *	out	- client hold is dropped, then seqid bookkeeping runs
 *	finish	- status is published, open owner is released
 *	end	- only the "done" DTrace probe fires (no holds are owned)
 *
 * The result is reported through resp->status and *cs->statusp.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim.claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/* rfc5661 section 18.16.3 */
	if (rfs4_has_session(cs))
		owner->clientid = cs->client->rc_clientid;

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		/* No separate rele of cp on this path; close is the last use */
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	/* Snapshot taken once, before the retry loop below */
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: reset it on every pass through retry */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/*
	 * OPEN_CONFIRM must not be implemented in v4.1
	 */
	if (rfs4_has_session(cs)) {
		oo->ro_need_confirm = FALSE;
	}

	/* Hold off access to the sequence space while the open is done */
	/* Workaround to avoid deadlock */
	if (!rfs4_has_session(cs))
		rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
		case NFS4_CHKSEQ_BAD:
			/*
			 * rfs4_check_open_seqid() returns OKAY whenever a
			 * session is present, so ro_sw was entered above and
			 * the unconditional rfs4_sw_exit() below is balanced.
			 */
			ASSERT(!rfs4_has_session(cs));
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				/*
				 * Unconfirmed owner reused with a later
				 * seqid: discard its opens and start over.
				 */
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			/* resop now holds a copy of the cached reply */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			ASSERT(!rfs4_has_session(cs));
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
	    claim == CLAIM_FH)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each claim type has its own worker; all report via resp */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	case CLAIM_FH:
		rfs4_do_openfh(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	/* From here on only the open owner hold (and ro_sw) remain */
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			/* Swap the current vnode for the replayed one */
			VN_RELE(cs->vp);

			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			/* On failure nfs4_fhtovp() was handed resp->status */
			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	/* Publish the final status, whichever path set resp->status */
	*cs->statusp = resp->status;

	/* ro_sw was only entered when there is no session */
	if (!rfs4_has_session(cs))
		rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7837 
7838 /*ARGSUSED*/
7839 void
7840 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7841     struct svc_req *req, struct compound_state *cs)
7842 {
7843 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7844 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7845 	rfs4_state_t *sp;
7846 	nfsstat4 status;
7847 
7848 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7849 	    OPEN_CONFIRM4args *, args);
7850 
7851 	ASSERT(!rfs4_has_session(cs));
7852 
7853 	if (cs->vp == NULL) {
7854 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7855 		goto out;
7856 	}
7857 
7858 	if (cs->vp->v_type != VREG) {
7859 		*cs->statusp = resp->status =
7860 		    cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7861 		return;
7862 	}
7863 
7864 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7865 	if (status != NFS4_OK) {
7866 		*cs->statusp = resp->status = status;
7867 		goto out;
7868 	}
7869 
7870 	/* Ensure specified filehandle matches */
7871 	if (cs->vp != sp->rs_finfo->rf_vp) {
7872 		rfs4_state_rele(sp);
7873 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7874 		goto out;
7875 	}
7876 
7877 	/* hold off other access to open_owner while we tinker */
7878 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7879 
7880 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
7881 	case NFS4_CHECK_STATEID_OKAY:
7882 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7883 		    resop, cs) != 0) {
7884 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7885 			break;
7886 		}
7887 		/*
7888 		 * If it is the appropriate stateid and determined to
7889 		 * be "OKAY" then this means that the stateid does not
7890 		 * need to be confirmed and the client is in error for
7891 		 * sending an OPEN_CONFIRM.
7892 		 */
7893 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7894 		break;
7895 	case NFS4_CHECK_STATEID_OLD:
7896 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7897 		break;
7898 	case NFS4_CHECK_STATEID_BAD:
7899 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7900 		break;
7901 	case NFS4_CHECK_STATEID_EXPIRED:
7902 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
7903 		break;
7904 	case NFS4_CHECK_STATEID_CLOSED:
7905 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7906 		break;
7907 	case NFS4_CHECK_STATEID_REPLAY:
7908 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7909 		    resop, cs)) {
7910 		case NFS4_CHKSEQ_OKAY:
7911 			/*
7912 			 * This is replayed stateid; if seqid matches
7913 			 * next expected, then client is using wrong seqid.
7914 			 */
7915 			/* fall through */
7916 		case NFS4_CHKSEQ_BAD:
7917 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7918 			break;
7919 		case NFS4_CHKSEQ_REPLAY:
7920 			/*
7921 			 * Note this case is the duplicate case so
7922 			 * resp->status is already set.
7923 			 */
7924 			*cs->statusp = resp->status;
7925 			rfs4_update_lease(sp->rs_owner->ro_client);
7926 			break;
7927 		}
7928 		break;
7929 	case NFS4_CHECK_STATEID_UNCONFIRMED:
7930 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7931 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
7932 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7933 			break;
7934 		}
7935 		*cs->statusp = resp->status = NFS4_OK;
7936 
7937 		next_stateid(&sp->rs_stateid);
7938 		resp->open_stateid = sp->rs_stateid.stateid;
7939 		sp->rs_owner->ro_need_confirm = FALSE;
7940 		rfs4_update_lease(sp->rs_owner->ro_client);
7941 		rfs4_update_open_sequence(sp->rs_owner);
7942 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7943 		break;
7944 	default:
7945 		ASSERT(FALSE);
7946 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7947 		break;
7948 	}
7949 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
7950 	rfs4_state_rele(sp);
7951 
7952 out:
7953 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7954 	    OPEN_CONFIRM4res *, resp);
7955 }
7956 
7957 /*ARGSUSED*/
7958 void
7959 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7960     struct svc_req *req, struct compound_state *cs)
7961 {
7962 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7963 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7964 	uint32_t access = args->share_access;
7965 	uint32_t deny = args->share_deny;
7966 	nfsstat4 status;
7967 	rfs4_state_t *sp;
7968 	rfs4_file_t *fp;
7969 	int fflags = 0;
7970 
7971 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7972 	    OPEN_DOWNGRADE4args *, args);
7973 
7974 	if (cs->vp == NULL) {
7975 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7976 		goto out;
7977 	}
7978 
7979 	if (cs->vp->v_type != VREG) {
7980 		*cs->statusp = resp->status = NFS4ERR_INVAL;
7981 		return;
7982 	}
7983 
7984 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7985 	if (status != NFS4_OK) {
7986 		*cs->statusp = resp->status = status;
7987 		goto out;
7988 	}
7989 
7990 	/* Ensure specified filehandle matches */
7991 	if (cs->vp != sp->rs_finfo->rf_vp) {
7992 		rfs4_state_rele(sp);
7993 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7994 		goto out;
7995 	}
7996 
7997 	/* hold off other access to open_owner while we tinker */
7998 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
7999 
8000 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8001 	case NFS4_CHECK_STATEID_OKAY:
8002 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8003 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8004 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8005 			goto end;
8006 		}
8007 		break;
8008 	case NFS4_CHECK_STATEID_OLD:
8009 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8010 		goto end;
8011 	case NFS4_CHECK_STATEID_BAD:
8012 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8013 		goto end;
8014 	case NFS4_CHECK_STATEID_EXPIRED:
8015 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8016 		goto end;
8017 	case NFS4_CHECK_STATEID_CLOSED:
8018 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8019 		goto end;
8020 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8021 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8022 		goto end;
8023 	case NFS4_CHECK_STATEID_REPLAY:
8024 		ASSERT(!rfs4_has_session(cs));
8025 
8026 		/* Check the sequence id for the open owner */
8027 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8028 		    resop, cs)) {
8029 		case NFS4_CHKSEQ_OKAY:
8030 			/*
8031 			 * This is replayed stateid; if seqid matches
8032 			 * next expected, then client is using wrong seqid.
8033 			 */
8034 			/* fall through */
8035 		case NFS4_CHKSEQ_BAD:
8036 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8037 			goto end;
8038 		case NFS4_CHKSEQ_REPLAY:
8039 			/*
8040 			 * Note this case is the duplicate case so
8041 			 * resp->status is already set.
8042 			 */
8043 			*cs->statusp = resp->status;
8044 			rfs4_update_lease(sp->rs_owner->ro_client);
8045 			goto end;
8046 		}
8047 		break;
8048 	default:
8049 		ASSERT(FALSE);
8050 		break;
8051 	}
8052 
8053 	rfs4_dbe_lock(sp->rs_dbe);
8054 	/*
8055 	 * Check that the new access modes and deny modes are valid.
8056 	 * Check that no invalid bits are set.
8057 	 */
8058 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8059 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8060 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8061 		rfs4_update_open_sequence(sp->rs_owner);
8062 		rfs4_dbe_unlock(sp->rs_dbe);
8063 		goto end;
8064 	}
8065 
8066 	/*
8067 	 * The new modes must be a subset of the current modes and
8068 	 * the access must specify at least one mode. To test that
8069 	 * the new mode is a subset of the current modes we bitwise
8070 	 * AND them together and check that the result equals the new
8071 	 * mode. For example:
8072 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
8073 	 * access & sp->rs_open_access == R == access, so the new access mode
8074 	 * is valid. Consider access == RW, sp->rs_open_access = R
8075 	 * access & sp->rs_open_access == R != access, so the new access mode
8076 	 * is invalid.
8077 	 */
8078 	if ((access & sp->rs_open_access) != access ||
8079 	    (deny & sp->rs_open_deny) != deny ||
8080 	    (access &
8081 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8082 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8083 		rfs4_update_open_sequence(sp->rs_owner);
8084 		rfs4_dbe_unlock(sp->rs_dbe);
8085 		goto end;
8086 	}
8087 
8088 	/*
8089 	 * Release any share locks associated with this stateID.
8090 	 * Strictly speaking, this violates the spec because the
8091 	 * spec effectively requires that open downgrade be atomic.
8092 	 * At present, fs_shrlock does not have this capability.
8093 	 */
8094 	(void) rfs4_unshare(sp);
8095 
8096 	status = rfs4_share(sp, access, deny);
8097 	if (status != NFS4_OK) {
8098 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8099 		rfs4_update_open_sequence(sp->rs_owner);
8100 		rfs4_dbe_unlock(sp->rs_dbe);
8101 		goto end;
8102 	}
8103 
8104 	fp = sp->rs_finfo;
8105 	rfs4_dbe_lock(fp->rf_dbe);
8106 
8107 	/*
8108 	 * If the current mode has deny read and the new mode
8109 	 * does not, decrement the number of deny read mode bits
8110 	 * and if it goes to zero turn off the deny read bit
8111 	 * on the file.
8112 	 */
8113 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8114 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
8115 		fp->rf_deny_read--;
8116 		if (fp->rf_deny_read == 0)
8117 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8118 	}
8119 
8120 	/*
8121 	 * If the current mode has deny write and the new mode
8122 	 * does not, decrement the number of deny write mode bits
8123 	 * and if it goes to zero turn off the deny write bit
8124 	 * on the file.
8125 	 */
8126 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8127 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8128 		fp->rf_deny_write--;
8129 		if (fp->rf_deny_write == 0)
8130 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8131 	}
8132 
8133 	/*
8134 	 * If the current mode has access read and the new mode
8135 	 * does not, decrement the number of access read mode bits
8136 	 * and if it goes to zero turn off the access read bit
8137 	 * on the file.  set fflags to FREAD for the call to
8138 	 * vn_open_downgrade().
8139 	 */
8140 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8141 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8142 		fp->rf_access_read--;
8143 		if (fp->rf_access_read == 0)
8144 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8145 		fflags |= FREAD;
8146 	}
8147 
8148 	/*
8149 	 * If the current mode has access write and the new mode
8150 	 * does not, decrement the number of access write mode bits
8151 	 * and if it goes to zero turn off the access write bit
8152 	 * on the file.  set fflags to FWRITE for the call to
8153 	 * vn_open_downgrade().
8154 	 */
8155 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8156 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8157 		fp->rf_access_write--;
8158 		if (fp->rf_access_write == 0)
8159 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8160 		fflags |= FWRITE;
8161 	}
8162 
8163 	/* Check that the file is still accessible */
8164 	ASSERT(fp->rf_share_access);
8165 
8166 	rfs4_dbe_unlock(fp->rf_dbe);
8167 
8168 	/* now set the new open access and deny modes */
8169 	sp->rs_open_access = access;
8170 	sp->rs_open_deny = deny;
8171 
8172 	/*
8173 	 * we successfully downgraded the share lock, now we need to downgrade
8174 	 * the open. it is possible that the downgrade was only for a deny
8175 	 * mode and we have nothing else to do.
8176 	 */
8177 	if ((fflags & (FREAD|FWRITE)) != 0)
8178 		vn_open_downgrade(cs->vp, fflags);
8179 
8180 	/* Update the stateid */
8181 	next_stateid(&sp->rs_stateid);
8182 	resp->open_stateid = sp->rs_stateid.stateid;
8183 
8184 	rfs4_dbe_unlock(sp->rs_dbe);
8185 
8186 	*cs->statusp = resp->status = NFS4_OK;
8187 	/* Update the lease */
8188 	rfs4_update_lease(sp->rs_owner->ro_client);
8189 	/* And the sequence */
8190 	rfs4_update_open_sequence(sp->rs_owner);
8191 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8192 
8193 end:
8194 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8195 	rfs4_state_rele(sp);
8196 out:
8197 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8198 	    OPEN_DOWNGRADE4res *, resp);
8199 }
8200 
/*
 * Locate the NUL-terminated string s2 inside the first n bytes of the
 * (not necessarily NUL-terminated) buffer s1.
 *
 * Returns a pointer to the first occurrence, or NULL when s2 does not
 * fit entirely within the n bytes at any offset.  An empty s2 matches
 * at the start of s1.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t needle_len = strlen(s2);
	char *cursor;
	size_t remaining;

	/* Slide a needle-sized window while enough bytes are left. */
	for (cursor = (char *)s1, remaining = n; remaining >= needle_len;
	    cursor++, remaining--) {
		if (bcmp(cursor, s2, needle_len) == 0)
			return (cursor);
	}

	return (NULL);
}
8216 
8217 /*
8218  * The logic behind this function is detailed in the NFSv4 RFC in the
8219  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8220  * that section for explicit guidance to server behavior for
8221  * SETCLIENTID.
8222  */
8223 void
8224 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8225     struct svc_req *req, struct compound_state *cs)
8226 {
8227 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8228 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8229 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8230 	rfs4_clntip_t *ci;
8231 	bool_t create;
8232 	char *addr, *netid;
8233 	int len;
8234 
8235 	DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8236 	    SETCLIENTID4args *, args);
8237 retry:
8238 	newcp = cp_confirmed = cp_unconfirmed = NULL;
8239 
8240 	/*
8241 	 * Save the caller's IP address
8242 	 */
8243 	args->client.cl_addr =
8244 	    (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8245 
8246 	/*
8247 	 * Record if it is a Solaris client that cannot handle referrals.
8248 	 */
8249 	if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8250 	    !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8251 		/* Add a "yes, it's downrev" record */
8252 		create = TRUE;
8253 		ci = rfs4_find_clntip(args->client.cl_addr, &create);
8254 		ASSERT(ci != NULL);
8255 		rfs4_dbe_rele(ci->ri_dbe);
8256 	} else {
8257 		/* Remove any previous record */
8258 		rfs4_invalidate_clntip(args->client.cl_addr);
8259 	}
8260 
8261 	/*
8262 	 * In search of an EXISTING client matching the incoming
8263 	 * request to establish a new client identifier at the server
8264 	 */
8265 	create = TRUE;
8266 	cp = rfs4_findclient(&args->client, &create, NULL);
8267 
8268 	/* Should never happen */
8269 	ASSERT(cp != NULL);
8270 
8271 	if (cp == NULL) {
8272 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8273 		goto out;
8274 	}
8275 
8276 	/*
8277 	 * Easiest case. Client identifier is newly created and is
8278 	 * unconfirmed.  Also note that for this case, no other
8279 	 * entries exist for the client identifier.  Nothing else to
8280 	 * check.  Just setup the response and respond.
8281 	 */
8282 	if (create) {
8283 		*cs->statusp = res->status = NFS4_OK;
8284 		res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8285 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8286 		    cp->rc_confirm_verf;
8287 		/* Setup callback information; CB_NULL confirmation later */
8288 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8289 
8290 		rfs4_client_rele(cp);
8291 		goto out;
8292 	}
8293 
8294 	/*
8295 	 * An existing, confirmed client may exist but it may not have
8296 	 * been active for at least one lease period.  If so, then
8297 	 * "close" the client and create a new client identifier
8298 	 */
8299 	if (rfs4_lease_expired(cp)) {
8300 		rfs4_client_close(cp);
8301 		goto retry;
8302 	}
8303 
8304 	if (cp->rc_need_confirm == TRUE)
8305 		cp_unconfirmed = cp;
8306 	else
8307 		cp_confirmed = cp;
8308 
8309 	cp = NULL;
8310 
8311 	/*
8312 	 * We have a confirmed client, now check for an
8313 	 * unconfimred entry
8314 	 */
8315 	if (cp_confirmed) {
8316 		/* If creds don't match then client identifier is inuse */
8317 		if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8318 			rfs4_cbinfo_t *cbp;
8319 			/*
8320 			 * Some one else has established this client
8321 			 * id. Try and say * who they are. We will use
8322 			 * the call back address supplied by * the
8323 			 * first client.
8324 			 */
8325 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8326 
8327 			addr = netid = NULL;
8328 
8329 			cbp = &cp_confirmed->rc_cbinfo;
8330 			if (cbp->cb_callback.cb_location.r_addr &&
8331 			    cbp->cb_callback.cb_location.r_netid) {
8332 				cb_client4 *cbcp = &cbp->cb_callback;
8333 
8334 				len = strlen(cbcp->cb_location.r_addr)+1;
8335 				addr = kmem_alloc(len, KM_SLEEP);
8336 				bcopy(cbcp->cb_location.r_addr, addr, len);
8337 				len = strlen(cbcp->cb_location.r_netid)+1;
8338 				netid = kmem_alloc(len, KM_SLEEP);
8339 				bcopy(cbcp->cb_location.r_netid, netid, len);
8340 			}
8341 
8342 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
8343 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
8344 
8345 			rfs4_client_rele(cp_confirmed);
8346 		}
8347 
8348 		/*
8349 		 * Confirmed, creds match, and verifier matches; must
8350 		 * be an update of the callback info
8351 		 */
8352 		if (cp_confirmed->rc_nfs_client.verifier ==
8353 		    args->client.verifier) {
8354 			/* Setup callback information */
8355 			rfs4_client_setcb(cp_confirmed, &args->callback,
8356 			    args->callback_ident);
8357 
8358 			/* everything okay -- move ahead */
8359 			*cs->statusp = res->status = NFS4_OK;
8360 			res->SETCLIENTID4res_u.resok4.clientid =
8361 			    cp_confirmed->rc_clientid;
8362 
8363 			/* update the confirm_verifier and return it */
8364 			rfs4_client_scv_next(cp_confirmed);
8365 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8366 			    cp_confirmed->rc_confirm_verf;
8367 
8368 			rfs4_client_rele(cp_confirmed);
8369 			goto out;
8370 		}
8371 
8372 		/*
8373 		 * Creds match but the verifier doesn't.  Must search
8374 		 * for an unconfirmed client that would be replaced by
8375 		 * this request.
8376 		 */
8377 		create = FALSE;
8378 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
8379 		    cp_confirmed);
8380 	}
8381 
8382 	/*
8383 	 * At this point, we have taken care of the brand new client
8384 	 * struct, INUSE case, update of an existing, and confirmed
8385 	 * client struct.
8386 	 */
8387 
8388 	/*
8389 	 * check to see if things have changed while we originally
8390 	 * picked up the client struct.  If they have, then return and
8391 	 * retry the processing of this SETCLIENTID request.
8392 	 */
8393 	if (cp_unconfirmed) {
8394 		rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8395 		if (!cp_unconfirmed->rc_need_confirm) {
8396 			rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8397 			rfs4_client_rele(cp_unconfirmed);
8398 			if (cp_confirmed)
8399 				rfs4_client_rele(cp_confirmed);
8400 			goto retry;
8401 		}
8402 		/* do away with the old unconfirmed one */
8403 		rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8404 		rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8405 		rfs4_client_rele(cp_unconfirmed);
8406 		cp_unconfirmed = NULL;
8407 	}
8408 
8409 	/*
8410 	 * This search will temporarily hide the confirmed client
8411 	 * struct while a new client struct is created as the
8412 	 * unconfirmed one.
8413 	 */
8414 	create = TRUE;
8415 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8416 
8417 	ASSERT(newcp != NULL);
8418 
8419 	if (newcp == NULL) {
8420 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8421 		rfs4_client_rele(cp_confirmed);
8422 		goto out;
8423 	}
8424 
8425 	/*
8426 	 * If one was not created, then a similar request must be in
8427 	 * process so release and start over with this one
8428 	 */
8429 	if (create != TRUE) {
8430 		rfs4_client_rele(newcp);
8431 		if (cp_confirmed)
8432 			rfs4_client_rele(cp_confirmed);
8433 		goto retry;
8434 	}
8435 
8436 	*cs->statusp = res->status = NFS4_OK;
8437 	res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8438 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8439 	    newcp->rc_confirm_verf;
8440 	/* Setup callback information; CB_NULL confirmation later */
8441 	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8442 
8443 	newcp->rc_cp_confirmed = cp_confirmed;
8444 
8445 	rfs4_client_rele(newcp);
8446 
8447 out:
8448 	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8449 	    SETCLIENTID4res *, res);
8450 }
8451 
8452 /*ARGSUSED*/
8453 void
8454 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8455     struct svc_req *req, struct compound_state *cs)
8456 {
8457 	SETCLIENTID_CONFIRM4args *args =
8458 	    &argop->nfs_argop4_u.opsetclientid_confirm;
8459 	SETCLIENTID_CONFIRM4res *res =
8460 	    &resop->nfs_resop4_u.opsetclientid_confirm;
8461 	rfs4_client_t *cp, *cptoclose = NULL;
8462 	nfs4_srv_t *nsrv4;
8463 
8464 	DTRACE_NFSV4_2(op__setclientid__confirm__start,
8465 	    struct compound_state *, cs,
8466 	    SETCLIENTID_CONFIRM4args *, args);
8467 
8468 	nsrv4 = nfs4_get_srv();
8469 	*cs->statusp = res->status = NFS4_OK;
8470 
8471 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
8472 
8473 	if (cp == NULL) {
8474 		*cs->statusp = res->status =
8475 		    rfs4_check_clientid(&args->clientid, 1);
8476 		goto out;
8477 	}
8478 
8479 	if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8480 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8481 		rfs4_client_rele(cp);
8482 		goto out;
8483 	}
8484 
8485 	/* If the verifier doesn't match, the record doesn't match */
8486 	if (cp->rc_confirm_verf != args->setclientid_confirm) {
8487 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8488 		rfs4_client_rele(cp);
8489 		goto out;
8490 	}
8491 
8492 	rfs4_dbe_lock(cp->rc_dbe);
8493 	cp->rc_need_confirm = FALSE;
8494 	if (cp->rc_cp_confirmed) {
8495 		cptoclose = cp->rc_cp_confirmed;
8496 		cptoclose->rc_ss_remove = 1;
8497 		cp->rc_cp_confirmed = NULL;
8498 	}
8499 
8500 	/*
8501 	 * Update the client's associated server instance, if it's changed
8502 	 * since the client was created.
8503 	 */
8504 	if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8505 		rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8506 
8507 	/*
8508 	 * Record clientid in stable storage.
8509 	 * Must be done after server instance has been assigned.
8510 	 */
8511 	rfs4_ss_clid(nsrv4, cp);
8512 
8513 	rfs4_dbe_unlock(cp->rc_dbe);
8514 
8515 	if (cptoclose)
8516 		/* don't need to rele, client_close does it */
8517 		rfs4_client_close(cptoclose);
8518 
8519 	/* If needed, initiate CB_NULL call for callback path */
8520 	rfs4_deleg_cb_check(cp);
8521 	rfs4_update_lease(cp);
8522 
8523 	/*
8524 	 * Check to see if client can perform reclaims
8525 	 */
8526 	rfs4_ss_chkclid(nsrv4, cp);
8527 
8528 	rfs4_client_rele(cp);
8529 
8530 out:
8531 	DTRACE_NFSV4_2(op__setclientid__confirm__done,
8532 	    struct compound_state *, cs,
8533 	    SETCLIENTID_CONFIRM4 *, res);
8534 }
8535 
8536 
8537 /*ARGSUSED*/
8538 void
8539 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8540     struct svc_req *req, struct compound_state *cs)
8541 {
8542 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8543 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8544 	rfs4_state_t *sp;
8545 	nfsstat4 status;
8546 
8547 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8548 	    CLOSE4args *, args);
8549 
8550 	if (cs->vp == NULL) {
8551 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8552 		goto out;
8553 	}
8554 
8555 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8556 	if (status != NFS4_OK) {
8557 		*cs->statusp = resp->status = status;
8558 		goto out;
8559 	}
8560 
8561 	/* Ensure specified filehandle matches */
8562 	if (cs->vp != sp->rs_finfo->rf_vp) {
8563 		rfs4_state_rele(sp);
8564 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8565 		goto out;
8566 	}
8567 
8568 	/* hold off other access to open_owner while we tinker */
8569 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8570 
8571 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8572 	case NFS4_CHECK_STATEID_OKAY:
8573 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8574 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8575 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8576 			goto end;
8577 		}
8578 		break;
8579 	case NFS4_CHECK_STATEID_OLD:
8580 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8581 		goto end;
8582 	case NFS4_CHECK_STATEID_BAD:
8583 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8584 		goto end;
8585 	case NFS4_CHECK_STATEID_EXPIRED:
8586 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8587 		goto end;
8588 	case NFS4_CHECK_STATEID_CLOSED:
8589 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8590 		goto end;
8591 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8592 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8593 		goto end;
8594 	case NFS4_CHECK_STATEID_REPLAY:
8595 		ASSERT(!rfs4_has_session(cs));
8596 
8597 		/* Check the sequence id for the open owner */
8598 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8599 		    resop, cs)) {
8600 		case NFS4_CHKSEQ_OKAY:
8601 			/*
8602 			 * This is replayed stateid; if seqid matches
8603 			 * next expected, then client is using wrong seqid.
8604 			 */
8605 			/* FALL THROUGH */
8606 		case NFS4_CHKSEQ_BAD:
8607 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8608 			goto end;
8609 		case NFS4_CHKSEQ_REPLAY:
8610 			/*
8611 			 * Note this case is the duplicate case so
8612 			 * resp->status is already set.
8613 			 */
8614 			*cs->statusp = resp->status;
8615 			rfs4_update_lease(sp->rs_owner->ro_client);
8616 			goto end;
8617 		}
8618 		break;
8619 	default:
8620 		ASSERT(FALSE);
8621 		break;
8622 	}
8623 
8624 	rfs4_dbe_lock(sp->rs_dbe);
8625 
8626 	/* Update the stateid. */
8627 	next_stateid(&sp->rs_stateid);
8628 	resp->open_stateid = sp->rs_stateid.stateid;
8629 
8630 	rfs4_dbe_unlock(sp->rs_dbe);
8631 
8632 	rfs4_update_lease(sp->rs_owner->ro_client);
8633 	rfs4_update_open_sequence(sp->rs_owner);
8634 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8635 
8636 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8637 
8638 	*cs->statusp = resp->status = status;
8639 
8640 end:
8641 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8642 	rfs4_state_rele(sp);
8643 out:
8644 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8645 	    CLOSE4res *, resp);
8646 }
8647 
8648 /*
8649  * Manage the counts on the file struct and close all file locks
8650  */
8651 /*ARGSUSED*/
8652 void
8653 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8654     bool_t close_of_client)
8655 {
8656 	rfs4_file_t *fp = sp->rs_finfo;
8657 	rfs4_lo_state_t *lsp;
8658 	int fflags = 0;
8659 
8660 	/*
8661 	 * If this call is part of the larger closing down of client
8662 	 * state then it is just easier to release all locks
8663 	 * associated with this client instead of going through each
8664 	 * individual file and cleaning locks there.
8665 	 */
8666 	if (close_of_client) {
8667 		if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8668 		    !list_is_empty(&sp->rs_lostatelist) &&
8669 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8670 			/* Is the PxFS kernel module loaded? */
8671 			if (lm_remove_file_locks != NULL) {
8672 				int new_sysid;
8673 
8674 				/* Encode the cluster nodeid in new sysid */
8675 				new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8676 				lm_set_nlmid_flk(&new_sysid);
8677 
8678 				/*
8679 				 * This PxFS routine removes file locks for a
8680 				 * client over all nodes of a cluster.
8681 				 */
8682 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8683 				    "lm_remove_file_locks(sysid=0x%x)\n",
8684 				    new_sysid));
8685 				(*lm_remove_file_locks)(new_sysid);
8686 			} else {
8687 				struct flock64 flk;
8688 
8689 				/* Release all locks for this client */
8690 				flk.l_type = F_UNLKSYS;
8691 				flk.l_whence = 0;
8692 				flk.l_start = 0;
8693 				flk.l_len = 0;
8694 				flk.l_sysid =
8695 				    sp->rs_owner->ro_client->rc_sysidt;
8696 				flk.l_pid = 0;
8697 				(void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8698 				    &flk, F_REMOTELOCK | FREAD | FWRITE,
8699 				    (u_offset_t)0, NULL, CRED(), NULL);
8700 			}
8701 
8702 			sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8703 		}
8704 	}
8705 
8706 	/*
8707 	 * Release all locks on this file by this lock owner or at
8708 	 * least mark the locks as having been released
8709 	 */
8710 	for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8711 	    lsp = list_next(&sp->rs_lostatelist, lsp)) {
8712 		lsp->rls_locks_cleaned = TRUE;
8713 
8714 		/* Was this already taken care of above? */
8715 		if (!close_of_client &&
8716 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8717 			(void) cleanlocks(sp->rs_finfo->rf_vp,
8718 			    lsp->rls_locker->rl_pid,
8719 			    lsp->rls_locker->rl_client->rc_sysidt);
8720 	}
8721 
8722 	/*
8723 	 * Release any shrlocks associated with this open state ID.
8724 	 * This must be done before the rfs4_state gets marked closed.
8725 	 */
8726 	if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8727 		(void) rfs4_unshare(sp);
8728 
8729 	if (sp->rs_open_access) {
8730 		rfs4_dbe_lock(fp->rf_dbe);
8731 
8732 		/*
8733 		 * Decrement the count for each access and deny bit that this
8734 		 * state has contributed to the file.
8735 		 * If the file counts go to zero
8736 		 * clear the appropriate bit in the appropriate mask.
8737 		 */
8738 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8739 			fp->rf_access_read--;
8740 			fflags |= FREAD;
8741 			if (fp->rf_access_read == 0)
8742 				fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8743 		}
8744 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8745 			fp->rf_access_write--;
8746 			fflags |= FWRITE;
8747 			if (fp->rf_access_write == 0)
8748 				fp->rf_share_access &=
8749 				    ~OPEN4_SHARE_ACCESS_WRITE;
8750 		}
8751 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8752 			fp->rf_deny_read--;
8753 			if (fp->rf_deny_read == 0)
8754 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8755 		}
8756 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8757 			fp->rf_deny_write--;
8758 			if (fp->rf_deny_write == 0)
8759 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8760 		}
8761 
8762 		(void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8763 
8764 		rfs4_dbe_unlock(fp->rf_dbe);
8765 
8766 		sp->rs_open_access = 0;
8767 		sp->rs_open_deny = 0;
8768 	}
8769 }
8770 
8771 /*
8772  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8773  */
8774 static nfsstat4
8775 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8776 {
8777 	rfs4_lockowner_t *lo;
8778 	rfs4_client_t *cp;
8779 	uint32_t len;
8780 
8781 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
8782 	if (lo != NULL) {
8783 		cp = lo->rl_client;
8784 		if (rfs4_lease_expired(cp)) {
8785 			rfs4_lockowner_rele(lo);
8786 			rfs4_dbe_hold(cp->rc_dbe);
8787 			rfs4_client_close(cp);
8788 			return (NFS4ERR_EXPIRED);
8789 		}
8790 		dp->owner.clientid = lo->rl_owner.clientid;
8791 		len = lo->rl_owner.owner_len;
8792 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8793 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8794 		dp->owner.owner_len = len;
8795 		rfs4_lockowner_rele(lo);
8796 		goto finish;
8797 	}
8798 
8799 	/*
8800 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8801 	 * of the client id contain the boot time for a NFS4 lock. So we
8802 	 * fabricate and identity by setting clientid to the sysid, and
8803 	 * the lock owner to the pid.
8804 	 */
8805 	dp->owner.clientid = flk->l_sysid;
8806 	len = sizeof (pid_t);
8807 	dp->owner.owner_len = len;
8808 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8809 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
8810 finish:
8811 	dp->offset = flk->l_start;
8812 	dp->length = flk->l_len;
8813 
8814 	if (flk->l_type == F_RDLCK)
8815 		dp->locktype = READ_LT;
8816 	else if (flk->l_type == F_WRLCK)
8817 		dp->locktype = WRITE_LT;
8818 	else
8819 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
8820 
8821 	return (NFS4_OK);
8822 }
8823 
8824 /*
8825  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8826  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8827  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8828  * for that (obviously); they are sending the LOCK requests with some delays
8829  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8830  * locking and delay implementation at the client side.
8831  *
8832  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8833  * fast retries on its own (the for loop below) in a hope the lock will be
8834  * available soon.  And if not, the client won't need to resend the LOCK
8835  * requests so fast to check the lock availability.  This basically saves some
8836  * network traffic and tries to make sure the client gets the lock ASAP.
8837  */
8838 static int
8839 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8840 {
8841 	int error;
8842 	struct flock64 flk;
8843 	int i;
8844 	clock_t delaytime;
8845 	int cmd;
8846 	int spin_cnt = 0;
8847 
8848 	cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8849 retry:
8850 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8851 
8852 	for (i = 0; i < rfs4_maxlock_tries; i++) {
8853 		LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8854 		error = VOP_FRLOCK(vp, cmd,
8855 		    flock, flag, (u_offset_t)0, NULL, cred, NULL);
8856 
8857 		if (error != EAGAIN && error != EACCES)
8858 			break;
8859 
8860 		if (i < rfs4_maxlock_tries - 1) {
8861 			delay(delaytime);
8862 			delaytime *= 2;
8863 		}
8864 	}
8865 
8866 	if (error == EAGAIN || error == EACCES) {
8867 		/* Get the owner of the lock */
8868 		flk = *flock;
8869 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8870 		if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
8871 		    NULL) == 0) {
8872 			/*
8873 			 * There's a race inherent in the current VOP_FRLOCK
8874 			 * design where:
8875 			 * a: "other guy" takes a lock that conflicts with a
8876 			 * lock we want
8877 			 * b: we attempt to take our lock (non-blocking) and
8878 			 * the attempt fails.
8879 			 * c: "other guy" releases the conflicting lock
8880 			 * d: we ask what lock conflicts with the lock we want,
8881 			 * getting F_UNLCK (no lock blocks us)
8882 			 *
8883 			 * If we retry the non-blocking lock attempt in this
8884 			 * case (restart at step 'b') there's some possibility
8885 			 * that many such attempts might fail.  However a test
8886 			 * designed to actually provoke this race shows that
8887 			 * the vast majority of cases require no retry, and
8888 			 * only a few took as many as three retries.  Here's
8889 			 * the test outcome:
8890 			 *
8891 			 *	   number of retries    how many times we needed
8892 			 *				that many retries
8893 			 *	   0			79461
8894 			 *	   1			  862
8895 			 *	   2			   49
8896 			 *	   3			    5
8897 			 *
8898 			 * Given those empirical results, we arbitrarily limit
8899 			 * the retry count to ten.
8900 			 *
8901 			 * If we actually make to ten retries and give up,
8902 			 * nothing catastrophic happens, but we're unable to
8903 			 * return the information about the conflicting lock to
8904 			 * the NFS client.  That's an acceptable trade off vs.
8905 			 * letting this retry loop run forever.
8906 			 */
8907 			if (flk.l_type == F_UNLCK) {
8908 				if (spin_cnt++ < 10) {
8909 					/* No longer locked, retry */
8910 					goto retry;
8911 				}
8912 			} else {
8913 				*flock = flk;
8914 				LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8915 				    F_GETLK, &flk);
8916 			}
8917 		}
8918 	}
8919 
8920 	return (error);
8921 }
8922 
8923 /*ARGSUSED*/
8924 static nfsstat4
8925 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8926     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8927 {
8928 	nfsstat4 status;
8929 	rfs4_lockowner_t *lo = lsp->rls_locker;
8930 	rfs4_state_t *sp = lsp->rls_state;
8931 	struct flock64 flock;
8932 	int16_t ltype;
8933 	int flag;
8934 	int error;
8935 	sysid_t sysid;
8936 	LOCK4res *lres;
8937 	vnode_t *vp;
8938 
8939 	if (rfs4_lease_expired(lo->rl_client)) {
8940 		return (NFS4ERR_EXPIRED);
8941 	}
8942 
8943 	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8944 		return (status);
8945 
8946 	/* Check for zero length. To lock to end of file use all ones for V4 */
8947 	if (length == 0)
8948 		return (NFS4ERR_INVAL);
8949 	else if (length == (length4)(~0))
8950 		length = 0;		/* Posix to end of file  */
8951 
8952 retry:
8953 	rfs4_dbe_lock(sp->rs_dbe);
8954 	if (sp->rs_closed == TRUE) {
8955 		rfs4_dbe_unlock(sp->rs_dbe);
8956 		return (NFS4ERR_OLD_STATEID);
8957 	}
8958 
8959 	if (resop->resop != OP_LOCKU) {
8960 		switch (locktype) {
8961 		case READ_LT:
8962 		case READW_LT:
8963 			if ((sp->rs_share_access
8964 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
8965 				rfs4_dbe_unlock(sp->rs_dbe);
8966 
8967 				return (NFS4ERR_OPENMODE);
8968 			}
8969 			ltype = F_RDLCK;
8970 			break;
8971 		case WRITE_LT:
8972 		case WRITEW_LT:
8973 			if ((sp->rs_share_access
8974 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8975 				rfs4_dbe_unlock(sp->rs_dbe);
8976 
8977 				return (NFS4ERR_OPENMODE);
8978 			}
8979 			ltype = F_WRLCK;
8980 			break;
8981 		}
8982 	} else
8983 		ltype = F_UNLCK;
8984 
8985 	flock.l_type = ltype;
8986 	flock.l_whence = 0;		/* SEEK_SET */
8987 	flock.l_start = offset;
8988 	flock.l_len = length;
8989 	flock.l_sysid = sysid;
8990 	flock.l_pid = lsp->rls_locker->rl_pid;
8991 
8992 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
8993 	if (flock.l_len < 0 || flock.l_start < 0) {
8994 		rfs4_dbe_unlock(sp->rs_dbe);
8995 		return (NFS4ERR_INVAL);
8996 	}
8997 
8998 	/*
8999 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9000 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9001 	 */
9002 	flag = (int)sp->rs_share_access | F_REMOTELOCK;
9003 
9004 	vp = sp->rs_finfo->rf_vp;
9005 	VN_HOLD(vp);
9006 
9007 	/*
9008 	 * We need to unlock sp before we call the underlying filesystem to
9009 	 * acquire the file lock.
9010 	 */
9011 	rfs4_dbe_unlock(sp->rs_dbe);
9012 
9013 	error = setlock(vp, &flock, flag, cred);
9014 
9015 	/*
9016 	 * Make sure the file is still open.  In a case the file was closed in
9017 	 * the meantime, clean the lock we acquired using the setlock() call
9018 	 * above, and return the appropriate error.
9019 	 */
9020 	rfs4_dbe_lock(sp->rs_dbe);
9021 	if (sp->rs_closed == TRUE) {
9022 		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9023 		rfs4_dbe_unlock(sp->rs_dbe);
9024 
9025 		VN_RELE(vp);
9026 
9027 		return (NFS4ERR_OLD_STATEID);
9028 	}
9029 	rfs4_dbe_unlock(sp->rs_dbe);
9030 
9031 	VN_RELE(vp);
9032 
9033 	if (error == 0) {
9034 		rfs4_dbe_lock(lsp->rls_dbe);
9035 		next_stateid(&lsp->rls_lockid);
9036 		rfs4_dbe_unlock(lsp->rls_dbe);
9037 	}
9038 
9039 	/*
9040 	 * N.B. We map error values to nfsv4 errors. This is differrent
9041 	 * than puterrno4 routine.
9042 	 */
9043 	switch (error) {
9044 	case 0:
9045 		status = NFS4_OK;
9046 		break;
9047 	case EAGAIN:
9048 	case EACCES:		/* Old value */
9049 		/* Can only get here if op is OP_LOCK */
9050 		ASSERT(resop->resop == OP_LOCK);
9051 		lres = &resop->nfs_resop4_u.oplock;
9052 		status = NFS4ERR_DENIED;
9053 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9054 		    == NFS4ERR_EXPIRED)
9055 			goto retry;
9056 		break;
9057 	case ENOLCK:
9058 		status = NFS4ERR_DELAY;
9059 		break;
9060 	case EOVERFLOW:
9061 		status = NFS4ERR_INVAL;
9062 		break;
9063 	case EINVAL:
9064 		status = NFS4ERR_NOTSUPP;
9065 		break;
9066 	default:
9067 		status = NFS4ERR_SERVERFAULT;
9068 		break;
9069 	}
9070 
9071 	return (status);
9072 }
9073 
/*
 * NFSv4 LOCK operation.
 *
 * Requires a current filehandle (cs->vp); otherwise NFS4ERR_NOFILEHANDLE
 * is returned.  The request takes one of two paths, selected by
 * args->locker.new_lock_owner:
 *
 * - New lock owner: the open state is looked up from the open stateid,
 *   the open owner's ro_sw is entered, the open stateid and open seqid
 *   are validated, and the lock owner and its lock state are looked up
 *   (and created if needed).  A duplicate request is answered from the
 *   reply saved in the lock state.
 *
 * - Existing lock owner: the lock state is looked up from the lock
 *   stateid, its rls_sw is entered, and the lock stateid and lock seqid
 *   are validated.  A replay is answered with the status already present
 *   in resp->status.
 *
 * Both paths then share the checks on the file type and on the grace
 * period before rfs4_do_lock() is called.  On NFS4_OK the lock stateid
 * is returned in resp->LOCK4res_u.lock_stateid.
 *
 * The result is stored in both resp->status and *cs->statusp.
 */
/*ARGSUSED*/
void
rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCK4args *args = &argop->nfs_argop4_u.oplock;
	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
	nfsstat4 status;
	stateid4 *stateid;
	rfs4_lockowner_t *lo;
	rfs4_client_t *cp;
	rfs4_state_t *sp = NULL;	/* non-NULL only on the new owner path */
	rfs4_lo_state_t *lsp = NULL;
	bool_t ls_sw_held = FALSE;	/* TRUE once lsp->rls_sw is entered */
	bool_t create = TRUE;
	bool_t lcreate = TRUE;
	bool_t dup_lock = FALSE;	/* TRUE for a replayed new owner LOCK */
	int rc;

	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
	    LOCK4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
		    cs, LOCK4res *, resp);
		return;
	}

	if (args->locker.new_lock_owner) {
		/* Create a new lockowner for this instance */
		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;

		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));

		stateid = &olo->open_stateid;
		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
		if (status != NFS4_OK) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "Get state failed in lock %d", status));
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* Ensure specified filehandle matches */
		if (cs->vp != sp->rs_finfo->rf_vp) {
			rfs4_state_rele(sp);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to open_owner while we tinker */
		rfs4_sw_enter(&sp->rs_owner->ro_sw);

		/* rc is kept so the seqid check below can tell OKAY from REPLAY */
		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_UNCONFIRMED:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the open seqid check is skipped */
			if (rfs4_has_session(cs))
				break;
			/* FALLTHROUGH */
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_olo_seqid(olo->open_seqid,
			    sp->rs_owner, resop)) {
			case NFS4_CHKSEQ_OKAY:
				if (rc == NFS4_CHECK_STATEID_OKAY)
					break;
				/*
				 * This is replayed stateid; if seqid
				 * matches next expected, then client
				 * is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				/* This is a duplicate LOCK request */
				dup_lock = TRUE;

				/*
				 * For a duplicate we do not want to
				 * create a new lockowner as it should
				 * already exist.
				 * Turn off the lockowner create flag.
				 */
				lcreate = FALSE;
			}
			break;
		}

		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
		if (lo == NULL) {
			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
			goto end;
		}

		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
		if (lsp == NULL) {
			rfs4_update_lease(sp->rs_owner->ro_client);
			/*
			 * Only update the open_seqid if this is not
			 * a duplicate request
			 */
			if (dup_lock == FALSE) {
				rfs4_update_open_sequence(sp->rs_owner);
			}

			NFS4_DEBUG(rfs4_debug,
			    (CE_NOTE, "rfs4_op_lock: no state"));
			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
			rfs4_lockowner_rele(lo);
			goto end;
		}

		/*
		 * This is the new_lock_owner branch and the client is
		 * supposed to be associating a new lock_owner with
		 * the open file at this point.  If we find that a
		 * lock_owner/state association already exists and a
		 * successful LOCK request was returned to the client,
		 * an error is returned to the client since this is
		 * not appropriate.  The client should be using the
		 * existing lock_owner branch.
		 */
		if (!rfs4_has_session(cs) && !dup_lock && !create) {
			if (lsp->rls_lock_completed == TRUE) {
				*cs->statusp =
				    resp->status = NFS4ERR_BAD_SEQID;
				rfs4_lockowner_rele(lo);
				goto end;
			}
		}

		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * Only update the open_seqid if this is not
		 * a duplicate request
		 */
		if (dup_lock == FALSE) {
			rfs4_update_open_sequence(sp->rs_owner);
		}

		/*
		 * If this is a duplicate lock request, just copy the
		 * previously saved reply and return.
		 */
		if (dup_lock == TRUE) {
			/* verify that lock_seqid's match */
			if (lsp->rls_seqid != olo->lock_seqid) {
				NFS4_DEBUG(rfs4_debug,
				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
				    "lsp->seqid=%d old->seqid=%d",
				    lsp->rls_seqid, olo->lock_seqid));
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			} else {
				rfs4_copy_reply(resop, &lsp->rls_reply);
				/*
				 * Make sure to copy the just
				 * retrieved reply status into the
				 * overall compound status
				 */
				*cs->statusp = resp->status;
			}
			rfs4_lockowner_rele(lo);
			goto end;
		}

		rfs4_dbe_lock(lsp->rls_dbe);

		/* Make sure to update the lock sequence id */
		lsp->rls_seqid = olo->lock_seqid;

		NFS4_DEBUG(rfs4_debug,
		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));

		/*
		 * This is used to signify the newly created lockowner
		 * stateid and its sequence number.  The checks for
		 * sequence number and increment don't occur on the
		 * very first lock request for a lockowner.
		 */
		lsp->rls_skip_seqid_check = TRUE;

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		rfs4_dbe_unlock(lsp->rls_dbe);

		rfs4_lockowner_rele(lo);
	} else {
		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
		/* get lsp and hold the lock on the underlying file struct */
		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
		    != NFS4_OK) {
			*cs->statusp = resp->status = status;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}
		create = FALSE;	/* We didn't create lsp */

		/* Ensure specified filehandle matches */
		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
			rfs4_lo_state_rele(lsp, TRUE);
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
			    cs, LOCK4res *, resp);
			return;
		}

		/* hold off other access to lsp while we tinker */
		rfs4_sw_enter(&lsp->rls_sw);
		ls_sw_held = TRUE;

		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
		/*
		 * The stateid looks like it was okay (expected to be
		 * the next one)
		 */
		case NFS4_CHECK_STATEID_OKAY:
			/* With a session the lock seqid check is skipped */
			if (rfs4_has_session(cs))
				break;

			/*
			 * The sequence id is now checked.  Determine
			 * if this is a replay or if it is in the
			 * expected (next) sequence.  In the case of a
			 * replay, there are two replay conditions
			 * that may occur.  The first is the normal
			 * condition where a LOCK is done with a
			 * NFS4_OK response and the stateid is
			 * updated.  That case is handled below when
			 * the stateid is identified as a REPLAY.  The
			 * second is the case where an error is
			 * returned, like NFS4ERR_DENIED, and the
			 * sequence number is updated but the stateid
			 * is not updated.  This second case is dealt
			 * with here.  So it may seem odd that the
			 * stateid is okay but the sequence id is a
			 * replay but it is okay.
			 */
			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_REPLAY:
				if (resp->status != NFS4_OK) {
					/*
					 * Here is our replay and need
					 * to verify that the last
					 * response was an error.
					 */
					*cs->statusp = resp->status;
					goto end;
				}
				/*
				 * This is done since the sequence id
				 * looked like a replay but it didn't
				 * pass our check so a BAD_SEQID is
				 * returned as a result.
				 */
				/*FALLTHROUGH*/
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_OKAY:
				/* Everything looks okay move ahead */
				break;
			}
			break;
		case NFS4_CHECK_STATEID_OLD:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_EXPIRED:
			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
			goto end;
		case NFS4_CHECK_STATEID_CLOSED:
			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
			goto end;
		case NFS4_CHECK_STATEID_REPLAY:
			ASSERT(!rfs4_has_session(cs));

			switch (rfs4_check_lock_seqid(
			    args->locker.locker4_u.lock_owner.lock_seqid,
			    lsp, resop)) {
			case NFS4_CHKSEQ_OKAY:
				/*
				 * This is a replayed stateid; if
				 * seqid matches the next expected,
				 * then client is using wrong seqid.
				 */
				/* FALLTHROUGH */
			case NFS4_CHKSEQ_BAD:
				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
				goto end;
			case NFS4_CHKSEQ_REPLAY:
				rfs4_update_lease(lsp->rls_locker->rl_client);
				*cs->statusp = status = resp->status;
				goto end;
			}
			break;
		default:
			ASSERT(FALSE);
			break;
		}

		rfs4_update_lock_sequence(lsp);
		rfs4_update_lease(lsp->rls_locker->rl_client);
	}

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	cp = lsp->rls_state->rs_owner->ro_client;

	/* During grace only reclaims are accepted */
	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	/* A reclaim during grace needs a client that is allowed to reclaim */
	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Reclaims are refused once the client is out of grace */
	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
		status = NFS4ERR_NO_GRACE;
		goto out;
	}

	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
		cs->deleg = TRUE;

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	/* Reached with "status" set; records the result and saves the reply */
	lsp->rls_skip_seqid_check = FALSE;

	*cs->statusp = resp->status = status;

	if (status == NFS4_OK) {
		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
		lsp->rls_lock_completed = TRUE;
	}
	/*
	 * Only update the "OPEN" response here if this was a new
	 * lock_owner
	 */
	if (sp)
		rfs4_update_open_resp(sp->rs_owner, resop, NULL);

	rfs4_update_lock_resp(lsp, resop);

end:
	/* Reached with the status already stored; only releases what is held */
	if (lsp) {
		if (ls_sw_held)
			rfs4_sw_exit(&lsp->rls_sw);
		/*
		 * If an sp obtained, then the lsp does not represent
		 * a lock on the file struct.
		 */
		if (sp != NULL)
			rfs4_lo_state_rele(lsp, FALSE);
		else
			rfs4_lo_state_rele(lsp, TRUE);
	}
	if (sp) {
		rfs4_sw_exit(&sp->rs_owner->ro_sw);
		rfs4_state_rele(sp);
	}

	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
	    LOCK4res *, resp);
}
9484 
9485 /* free function for LOCK/LOCKT */
9486 static void
9487 lock_denied_free(nfs_resop4 *resop)
9488 {
9489 	LOCK4denied *dp = NULL;
9490 
9491 	switch (resop->resop) {
9492 	case OP_LOCK:
9493 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9494 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9495 		break;
9496 	case OP_LOCKT:
9497 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9498 			dp = &resop->nfs_resop4_u.oplockt.denied;
9499 		break;
9500 	default:
9501 		break;
9502 	}
9503 
9504 	if (dp)
9505 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9506 }
9507 
/*
 * Handler for the LOCKU operation.
 *
 * The lock state is looked up from the stateid in the arguments and must
 * belong to the current filehandle (cs->vp).  The stateid, and for
 * requests without a session the lock seqid as well, are then validated
 * while holding lsp->rls_sw.  If everything checks out the request is
 * handed to rfs4_do_lock() with the locktype, offset and length taken
 * from the arguments.
 *
 * The outcome is stored in both resp->status and *cs->statusp.  Every
 * exit path fires the op__locku__done probe.
 */
/*ARGSUSED*/
void
rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
	nfsstat4 status;
	stateid4 *stateid = &args->lock_stateid;
	rfs4_lo_state_t *lsp;

	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
	    LOCKU4args *, args);

	/* No current filehandle: nothing to unlock. */
	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* On success lsp is held and must be released before returning. */
	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
		*cs->statusp = resp->status = status;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* Ensure specified filehandle matches */
	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
		rfs4_lo_state_rele(lsp, TRUE);
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
		    LOCKU4res *, resp);
		return;
	}

	/* hold off other access to lsp while we tinker */
	rfs4_sw_enter(&lsp->rls_sw);

	/*
	 * From here on "goto end" leaves with the status already stored in
	 * the response, while "goto out" stores the local status first and
	 * also calls rfs4_update_lock_resp().
	 */
	switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
	case NFS4_CHECK_STATEID_OKAY:
		/* The lock seqid is only checked when there is no session. */
		if (rfs4_has_session(cs))
			break;

		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
		    != NFS4_CHKSEQ_OKAY) {
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		}
		break;
	case NFS4_CHECK_STATEID_OLD:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_BAD:
		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_EXPIRED:
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	case NFS4_CHECK_STATEID_CLOSED:
		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
		goto end;
	case NFS4_CHECK_STATEID_REPLAY:
		ASSERT(!rfs4_has_session(cs));

		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
		case NFS4_CHKSEQ_OKAY:
			/*
			 * This is a replayed stateid; if
			 * seqid matches the next expected,
			 * then client is using wrong seqid.
			 */
			/* FALLTHROUGH */
		case NFS4_CHKSEQ_BAD:
			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
			goto end;
		case NFS4_CHKSEQ_REPLAY:
			rfs4_update_lease(lsp->rls_locker->rl_client);
			/*
			 * The status is taken from the response itself;
			 * presumably rfs4_check_lock_seqid() has filled
			 * resop with the saved reply -- TODO confirm.
			 */
			*cs->statusp = status = resp->status;
			goto end;
		}
		break;
	default:
		ASSERT(FALSE);
		break;
	}

	/* The request passed validation: advance the seqid, renew the lease. */
	rfs4_update_lock_sequence(lsp);
	rfs4_update_lease(lsp->rls_locker->rl_client);

	/*
	 * NFS4 only allows locking on regular files, so
	 * verify type of object.
	 */
	if (cs->vp->v_type != VREG) {
		if (cs->vp->v_type == VDIR)
			status = NFS4ERR_ISDIR;
		else
			status = NFS4ERR_INVAL;
		goto out;
	}

	/* The request is refused while the owning client is in grace. */
	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
		status = NFS4ERR_GRACE;
		goto out;
	}

	status = rfs4_do_lock(lsp, args->locktype,
	    args->offset, args->length, cs->cr, resop);

out:
	*cs->statusp = resp->status = status;

	/* The lock stateid is only returned to the client on success. */
	if (status == NFS4_OK)
		resp->lock_stateid = lsp->rls_lockid.stateid;

	rfs4_update_lock_resp(lsp, resop);

end:
	/* Undo the rfs4_sw_enter() and the hold from rfs4_get_lo_state(). */
	rfs4_sw_exit(&lsp->rls_sw);
	rfs4_lo_state_rele(lsp, TRUE);

	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
	    LOCKU4res *, resp);
}
9633 
9634 /*
9635  * LOCKT is a best effort routine, the client can not be guaranteed that
9636  * the status return is still in effect by the time the reply is received.
9637  * They are numerous race conditions in this routine, but we are not required
9638  * and can not be accurate.
9639  */
9640 /*ARGSUSED*/
9641 void
9642 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9643     struct svc_req *req, struct compound_state *cs)
9644 {
9645 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9646 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9647 	rfs4_lockowner_t *lo;
9648 	rfs4_client_t *cp;
9649 	bool_t create = FALSE;
9650 	struct flock64 flk;
9651 	int error;
9652 	int flag = FREAD | FWRITE;
9653 	int ltype;
9654 	length4 posix_length;
9655 	sysid_t sysid;
9656 	pid_t pid;
9657 
9658 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9659 	    LOCKT4args *, args);
9660 
9661 	if (cs->vp == NULL) {
9662 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9663 		goto out;
9664 	}
9665 
9666 	/*
9667 	 * NFS4 only allows locking on regular files, so
9668 	 * verify type of object.
9669 	 */
9670 	if (cs->vp->v_type != VREG) {
9671 		if (cs->vp->v_type == VDIR)
9672 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
9673 		else
9674 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
9675 		goto out;
9676 	}
9677 
9678 	/*
9679 	 * Check out the clientid to ensure the server knows about it
9680 	 * so that we correctly inform the client of a server reboot.
9681 	 */
9682 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9683 	    == NULL) {
9684 		*cs->statusp = resp->status =
9685 		    rfs4_check_clientid(&args->owner.clientid, 0);
9686 		goto out;
9687 	}
9688 	if (rfs4_lease_expired(cp)) {
9689 		rfs4_client_close(cp);
9690 		/*
9691 		 * Protocol doesn't allow returning NFS4ERR_STALE as
9692 		 * other operations do on this check so STALE_CLIENTID
9693 		 * is returned instead
9694 		 */
9695 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9696 		goto out;
9697 	}
9698 
9699 	if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9700 		*cs->statusp = resp->status = NFS4ERR_GRACE;
9701 		rfs4_client_rele(cp);
9702 		goto out;
9703 	}
9704 	rfs4_client_rele(cp);
9705 
9706 	resp->status = NFS4_OK;
9707 
9708 	switch (args->locktype) {
9709 	case READ_LT:
9710 	case READW_LT:
9711 		ltype = F_RDLCK;
9712 		break;
9713 	case WRITE_LT:
9714 	case WRITEW_LT:
9715 		ltype = F_WRLCK;
9716 		break;
9717 	}
9718 
9719 	posix_length = args->length;
9720 	/* Check for zero length. To lock to end of file use all ones for V4 */
9721 	if (posix_length == 0) {
9722 		*cs->statusp = resp->status = NFS4ERR_INVAL;
9723 		goto out;
9724 	} else if (posix_length == (length4)(~0)) {
9725 		posix_length = 0;	/* Posix to end of file  */
9726 	}
9727 
9728 	/* Find or create a lockowner */
9729 	lo = rfs4_findlockowner(&args->owner, &create);
9730 
9731 	if (lo) {
9732 		pid = lo->rl_pid;
9733 		if ((resp->status =
9734 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9735 			goto err;
9736 	} else {
9737 		pid = 0;
9738 		sysid = lockt_sysid;
9739 	}
9740 retry:
9741 	flk.l_type = ltype;
9742 	flk.l_whence = 0;		/* SEEK_SET */
9743 	flk.l_start = args->offset;
9744 	flk.l_len = posix_length;
9745 	flk.l_sysid = sysid;
9746 	flk.l_pid = pid;
9747 	flag |= F_REMOTELOCK;
9748 
9749 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9750 
9751 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9752 	if (flk.l_len < 0 || flk.l_start < 0) {
9753 		resp->status = NFS4ERR_INVAL;
9754 		goto err;
9755 	}
9756 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9757 	    NULL, cs->cr, NULL);
9758 
9759 	/*
9760 	 * N.B. We map error values to nfsv4 errors. This is differrent
9761 	 * than puterrno4 routine.
9762 	 */
9763 	switch (error) {
9764 	case 0:
9765 		if (flk.l_type == F_UNLCK)
9766 			resp->status = NFS4_OK;
9767 		else {
9768 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9769 				goto retry;
9770 			resp->status = NFS4ERR_DENIED;
9771 		}
9772 		break;
9773 	case EOVERFLOW:
9774 		resp->status = NFS4ERR_INVAL;
9775 		break;
9776 	case EINVAL:
9777 		resp->status = NFS4ERR_NOTSUPP;
9778 		break;
9779 	default:
9780 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9781 		    error);
9782 		resp->status = NFS4ERR_SERVERFAULT;
9783 		break;
9784 	}
9785 
9786 err:
9787 	if (lo)
9788 		rfs4_lockowner_rele(lo);
9789 	*cs->statusp = resp->status;
9790 out:
9791 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9792 	    LOCKT4res *, resp);
9793 }
9794 
9795 int
9796 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9797 {
9798 	int err;
9799 	int cmd;
9800 	vnode_t *vp;
9801 	struct shrlock shr;
9802 	struct shr_locowner shr_loco;
9803 	int fflags = 0;
9804 
9805 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9806 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9807 
9808 	if (sp->rs_closed)
9809 		return (NFS4ERR_OLD_STATEID);
9810 
9811 	vp = sp->rs_finfo->rf_vp;
9812 	ASSERT(vp);
9813 
9814 	shr.s_access = shr.s_deny = 0;
9815 
9816 	if (access & OPEN4_SHARE_ACCESS_READ) {
9817 		fflags |= FREAD;
9818 		shr.s_access |= F_RDACC;
9819 	}
9820 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
9821 		fflags |= FWRITE;
9822 		shr.s_access |= F_WRACC;
9823 	}
9824 	ASSERT(shr.s_access);
9825 
9826 	if (deny & OPEN4_SHARE_DENY_READ)
9827 		shr.s_deny |= F_RDDNY;
9828 	if (deny & OPEN4_SHARE_DENY_WRITE)
9829 		shr.s_deny |= F_WRDNY;
9830 
9831 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9832 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9833 	shr_loco.sl_pid = shr.s_pid;
9834 	shr_loco.sl_id = shr.s_sysid;
9835 	shr.s_owner = (caddr_t)&shr_loco;
9836 	shr.s_own_len = sizeof (shr_loco);
9837 
9838 	cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9839 
9840 	err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9841 	if (err != 0) {
9842 		if (err == EAGAIN)
9843 			err = NFS4ERR_SHARE_DENIED;
9844 		else
9845 			err = puterrno4(err);
9846 		return (err);
9847 	}
9848 
9849 	sp->rs_share_access |= access;
9850 	sp->rs_share_deny |= deny;
9851 
9852 	return (0);
9853 }
9854 
9855 int
9856 rfs4_unshare(rfs4_state_t *sp)
9857 {
9858 	int err;
9859 	struct shrlock shr;
9860 	struct shr_locowner shr_loco;
9861 
9862 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9863 
9864 	if (sp->rs_closed || sp->rs_share_access == 0)
9865 		return (0);
9866 
9867 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9868 	ASSERT(sp->rs_finfo->rf_vp);
9869 
9870 	shr.s_access = shr.s_deny = 0;
9871 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9872 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9873 	shr_loco.sl_pid = shr.s_pid;
9874 	shr_loco.sl_id = shr.s_sysid;
9875 	shr.s_owner = (caddr_t)&shr_loco;
9876 	shr.s_own_len = sizeof (shr_loco);
9877 
9878 	err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9879 	    NULL);
9880 	if (err != 0) {
9881 		err = puterrno4(err);
9882 		return (err);
9883 	}
9884 
9885 	sp->rs_share_access = 0;
9886 	sp->rs_share_deny = 0;
9887 
9888 	return (0);
9889 
9890 }
9891 
9892 static int
9893 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9894 {
9895 	struct clist	*wcl;
9896 	count4		count = rok->data_len;
9897 	int		wlist_len;
9898 
9899 	wcl = args->wlist;
9900 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9901 		return (FALSE);
9902 	}
9903 	wcl = args->wlist;
9904 	rok->wlist_len = wlist_len;
9905 	rok->wlist = wcl;
9906 	return (TRUE);
9907 }
9908 
/*
 * Tunable to disable server referrals.  It defaults to 0; while it is
 * non-zero vn_is_nfs_reparse() returns B_FALSE without examining the
 * vnode.
 */
int rfs4_no_referrals = 0;
9911 
9912 /*
9913  * Find an NFS record in reparse point data.
9914  * Returns 0 for success and <0 or an errno value on failure.
9915  */
9916 int
9917 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9918 {
9919 	int err;
9920 	char *stype, *val;
9921 	nvlist_t *nvl;
9922 	nvpair_t *curr;
9923 
9924 	if ((nvl = reparse_init()) == NULL)
9925 		return (-1);
9926 
9927 	if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9928 		reparse_free(nvl);
9929 		return (err);
9930 	}
9931 
9932 	curr = NULL;
9933 	while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9934 		if ((stype = nvpair_name(curr)) == NULL) {
9935 			reparse_free(nvl);
9936 			return (-2);
9937 		}
9938 		if (strncasecmp(stype, "NFS", 3) == 0)
9939 			break;
9940 	}
9941 
9942 	if ((curr == NULL) ||
9943 	    (nvpair_value_string(curr, &val))) {
9944 		reparse_free(nvl);
9945 		return (-3);
9946 	}
9947 	*nvlp = nvl;
9948 	*svcp = stype;
9949 	*datap = val;
9950 	return (0);
9951 }
9952 
9953 int
9954 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9955 {
9956 	nvlist_t *nvl;
9957 	char *s, *d;
9958 
9959 	if (rfs4_no_referrals != 0)
9960 		return (B_FALSE);
9961 
9962 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9963 		return (B_FALSE);
9964 
9965 	if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9966 		return (B_FALSE);
9967 
9968 	reparse_free(nvl);
9969 
9970 	return (B_TRUE);
9971 }
9972 
9973 /*
9974  * There is a user-level copy of this routine in ref_subr.c.
9975  * Changes should be kept in sync.
9976  */
9977 static int
9978 nfs4_create_components(char *path, component4 *comp4)
9979 {
9980 	int slen, plen, ncomp;
9981 	char *ori_path, *nxtc, buf[MAXNAMELEN];
9982 
9983 	if (path == NULL)
9984 		return (0);
9985 
9986 	plen = strlen(path) + 1;	/* include the terminator */
9987 	ori_path = path;
9988 	ncomp = 0;
9989 
9990 	/* count number of components in the path */
9991 	for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9992 		if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9993 			if ((slen = nxtc - path) == 0) {
9994 				path = nxtc + 1;
9995 				continue;
9996 			}
9997 
9998 			if (comp4 != NULL) {
9999 				bcopy(path, buf, slen);
10000 				buf[slen] = '\0';
10001 				(void) str_to_utf8(buf, &comp4[ncomp]);
10002 			}
10003 
10004 			ncomp++;	/* 1 valid component */
10005 			path = nxtc + 1;
10006 		}
10007 		if (*nxtc == '\0' || *nxtc == '\n')
10008 			break;
10009 	}
10010 
10011 	return (ncomp);
10012 }
10013 
10014 /*
10015  * There is a user-level copy of this routine in ref_subr.c.
10016  * Changes should be kept in sync.
10017  */
10018 static int
10019 make_pathname4(char *path, pathname4 *pathname)
10020 {
10021 	int ncomp;
10022 	component4 *comp4;
10023 
10024 	if (pathname == NULL)
10025 		return (0);
10026 
10027 	if (path == NULL) {
10028 		pathname->pathname4_val = NULL;
10029 		pathname->pathname4_len = 0;
10030 		return (0);
10031 	}
10032 
10033 	/* count number of components to alloc buffer */
10034 	if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10035 		pathname->pathname4_val = NULL;
10036 		pathname->pathname4_len = 0;
10037 		return (0);
10038 	}
10039 	comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10040 
10041 	/* copy components into allocated buffer */
10042 	ncomp = nfs4_create_components(path, comp4);
10043 
10044 	pathname->pathname4_val = comp4;
10045 	pathname->pathname4_len = ncomp;
10046 
10047 	return (ncomp);
10048 }
10049 
10050 #define	xdr_fs_locations4 xdr_fattr4_fs_locations
10051 
10052 fs_locations4 *
10053 fetch_referral(vnode_t *vp, cred_t *cr)
10054 {
10055 	nvlist_t *nvl;
10056 	char *stype, *sdata;
10057 	fs_locations4 *result;
10058 	char buf[1024];
10059 	size_t bufsize;
10060 	XDR xdr;
10061 	int err;
10062 
10063 	/*
10064 	 * Check attrs to ensure it's a reparse point
10065 	 */
10066 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10067 		return (NULL);
10068 
10069 	/*
10070 	 * Look for an NFS record and get the type and data
10071 	 */
10072 	if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10073 		return (NULL);
10074 
10075 	/*
10076 	 * With the type and data, upcall to get the referral
10077 	 */
10078 	bufsize = sizeof (buf);
10079 	bzero(buf, sizeof (buf));
10080 	err = reparse_kderef((const char *)stype, (const char *)sdata,
10081 	    buf, &bufsize);
10082 	reparse_free(nvl);
10083 
10084 	DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10085 	    char *, stype, char *, sdata, char *, buf, int, err);
10086 	if (err) {
10087 		cmn_err(CE_NOTE,
10088 		    "reparsed daemon not running: unable to get referral (%d)",
10089 		    err);
10090 		return (NULL);
10091 	}
10092 
10093 	/*
10094 	 * We get an XDR'ed record back from the kderef call
10095 	 */
10096 	xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10097 	result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10098 	err = xdr_fs_locations4(&xdr, result);
10099 	XDR_DESTROY(&xdr);
10100 	if (err != TRUE) {
10101 		DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10102 		    int, err);
10103 		return (NULL);
10104 	}
10105 
10106 	/*
10107 	 * Look at path to recover fs_root, ignoring the leading '/'
10108 	 */
10109 	(void) make_pathname4(vp->v_path, &result->fs_root);
10110 
10111 	return (result);
10112 }
10113 
10114 char *
10115 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10116 {
10117 	fs_locations4 *fsl;
10118 	fs_location4 *fs;
10119 	char *server, *path, *symbuf;
10120 	static char *prefix = "/net/";
10121 	int i, size, npaths;
10122 	uint_t len;
10123 
10124 	/* Get the referral */
10125 	if ((fsl = fetch_referral(vp, cr)) == NULL)
10126 		return (NULL);
10127 
10128 	/* Deal with only the first location and first server */
10129 	fs = &fsl->locations_val[0];
10130 	server = utf8_to_str(&fs->server_val[0], &len, NULL);
10131 	if (server == NULL) {
10132 		rfs4_free_fs_locations4(fsl);
10133 		kmem_free(fsl, sizeof (fs_locations4));
10134 		return (NULL);
10135 	}
10136 
10137 	/* Figure out size for "/net/" + host + /path/path/path + NULL */
10138 	size = strlen(prefix) + len;
10139 	for (i = 0; i < fs->rootpath.pathname4_len; i++)
10140 		size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10141 
10142 	/* Allocate the symlink buffer and fill it */
10143 	symbuf = kmem_zalloc(size, KM_SLEEP);
10144 	(void) strcat(symbuf, prefix);
10145 	(void) strcat(symbuf, server);
10146 	kmem_free(server, len);
10147 
10148 	npaths = 0;
10149 	for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10150 		path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10151 		if (path == NULL)
10152 			continue;
10153 		(void) strcat(symbuf, "/");
10154 		(void) strcat(symbuf, path);
10155 		npaths++;
10156 		kmem_free(path, len);
10157 	}
10158 
10159 	rfs4_free_fs_locations4(fsl);
10160 	kmem_free(fsl, sizeof (fs_locations4));
10161 
10162 	if (strsz != NULL)
10163 		*strsz = size;
10164 	return (symbuf);
10165 }
10166 
10167 /*
10168  * Check to see if we have a downrev Solaris client, so that we
10169  * can send it a symlink instead of a referral.
10170  */
10171 int
10172 client_is_downrev(struct svc_req *req)
10173 {
10174 	struct sockaddr *ca;
10175 	rfs4_clntip_t *ci;
10176 	bool_t create = FALSE;
10177 	int is_downrev;
10178 
10179 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10180 	ASSERT(ca);
10181 	ci = rfs4_find_clntip(ca, &create);
10182 	if (ci == NULL)
10183 		return (0);
10184 	is_downrev = ci->ri_no_referrals;
10185 	rfs4_dbe_rele(ci->ri_dbe);
10186 	return (is_downrev);
10187 }
10188 
/*
 * Do the main work of handling HA-NFSv4 Resource Group failover on
 * Sun Cluster.
 * We need to detect whether any RG admin paths have been added or removed,
 * and adjust resources accordingly.
 * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
 * order to scale, the list and array of paths need to be held in more
 * suitable data structures.
 *
 * The "currently-serving" paths are the circular list nsrv4->dss_pathlist;
 * the "passed-in" paths are the rfs4_dss_newpaths array, which holds
 * rfs4_dss_numnewpaths entries.  Removed paths are unlinked and freed.
 * If any path was added, a new server instance is created for the added
 * paths, their stable storage state is read in, and all grace periods
 * are reset.
 */
static void
hanfsv4_failover(nfs4_srv_t *nsrv4)
{
	int i, start_grace, numadded_paths = 0;
	char **added_paths = NULL;
	rfs4_dss_path_t *dss_path;

	/*
	 * Note: currently, dss_pathlist cannot be NULL, since
	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
	 * make the latter dynamically specified too, the following will
	 * need to be adjusted.
	 */

	/*
	 * First, look for removed paths: RGs that have been failed-over
	 * away from this node.
	 * Walk the "currently-serving" dss_pathlist and, for each
	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
	 * from nfsd. If not, that RG path has been removed.
	 *
	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
	 * any duplicates.
	 */
	dss_path = nsrv4->dss_pathlist;
	do {
		int found = 0;
		char *path = dss_path->path;

		/* used only for non-HA so may not be removed */
		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
			/*
			 * "continue" in a do-while jumps to the loop
			 * condition, so the walk still ends once we are
			 * back at the list head.
			 */
			dss_path = dss_path->next;
			continue;
		}

		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			int cmpret;
			char *newpath = rfs4_dss_newpaths[i];

			/*
			 * Since nfsd has sorted rfs4_dss_newpaths for us,
			 * once the return from strcmp is negative we know
			 * we've passed the point where "path" should be,
			 * and can stop searching: "path" has been removed.
			 */
			cmpret = strcmp(path, newpath);
			if (cmpret < 0)
				break;
			if (cmpret == 0) {
				found = 1;
				break;
			}
		}

		if (found == 0) {
			unsigned index = dss_path->index;
			rfs4_servinst_t *sip = dss_path->sip;
			/* remember the successor before the entry is freed */
			rfs4_dss_path_t *path_next = dss_path->next;

			/*
			 * This path has been removed.
			 * We must clear out the servinst reference to
			 * it, since it's now owned by another
			 * node: we should not attempt to touch it.
			 */
			ASSERT(dss_path == sip->dss_paths[index]);
			sip->dss_paths[index] = NULL;

			/*
			 * NOTE(review): nsrv4->dss_pathlist is not updated
			 * here; this looks safe only if the list head is
			 * never a removable entry -- confirm.
			 */
			/* remove from "currently-serving" list, and destroy */
			remque(dss_path);
			/* allow for NUL */
			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
			kmem_free(dss_path, sizeof (rfs4_dss_path_t));

			dss_path = path_next;
		} else {
			/* path was found; not removed */
			dss_path = dss_path->next;
		}
	} while (dss_path != nsrv4->dss_pathlist);

	/*
	 * Now, look for added paths: RGs that have been failed-over
	 * to this node.
	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
	 * for each path, check if it is on the "currently-serving"
	 * dss_pathlist. If not, that RG path has been added.
	 *
	 * Note: we don't do duplicate detection here; nfsd does that for us.
	 *
	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
	 * an upper bound for the size needed for added_paths[numadded_paths].
	 */

	/* probably more space than we need, but guaranteed to be enough */
	if (rfs4_dss_numnewpaths > 0) {
		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
		added_paths = kmem_zalloc(sz, KM_SLEEP);
	}

	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
		int found = 0;
		char *newpath = rfs4_dss_newpaths[i];

		dss_path = nsrv4->dss_pathlist;
		do {
			char *path = dss_path->path;

			/* used only for non-HA */
			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
				dss_path = dss_path->next;
				continue;
			}

			/*
			 * NOTE(review): this is a prefix match limited to
			 * strlen(path), unlike the exact strcmp() used when
			 * looking for removed paths; a new path that merely
			 * starts with a served path counts as found --
			 * confirm this is intended.
			 */
			if (strncmp(path, newpath, strlen(path)) == 0) {
				found = 1;
				break;
			}

			dss_path = dss_path->next;
		} while (dss_path != nsrv4->dss_pathlist);

		if (found == 0) {
			/* only the pointer is stored; the string is not copied */
			added_paths[numadded_paths] = newpath;
			numadded_paths++;
		}
	}

	/* did we find any added paths? */
	if (numadded_paths > 0) {

		/* create a new server instance, and start its grace period */
		start_grace = 1;
		/* CSTYLED */
		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);

		/* read in the stable storage state from these paths */
		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);

		/*
		 * Multiple failovers during a grace period will cause
		 * clients of the same resource group to be partitioned
		 * into different server instances, with different
		 * grace periods.  Since clients of the same resource
		 * group must be subject to the same grace period,
		 * we need to reset all currently active grace periods.
		 */
		rfs4_grace_reset_all(nsrv4);
	}

	/* free the scratch array with the same size it was allocated with */
	if (rfs4_dss_numnewpaths > 0)
		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
}
10352