xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_srv.c (revision a86602b15b9c56eb076e48653db3cf239babfec8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All Rights Reserved
29  */
30 
31 /*
32  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
33  * Copyright 2019 Nexenta Systems, Inc.
34  * Copyright 2019 Nexenta by DDN, Inc.
35  * Copyright 2021-2025 Racktop Systems, Inc.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cred.h>
42 #include <sys/buf.h>
43 #include <sys/vfs.h>
44 #include <sys/vfs_opreg.h>
45 #include <sys/vnode.h>
46 #include <sys/uio.h>
47 #include <sys/errno.h>
48 #include <sys/sysmacros.h>
49 #include <sys/statvfs.h>
50 #include <sys/kmem.h>
51 #include <sys/dirent.h>
52 #include <sys/cmn_err.h>
53 #include <sys/debug.h>
54 #include <sys/systeminfo.h>
55 #include <sys/flock.h>
56 #include <sys/pathname.h>
57 #include <sys/nbmlock.h>
58 #include <sys/share.h>
59 #include <sys/atomic.h>
60 #include <sys/policy.h>
61 #include <sys/fem.h>
62 #include <sys/sdt.h>
63 #include <sys/ddi.h>
64 #include <sys/zone.h>
65 
66 #include <fs/fs_reparse.h>
67 
68 #include <rpc/types.h>
69 #include <rpc/auth.h>
70 #include <rpc/rpcsec_gss.h>
71 #include <rpc/svc.h>
72 
73 #include <nfs/nfs.h>
74 #include <nfs/nfssys.h>
75 #include <nfs/export.h>
76 #include <nfs/nfs_cmd.h>
77 #include <nfs/lm.h>
78 #include <nfs/nfs4.h>
79 #include <nfs/nfs4_drc.h>
80 
81 #include <sys/strsubr.h>
82 #include <sys/strsun.h>
83 
84 #include <inet/common.h>
85 #include <inet/ip.h>
86 #include <inet/ip6.h>
87 
88 #include <sys/tsol/label.h>
89 #include <sys/tsol/tndb.h>
90 
91 #define	RFS4_MAXLOCK_TRIES 4	/* Try to get the lock this many times */
92 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
93 #define	RFS4_LOCK_DELAY 10	/* Milliseconds */
94 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
95 extern struct svc_ops rdma_svc_ops;
96 extern int nfs_loaned_buffers;
97 #define	RFS4_LOOKUP_EXP_STATE_MAX 8 /* Limit of loop to clean expired states */
98 static int rfs4_lookup_exp_state_max = RFS4_LOOKUP_EXP_STATE_MAX;
99 /* End of Tunables */
100 
101 static int rdma_setup_read_data4(READ4args *, READ4res *);
102 
103 /*
104  * Used to bump the stateid4.seqid value and show changes in the stateid
105  */
106 #define	next_stateid(sp) (++(sp)->bits.chgseq)
107 
108 /*
109  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
110  *	This is used to return NFS4ERR_TOOSMALL when clients specify
111  *	maxcount that isn't large enough to hold the smallest possible
112  *	XDR encoded dirent.
113  *
114  *	    sizeof cookie (8 bytes) +
115  *	    sizeof name_len (4 bytes) +
116  *	    sizeof smallest (padded) name (4 bytes) +
117  *	    sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
118  *	    sizeof attrlist4_len (4 bytes) +
119  *	    sizeof next boolean (4 bytes)
120  *
121  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
122  * the smallest possible entry4 (assumes no attrs requested).
123  *	sizeof nfsstat4 (4 bytes) +
124  *	sizeof verifier4 (8 bytes) +
125  *	sizeof entry4list bool (4 bytes) +
126  *	sizeof entry4 (36 bytes) +
127  *	sizeof eof bool (4 bytes)
128  *
129  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
130  *	VOP_READDIR.  Its value is the size of the maximum possible dirent
131  *	for solaris.  The DIRENT64_RECLEN macro returns	the size of dirent
132  *	required for a given name length.  MAXNAMELEN is the maximum
133  *	filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
134  *	macros are to allow for . and .. entries -- just a minor tweak to try
135  *	and guarantee that buffer we give to VOP_READDIR will be large enough
136  *	to hold ., .., and the largest possible solaris dirent64.
137  */
138 #define	RFS4_MINLEN_ENTRY4 36
139 #define	RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
140 #define	RFS4_MINLEN_RDDIR_BUF \
141 	(DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
142 
/*
 * It would be better to pad to 4 bytes since that's what XDR would do,
 * but the dirents UFS gives us are already padded to 8, so just take
 * what we're given.  Dircount is only a hint anyway.  Currently the
 * solaris kernel is ASCII only, so there's no point in calling the
 * UTF8 functions.
 *
 * dirent64: name padded to provide 8 byte struct alignment
 *	d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 *
 * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 *
 */
156 #define	DIRENT64_TO_DIRCOUNT(dp) \
157 	(3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
158 
159 
160 static sysid_t		lockt_sysid;	/* dummy sysid for all LOCKT calls */
161 
162 u_longlong_t	nfs4_srv_caller_id;
163 uint_t		nfs4_srv_vkey = 0;
164 
165 void	rfs4_init_compound_state(struct compound_state *);
166 
167 static void	nullfree(caddr_t);
168 static void	rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 		    struct compound_state *);
170 static void	rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
171 		    struct compound_state *);
172 static void	rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
173 		    struct compound_state *);
174 static void	rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
175 		    struct compound_state *);
176 static void	rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
177 		    struct compound_state *);
178 static void	rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
179 		    struct svc_req *, struct compound_state *);
180 static void	rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
181 		    struct svc_req *, struct compound_state *);
182 static void	rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
183 		    struct compound_state *);
184 static void	rfs4_op_getattr_free(nfs_resop4 *);
185 static void	rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
186 		    struct compound_state *);
187 static void	rfs4_op_getfh_free(nfs_resop4 *);
188 static void	rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 		    struct compound_state *);
190 static void	rfs4_op_notsup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 		    struct compound_state *);
192 static void	rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 		    struct compound_state *);
194 static void	rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 		    struct compound_state *);
196 static void	lock_denied_free(nfs_resop4 *);
197 static void	rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
198 		    struct compound_state *);
199 static void	rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
200 		    struct compound_state *);
201 static void	rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
202 		    struct compound_state *);
203 static void	rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
204 		    struct compound_state *);
205 static void	rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
206 		    struct svc_req *req, struct compound_state *cs);
207 static void	rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
208 		    struct compound_state *);
209 static void	rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
210 		    struct compound_state *);
211 static void	rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
212 		    struct svc_req *, struct compound_state *);
213 static void	rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
214 		    struct svc_req *, struct compound_state *);
215 static void	rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
216 		    struct compound_state *);
217 static void	rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
218 		    struct compound_state *);
219 static void	rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
220 		    struct compound_state *);
221 static void	rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 		    struct compound_state *);
223 static void	rfs4_op_read_free(nfs_resop4 *);
224 static void	rfs4_op_readdir_free(nfs_resop4 *resop);
225 static void	rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 		    struct compound_state *);
227 static void	rfs4_op_readlink_free(nfs_resop4 *);
228 static void	rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
229 		    struct svc_req *, struct compound_state *);
230 static void	rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
231 		    struct compound_state *);
232 static void	rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
233 		    struct compound_state *);
234 static void	rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
235 		    struct compound_state *);
236 static void	rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
237 		    struct compound_state *);
238 static void	rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
239 		    struct compound_state *);
240 static void	rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
241 		    struct compound_state *);
242 static void	rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
243 		    struct compound_state *);
244 static void	rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
245 		    struct compound_state *);
246 static void	rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
247 		    struct svc_req *, struct compound_state *);
248 static void	rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
249 		    struct svc_req *req, struct compound_state *);
250 static void	rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
251 		    struct compound_state *);
252 static void	rfs4_op_secinfo_free(nfs_resop4 *);
253 
254 void rfs4x_op_exchange_id(nfs_argop4 *argop, nfs_resop4 *resop,
255     struct svc_req *req, struct compound_state *cs);
256 void rfs4x_exchange_id_free(nfs_resop4 *);
257 
258 void rfs4x_op_create_session(nfs_argop4 *argop, nfs_resop4 *resop,
259     struct svc_req *req, struct compound_state *cs);
260 
261 void rfs4x_op_destroy_session(nfs_argop4 *argop, nfs_resop4 *resop,
262     struct svc_req *req, compound_state_t *cs);
263 
264 void rfs4x_op_sequence(nfs_argop4 *argop, nfs_resop4 *resop,
265     struct svc_req *req, struct compound_state *cs);
266 
267 void rfs4x_op_reclaim_complete(nfs_argop4 *argop, nfs_resop4 *resop,
268     struct svc_req *req, compound_state_t *cs);
269 
270 void rfs4x_op_destroy_clientid(nfs_argop4 *argop, nfs_resop4 *resop,
271     struct svc_req *req, compound_state_t *cs);
272 
273 void rfs4x_op_bind_conn_to_session(nfs_argop4 *argop, nfs_resop4 *resop,
274     struct svc_req *req, compound_state_t *cs);
275 
276 void rfs4x_op_secinfo_noname(nfs_argop4 *argop, nfs_resop4 *resop,
277     struct svc_req *req, compound_state_t *cs);
278 void rfs4x_op_free_stateid(nfs_argop4 *argop, nfs_resop4 *resop,
279     struct svc_req *req, compound_state_t *cs);
280 
281 static nfsstat4 check_open_access(uint32_t, struct compound_state *,
282 		    struct svc_req *);
283 nfsstat4	rfs4_client_sysid(rfs4_client_t *, sysid_t *);
284 void		rfs4_ss_clid(nfs4_srv_t *, rfs4_client_t *);
285 
286 /*
287  * translation table for attrs
288  */
289 struct nfs4_ntov_table {
290 	union nfs4_attr_u *na;
291 	uint8_t amap[NFS4_MAXNUM_ATTRS];
292 	int attrcnt;
293 	bool_t vfsstat;
294 };
295 
296 static void	nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
297 static void	nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
298 		    struct nfs4_svgetit_arg *sargp);
299 
300 static nfsstat4	do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
301 		    struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
302 		    struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
303 
304 static void	hanfsv4_failover(nfs4_srv_t *);
305 
306 fem_t		*deleg_rdops;
307 fem_t		*deleg_wrops;
308 
309 /*
310  * NFS4 op dispatch table
311  */
312 
313 struct rfsv4disp {
314 	void	(*dis_proc)();		/* proc to call */
315 	void	(*dis_resfree)();	/* frees space allocated by proc */
316 	int	dis_flags;		/* OP_IDEMPOTENT, etc... */
317 };
318 
319 #define	OP_IDEMPOTENT		(1 << 0)
320 #define	OP_CLEAR_STATEID	(1 << 1)
321 
322 static struct rfsv4disp rfsv4disptab[] = {
323 	/*
324 	 * NFS VERSION 4
325 	 */
326 
327 	/* RFS_NULL = 0 */
328 	{rfs4_op_illegal, nullfree, 0},
329 
330 	/* UNUSED = 1 */
331 	{rfs4_op_illegal, nullfree, 0},
332 
333 	/* UNUSED = 2 */
334 	{rfs4_op_illegal, nullfree, 0},
335 
336 	/* OP_ACCESS = 3 */
337 	{rfs4_op_access, nullfree, OP_IDEMPOTENT},
338 
339 	/* OP_CLOSE = 4 */
340 	{rfs4_op_close, nullfree, OP_CLEAR_STATEID},
341 
342 	/* OP_COMMIT = 5 */
343 	{rfs4_op_commit, nullfree, OP_IDEMPOTENT},
344 
345 	/* OP_CREATE = 6 */
346 	{rfs4_op_create, nullfree, OP_CLEAR_STATEID},
347 
348 	/* OP_DELEGPURGE = 7 */
349 	{rfs4_op_delegpurge, nullfree, 0},
350 
351 	/* OP_DELEGRETURN = 8 */
352 	{rfs4_op_delegreturn, nullfree, 0},
353 
354 	/* OP_GETATTR = 9 */
355 	{rfs4_op_getattr, rfs4_op_getattr_free, OP_IDEMPOTENT},
356 
357 	/* OP_GETFH = 10 */
358 	{rfs4_op_getfh, rfs4_op_getfh_free, OP_IDEMPOTENT},
359 
360 	/* OP_LINK = 11 */
361 	{rfs4_op_link, nullfree, 0},
362 
363 	/* OP_LOCK = 12 */
364 	{rfs4_op_lock, lock_denied_free, 0},
365 
366 	/* OP_LOCKT = 13 */
367 	{rfs4_op_lockt, lock_denied_free, 0},
368 
369 	/* OP_LOCKU = 14 */
370 	{rfs4_op_locku, nullfree, 0},
371 
372 	/* OP_LOOKUP = 15 */
373 	{rfs4_op_lookup, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
374 
375 	/* OP_LOOKUPP = 16 */
376 	{rfs4_op_lookupp, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
377 
378 	/* OP_NVERIFY = 17 */
379 	{rfs4_op_nverify, nullfree, OP_IDEMPOTENT},
380 
381 	/* OP_OPEN = 18 */
382 	{rfs4_op_open, rfs4_free_reply, 0},
383 
384 	/* OP_OPENATTR = 19 */
385 	{rfs4_op_openattr, nullfree, 0},
386 
387 	/* OP_OPEN_CONFIRM = 20 */
388 	{rfs4_op_open_confirm, nullfree, 0},
389 
390 	/* OP_OPEN_DOWNGRADE = 21 */
391 	{rfs4_op_open_downgrade, nullfree, 0},
392 
	/* OP_PUTFH = 22 */
394 	{rfs4_op_putfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
395 
396 	/* OP_PUTPUBFH = 23 */
397 	{rfs4_op_putpubfh, nullfree, OP_IDEMPOTENT},
398 
399 	/* OP_PUTROOTFH = 24 */
400 	{rfs4_op_putrootfh, nullfree, (OP_IDEMPOTENT | OP_CLEAR_STATEID)},
401 
402 	/* OP_READ = 25 */
403 	{rfs4_op_read, rfs4_op_read_free, OP_IDEMPOTENT},
404 
405 	/* OP_READDIR = 26 */
406 	{rfs4_op_readdir, rfs4_op_readdir_free, OP_IDEMPOTENT},
407 
408 	/* OP_READLINK = 27 */
409 	{rfs4_op_readlink, rfs4_op_readlink_free, OP_IDEMPOTENT},
410 
411 	/* OP_REMOVE = 28 */
412 	{rfs4_op_remove, nullfree, 0},
413 
414 	/* OP_RENAME = 29 */
415 	{rfs4_op_rename, nullfree, 0},
416 
417 	/* OP_RENEW = 30 */
418 	{rfs4_op_renew, nullfree, 0},
419 
420 	/* OP_RESTOREFH = 31 */
421 	{rfs4_op_restorefh, nullfree, OP_IDEMPOTENT},
422 
423 	/* OP_SAVEFH = 32 */
424 	{rfs4_op_savefh, nullfree, OP_IDEMPOTENT},
425 
426 	/* OP_SECINFO = 33 */
427 	{rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
428 
429 	/* OP_SETATTR = 34 */
430 	{rfs4_op_setattr, nullfree, 0},
431 
432 	/* OP_SETCLIENTID = 35 */
433 	{rfs4_op_setclientid, nullfree, 0},
434 
435 	/* OP_SETCLIENTID_CONFIRM = 36 */
436 	{rfs4_op_setclientid_confirm, nullfree, 0},
437 
438 	/* OP_VERIFY = 37 */
439 	{rfs4_op_verify, nullfree, OP_IDEMPOTENT},
440 
441 	/* OP_WRITE = 38 */
442 	{rfs4_op_write, nullfree, 0},
443 
444 	/* OP_RELEASE_LOCKOWNER = 39 */
445 	{rfs4_op_release_lockowner, nullfree, 0},
446 
447 	/*
448 	 * NFSv4.1 operations
449 	 */
450 
451 	/* OP_BACKCHANNEL_CTL = 40 */
452 	{rfs4_op_notsup,  nullfree,  0},
453 
454 	/*  OP_BIND_CONN_TO_SESSION = 41 */
455 	{rfs4x_op_bind_conn_to_session,  nullfree,  0},
456 
457 	/* OP_EXCHANGE_ID  = 42 */
458 	{rfs4x_op_exchange_id,  rfs4x_exchange_id_free,  0},
459 
460 	/* OP_CREATE_SESSION = 43 */
461 	{rfs4x_op_create_session,  nullfree,  0},
462 
463 	/* OP_DESTROY_SESSION = 44 */
464 	{rfs4x_op_destroy_session,  nullfree,  0},
465 
466 	/* OP_FREE_STATEID = 45 */
467 	{rfs4x_op_free_stateid,  nullfree,  0},
468 
469 	/* OP_GET_DIR_DELEGATION = 46 */
470 	{rfs4_op_notsup,  nullfree,  0},
471 
472 	/* OP_GETDEVICEINFO = 47 */
473 	{rfs4_op_notsup,  nullfree,  0},
474 
475 	/* OP_GETDEVICELIST = 48 */
476 	{rfs4_op_notsup,  nullfree,  0},
477 
478 	/* OP_LAYOUTCOMMIT = 49 */
479 	{rfs4_op_notsup,  nullfree,  0},
480 
481 	/* OP_LAYOUTGET = 50 */
482 	{rfs4_op_notsup,  nullfree,  0},
483 
484 	/* OP_LAYOUTRETURN = 51 */
485 	{rfs4_op_notsup,  nullfree,  0},
486 
487 	/* OP_SECINFO_NO_NAME = 52 */
488 	{rfs4x_op_secinfo_noname, rfs4_op_secinfo_free, 0},
489 
490 	/* OP_SEQUENCE = 53 */
491 	{rfs4x_op_sequence,  nullfree,  0},
492 
493 	/* OP_SET_SSV = 54 */
494 	{rfs4_op_notsup,  nullfree,  0},
495 
496 	/* OP_TEST_STATEID = 55 */
497 	{rfs4_op_notsup,  nullfree,  0},
498 
499 	/* OP_WANT_DELEGATION = 56 */
500 	{rfs4_op_notsup,  nullfree,  0},
501 
502 	/* OP_DESTROY_CLIENTID = 57 */
503 	{rfs4x_op_destroy_clientid,  nullfree,  0},
504 
505 	/* OP_RECLAIM_COMPLETE = 58 */
506 	{rfs4x_op_reclaim_complete,  nullfree,  0},
507 
508 	/*
509 	 * NFSv4.2 operations
510 	 */
511 	/* OP_ALLOCATE = 59 */
512 	{rfs4_op_notsup,  nullfree,  0},
513 
514 	/* OP_COPY = 60 */
515 	{rfs4_op_notsup,  nullfree,  0},
516 
517 	/* OP_COPY_NOTIFY = 61 */
518 	{rfs4_op_notsup,  nullfree,  0},
519 
520 	/* OP_DEALLOCATE = 62 */
521 	{rfs4_op_notsup,  nullfree,  0},
522 
523 	/* OP_IO_ADVISE = 63 */
524 	{rfs4_op_notsup,  nullfree,  0},
525 
526 	/* OP_LAYOUTERROR = 64 */
527 	{rfs4_op_notsup,  nullfree,  0},
528 
529 	/* OP_LAYOUTSTATS = 65 */
530 	{rfs4_op_notsup,  nullfree,  0},
531 
532 	/* OP_OFFLOAD_CANCEL = 66 */
533 	{rfs4_op_notsup,  nullfree,  0},
534 
535 	/* OP_OFFLOAD_STATUS = 67 */
536 	{rfs4_op_notsup,  nullfree,  0},
537 
538 	/* OP_READ_PLUS = 68 */
539 	{rfs4_op_notsup,  nullfree,  0},
540 
541 	/* OP_SEEK = 69 */
542 	{rfs4_op_notsup,  nullfree,  0},
543 
544 	/* OP_WRITE_SAME = 70 */
545 	{rfs4_op_notsup,  nullfree,  0},
546 
547 	/* OP_CLONE = 71 */
548 	{rfs4_op_notsup,  nullfree,  0},
549 
550 };
551 
552 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
553 
554 #define	OP_ILLEGAL_IDX (rfsv4disp_cnt)
555 
556 #ifdef DEBUG
557 
558 int		rfs4_fillone_debug = 0;
559 int		rfs4_no_stub_access = 1;
560 int		rfs4_rddir_debug = 0;
561 
562 static char    *rfs4_op_string[] = {
563 	"rfs4_op_null",
564 	"rfs4_op_1 unused",
565 	"rfs4_op_2 unused",
566 	"rfs4_op_access",
567 	"rfs4_op_close",
568 	"rfs4_op_commit",
569 	"rfs4_op_create",
570 	"rfs4_op_delegpurge",
571 	"rfs4_op_delegreturn",
572 	"rfs4_op_getattr",
573 	"rfs4_op_getfh",
574 	"rfs4_op_link",
575 	"rfs4_op_lock",
576 	"rfs4_op_lockt",
577 	"rfs4_op_locku",
578 	"rfs4_op_lookup",
579 	"rfs4_op_lookupp",
580 	"rfs4_op_nverify",
581 	"rfs4_op_open",
582 	"rfs4_op_openattr",
583 	"rfs4_op_open_confirm",
584 	"rfs4_op_open_downgrade",
585 	"rfs4_op_putfh",
586 	"rfs4_op_putpubfh",
587 	"rfs4_op_putrootfh",
588 	"rfs4_op_read",
589 	"rfs4_op_readdir",
590 	"rfs4_op_readlink",
591 	"rfs4_op_remove",
592 	"rfs4_op_rename",
593 	"rfs4_op_renew",
594 	"rfs4_op_restorefh",
595 	"rfs4_op_savefh",
596 	"rfs4_op_secinfo",
597 	"rfs4_op_setattr",
598 	"rfs4_op_setclientid",
599 	"rfs4_op_setclient_confirm",
600 	"rfs4_op_verify",
601 	"rfs4_op_write",
602 	"rfs4_op_release_lockowner",
603 	/* NFSv4.1 */
604 	"backchannel_ctl",
605 	"bind_conn_to_session",
606 	"exchange_id",
607 	"create_session",
608 	"destroy_session",
609 	"free_stateid",
610 	"get_dir_delegation",
611 	"getdeviceinfo",
612 	"getdevicelist",
613 	"layoutcommit",
614 	"layoutget",
615 	"layoutreturn",
616 	"secinfo_no_name",
617 	"sequence",
618 	"set_ssv",
619 	"test_stateid",
620 	"want_delegation",
621 	"destroy_clientid",
622 	"reclaim_complete",
623 	/* NFSv4.2 */
624 	"allocate",
625 	"copy",
626 	"copy_notify",
627 	"deallocate",
628 	"io_advise",
629 	"layouterror",
630 	"layoutstats",
631 	"offload_cancel",
632 	"offload_status",
633 	"read_plus",
634 	"seek",
635 	"write_same",
636 	"clone",
637 
638 	"rfs4_op_illegal"
639 };
640 
641 #endif
642 
643 void	rfs4_ss_chkclid(nfs4_srv_t *, rfs4_client_t *);
644 
645 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
646 
647 extern void	rfs4_free_fs_locations4(fs_locations4 *);
648 
649 #ifdef	nextdp
650 #undef nextdp
651 #endif
652 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
653 
654 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
655 	VOPNAME_OPEN,		{ .femop_open = deleg_rd_open },
656 	VOPNAME_WRITE,		{ .femop_write = deleg_rd_write },
657 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_rd_setattr },
658 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_rd_rwlock },
659 	VOPNAME_SPACE,		{ .femop_space = deleg_rd_space },
660 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_rd_setsecattr },
661 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_rd_vnevent },
662 	NULL,			NULL
663 };
664 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
665 	VOPNAME_OPEN,		{ .femop_open = deleg_wr_open },
666 	VOPNAME_READ,		{ .femop_read = deleg_wr_read },
667 	VOPNAME_WRITE,		{ .femop_write = deleg_wr_write },
668 	VOPNAME_SETATTR,	{ .femop_setattr = deleg_wr_setattr },
669 	VOPNAME_RWLOCK,		{ .femop_rwlock = deleg_wr_rwlock },
670 	VOPNAME_SPACE,		{ .femop_space = deleg_wr_space },
671 	VOPNAME_SETSECATTR,	{ .femop_setsecattr = deleg_wr_setsecattr },
672 	VOPNAME_VNEVENT,	{ .femop_vnevent = deleg_wr_vnevent },
673 	NULL,			NULL
674 };
675 
676 
677 nfs4_srv_t *
nfs4_get_srv(void)678 nfs4_get_srv(void)
679 {
680 	nfs_globals_t *ng = nfs_srv_getzg();
681 	nfs4_srv_t *srv = ng->nfs4_srv;
682 	ASSERT(srv != NULL);
683 	return (srv);
684 }
685 
686 void
rfs4_srv_zone_init(nfs_globals_t * ng)687 rfs4_srv_zone_init(nfs_globals_t *ng)
688 {
689 	nfs4_srv_t *nsrv4;
690 	timespec32_t verf;
691 
692 	nsrv4 = kmem_zalloc(sizeof (*nsrv4), KM_SLEEP);
693 
694 	/*
695 	 * The following algorithm attempts to find a unique verifier
696 	 * to be used as the write verifier returned from the server
697 	 * to the client.  It is important that this verifier change
698 	 * whenever the server reboots.  Of secondary importance, it
699 	 * is important for the verifier to be unique between two
700 	 * different servers.
701 	 *
702 	 * Thus, an attempt is made to use the system hostid and the
703 	 * current time in seconds when the nfssrv kernel module is
704 	 * loaded.  It is assumed that an NFS server will not be able
705 	 * to boot and then to reboot in less than a second.  If the
706 	 * hostid has not been set, then the current high resolution
707 	 * time is used.  This will ensure different verifiers each
708 	 * time the server reboots and minimize the chances that two
709 	 * different servers will have the same verifier.
710 	 * XXX - this is broken on LP64 kernels.
711 	 */
712 	verf.tv_sec = (time_t)zone_get_hostid(NULL);
713 	if (verf.tv_sec != 0) {
714 		verf.tv_nsec = gethrestime_sec();
715 	} else {
716 		timespec_t tverf;
717 
718 		gethrestime(&tverf);
719 		verf.tv_sec = (time_t)tverf.tv_sec;
720 		verf.tv_nsec = tverf.tv_nsec;
721 	}
722 	nsrv4->write4verf = *(uint64_t *)&verf;
723 
724 	/* Used to manage create/destroy of server state */
725 	nsrv4->nfs4_server_state = NULL;
726 	nsrv4->nfs4_cur_servinst = NULL;
727 	nsrv4->nfs4_deleg_policy = SRV_NEVER_DELEGATE;
728 	mutex_init(&nsrv4->deleg_lock, NULL, MUTEX_DEFAULT, NULL);
729 	mutex_init(&nsrv4->state_lock, NULL, MUTEX_DEFAULT, NULL);
730 	mutex_init(&nsrv4->servinst_lock, NULL, MUTEX_DEFAULT, NULL);
731 	rw_init(&nsrv4->deleg_policy_lock, NULL, RW_DEFAULT, NULL);
732 
733 	ng->nfs4_srv = nsrv4;
734 }
735 
736 void
rfs4_srv_zone_fini(nfs_globals_t * ng)737 rfs4_srv_zone_fini(nfs_globals_t *ng)
738 {
739 	nfs4_srv_t *nsrv4 = ng->nfs4_srv;
740 
741 	ng->nfs4_srv = NULL;
742 
743 	mutex_destroy(&nsrv4->deleg_lock);
744 	mutex_destroy(&nsrv4->state_lock);
745 	mutex_destroy(&nsrv4->servinst_lock);
746 	rw_destroy(&nsrv4->deleg_policy_lock);
747 
748 	kmem_free(nsrv4, sizeof (*nsrv4));
749 }
750 
751 void
rfs4_srvrinit(void)752 rfs4_srvrinit(void)
753 {
754 	extern void rfs4_attr_init();
755 
756 	rfs4_attr_init();
757 
758 	if (fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops) != 0) {
759 		rfs4_disable_delegation();
760 	} else if (fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
761 	    &deleg_wrops) != 0) {
762 		rfs4_disable_delegation();
763 		fem_free(deleg_rdops);
764 	}
765 
766 	nfs4_srv_caller_id = fs_new_caller_id();
767 	lockt_sysid = lm_alloc_sysidt();
768 	vsd_create(&nfs4_srv_vkey, NULL);
769 	rfs4_state_g_init();
770 }
771 
772 void
rfs4_srvrfini(void)773 rfs4_srvrfini(void)
774 {
775 	if (lockt_sysid != LM_NOSYSID) {
776 		lm_free_sysidt(lockt_sysid);
777 		lockt_sysid = LM_NOSYSID;
778 	}
779 
780 	rfs4_state_g_fini();
781 
782 	fem_free(deleg_rdops);
783 	fem_free(deleg_wrops);
784 }
785 
786 void
rfs4_do_server_start(int server_upordown,int srv_delegation,nfs4_minor_t nfs4_minor_max,int cluster_booted)787 rfs4_do_server_start(int server_upordown, int srv_delegation,
788     nfs4_minor_t nfs4_minor_max, int cluster_booted)
789 {
790 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
791 
792 	/* Is this a warm start? */
793 	if (server_upordown == NFS_SERVER_QUIESCED) {
794 		cmn_err(CE_NOTE, "nfs4_srv: "
795 		    "server was previously quiesced; "
796 		    "existing NFSv4 state will be re-used");
797 
798 		/*
799 		 * HA-NFSv4: this is also the signal
800 		 * that a Resource Group failover has
801 		 * occurred.
802 		 */
803 		if (cluster_booted)
804 			hanfsv4_failover(nsrv4);
805 	} else {
806 		/* Cold start */
807 		nsrv4->rfs4_start_time = 0;
808 		rfs4_state_zone_init(nsrv4);
809 		nsrv4->nfs4_drc = rfs4_init_drc(nfs4_drc_max,
810 		    nfs4_drc_hash);
811 
812 		/*
813 		 * The nfsd service was started with the -s option
814 		 * we need to pull in any state from the paths indicated.
815 		 */
816 		if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
817 			/* read in the stable storage state from these paths */
818 			rfs4_dss_readstate(nsrv4, rfs4_dss_numnewpaths,
819 			    rfs4_dss_newpaths);
820 		}
821 	}
822 
823 	nsrv4->nfs4_minor_max = nfs4_minor_max;
824 
825 	/* Check if delegation is to be enabled */
826 	if (srv_delegation != FALSE)
827 		rfs4_set_deleg_policy(nsrv4, SRV_NORMAL_DELEGATE);
828 }
829 
830 void
rfs4_init_compound_state(struct compound_state * cs)831 rfs4_init_compound_state(struct compound_state *cs)
832 {
833 	bzero(cs, sizeof (*cs));
834 	cs->cont = TRUE;
835 	cs->access = CS_ACCESS_DENIED;
836 	cs->deleg = FALSE;
837 	cs->mandlock = FALSE;
838 	cs->fh.nfs_fh4_val = cs->fhbuf;
839 }
840 
841 /* Do cleanup of the compound_state */
842 void
rfs4_fini_compound_state(struct compound_state * cs)843 rfs4_fini_compound_state(struct compound_state *cs)
844 {
845 	if (cs->vp) {
846 		VN_RELE(cs->vp);
847 	}
848 	if (cs->saved_vp) {
849 		VN_RELE(cs->saved_vp);
850 	}
851 	if (cs->cr) {
852 		crfree(cs->cr);
853 	}
854 	if (cs->saved_fh.nfs_fh4_val) {
855 		kmem_free(cs->saved_fh.nfs_fh4_val, NFS4_FHSIZE);
856 	}
857 	if (cs->sp) {
858 		rfs4x_session_rele(cs->sp);
859 	}
860 }
861 
862 void
rfs4_grace_start(rfs4_servinst_t * sip)863 rfs4_grace_start(rfs4_servinst_t *sip)
864 {
865 	rw_enter(&sip->rwlock, RW_WRITER);
866 	sip->start_time = nfs_sys_uptime();
867 	sip->grace_period = rfs4_grace_period;
868 	rw_exit(&sip->rwlock);
869 }
870 
871 /*
872  * returns true if the instance's grace period has never been started
873  */
874 int
rfs4_servinst_grace_new(rfs4_servinst_t * sip)875 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
876 {
877 	time_t start_time;
878 
879 	rw_enter(&sip->rwlock, RW_READER);
880 	start_time = sip->start_time;
881 	rw_exit(&sip->rwlock);
882 
883 	return (start_time == 0);
884 }
885 
886 /*
887  * Indicates if server instance is within the
888  * grace period.
889  */
890 int
rfs4_servinst_in_grace(rfs4_servinst_t * sip)891 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
892 {
893 	time_t grace_expiry;
894 
895 	/* All clients called reclaim-complete */
896 	if (sip->nreclaim == 0 || sip->grace_period == 0)
897 		return (0);
898 
899 	rw_enter(&sip->rwlock, RW_READER);
900 	grace_expiry = sip->start_time + sip->grace_period;
901 	rw_exit(&sip->rwlock);
902 
903 	if (nfs_sys_uptime() < grace_expiry)
904 		return (1);
905 
906 	/* Once grace period ends, optimize next calls */
907 	sip->grace_period = 0;
908 	return (0);
909 }
910 
911 int
rfs4_clnt_in_grace(rfs4_client_t * cp)912 rfs4_clnt_in_grace(rfs4_client_t *cp)
913 {
914 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
915 
916 	return (rfs4_servinst_in_grace(cp->rc_server_instance));
917 }
918 
919 /*
920  * reset all currently active grace periods
921  */
922 void
rfs4_grace_reset_all(nfs4_srv_t * nsrv4)923 rfs4_grace_reset_all(nfs4_srv_t *nsrv4)
924 {
925 	rfs4_servinst_t *sip;
926 
927 	mutex_enter(&nsrv4->servinst_lock);
928 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
929 		if (rfs4_servinst_in_grace(sip))
930 			rfs4_grace_start(sip);
931 	mutex_exit(&nsrv4->servinst_lock);
932 }
933 
934 /*
935  * start any new instances' grace periods
936  */
937 void
rfs4_grace_start_new(nfs4_srv_t * nsrv4)938 rfs4_grace_start_new(nfs4_srv_t *nsrv4)
939 {
940 	rfs4_servinst_t *sip;
941 
942 	mutex_enter(&nsrv4->servinst_lock);
943 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev)
944 		if (rfs4_servinst_grace_new(sip))
945 			rfs4_grace_start(sip);
946 	mutex_exit(&nsrv4->servinst_lock);
947 }
948 
949 static rfs4_dss_path_t *
rfs4_dss_newpath(nfs4_srv_t * nsrv4,rfs4_servinst_t * sip,char * path,unsigned index)950 rfs4_dss_newpath(nfs4_srv_t *nsrv4, rfs4_servinst_t *sip,
951     char *path, unsigned index)
952 {
953 	size_t len;
954 	rfs4_dss_path_t *dss_path;
955 
956 	dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
957 
958 	/*
959 	 * Take a copy of the string, since the original may be overwritten.
960 	 * Sadly, no strdup() in the kernel.
961 	 */
962 	/* allow for NUL */
963 	len = strlen(path) + 1;
964 	dss_path->path = kmem_alloc(len, KM_SLEEP);
965 	(void) strlcpy(dss_path->path, path, len);
966 
967 	/* associate with servinst */
968 	dss_path->sip = sip;
969 	dss_path->index = index;
970 
971 	/*
972 	 * Add to list of served paths.
973 	 * No locking required, as we're only ever called at startup.
974 	 */
975 	if (nsrv4->dss_pathlist == NULL) {
976 		/* this is the first dss_path_t */
977 
978 		/* needed for insque/remque */
979 		dss_path->next = dss_path->prev = dss_path;
980 
981 		nsrv4->dss_pathlist = dss_path;
982 	} else {
983 		insque(dss_path, nsrv4->dss_pathlist);
984 	}
985 
986 	return (dss_path);
987 }
988 
989 /*
990  * Create a new server instance, and make it the currently active instance.
991  * Note that starting the grace period too early will reduce the clients'
992  * recovery window.
993  */
994 void
rfs4_servinst_create(nfs4_srv_t * nsrv4,int start_grace,int dss_npaths,char ** dss_paths)995 rfs4_servinst_create(nfs4_srv_t *nsrv4, int start_grace,
996     int dss_npaths, char **dss_paths)
997 {
998 	unsigned i;
999 	rfs4_servinst_t *sip;
1000 	rfs4_oldstate_t *oldstate;
1001 
1002 	sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
1003 	rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
1004 
1005 	sip->nreclaim = 0;
1006 	sip->start_time = (time_t)0;
1007 	sip->grace_period = (time_t)0;
1008 	sip->next = NULL;
1009 	sip->prev = NULL;
1010 
1011 	rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
1012 	/*
1013 	 * This initial dummy entry is required to setup for insque/remque.
1014 	 * It must be skipped over whenever the list is traversed.
1015 	 */
1016 	oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
1017 	/* insque/remque require initial list entry to be self-terminated */
1018 	oldstate->next = oldstate;
1019 	oldstate->prev = oldstate;
1020 	sip->oldstate = oldstate;
1021 
1022 
1023 	sip->dss_npaths = dss_npaths;
1024 	sip->dss_paths = kmem_alloc(dss_npaths *
1025 	    sizeof (rfs4_dss_path_t *), KM_SLEEP);
1026 
1027 	for (i = 0; i < dss_npaths; i++) {
1028 		sip->dss_paths[i] =
1029 		    rfs4_dss_newpath(nsrv4, sip, dss_paths[i], i);
1030 	}
1031 
1032 	mutex_enter(&nsrv4->servinst_lock);
1033 	if (nsrv4->nfs4_cur_servinst != NULL) {
1034 		/* add to linked list */
1035 		sip->prev = nsrv4->nfs4_cur_servinst;
1036 		nsrv4->nfs4_cur_servinst->next = sip;
1037 	}
1038 	if (start_grace)
1039 		rfs4_grace_start(sip);
1040 	/* make the new instance "current" */
1041 	nsrv4->nfs4_cur_servinst = sip;
1042 
1043 	mutex_exit(&nsrv4->servinst_lock);
1044 }
1045 
1046 /*
1047  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
1048  * all instances directly.
1049  */
1050 void
rfs4_servinst_destroy_all(nfs4_srv_t * nsrv4)1051 rfs4_servinst_destroy_all(nfs4_srv_t *nsrv4)
1052 {
1053 	rfs4_servinst_t *sip, *prev, *current;
1054 #ifdef DEBUG
1055 	int n = 0;
1056 #endif
1057 
1058 	mutex_enter(&nsrv4->servinst_lock);
1059 	ASSERT(nsrv4->nfs4_cur_servinst != NULL);
1060 	current = nsrv4->nfs4_cur_servinst;
1061 	nsrv4->nfs4_cur_servinst = NULL;
1062 	for (sip = current; sip != NULL; sip = prev) {
1063 		prev = sip->prev;
1064 		rw_destroy(&sip->rwlock);
1065 		if (sip->oldstate)
1066 			kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
1067 		if (sip->dss_paths) {
1068 			int i = sip->dss_npaths;
1069 
1070 			while (i > 0) {
1071 				i--;
1072 				if (sip->dss_paths[i] != NULL) {
1073 					char *path = sip->dss_paths[i]->path;
1074 
1075 					if (path != NULL) {
1076 						kmem_free(path,
1077 						    strlen(path) + 1);
1078 					}
1079 					kmem_free(sip->dss_paths[i],
1080 					    sizeof (rfs4_dss_path_t));
1081 				}
1082 			}
1083 			kmem_free(sip->dss_paths,
1084 			    sip->dss_npaths * sizeof (rfs4_dss_path_t *));
1085 		}
1086 		kmem_free(sip, sizeof (rfs4_servinst_t));
1087 #ifdef DEBUG
1088 		n++;
1089 #endif
1090 	}
1091 	mutex_exit(&nsrv4->servinst_lock);
1092 }
1093 
1094 /*
1095  * Assign the current server instance to a client_t.
1096  * Should be called with cp->rc_dbe held.
1097  */
1098 void
rfs4_servinst_assign(nfs4_srv_t * nsrv4,rfs4_client_t * cp,rfs4_servinst_t * sip)1099 rfs4_servinst_assign(nfs4_srv_t *nsrv4, rfs4_client_t *cp,
1100     rfs4_servinst_t *sip)
1101 {
1102 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1103 
1104 	/*
1105 	 * The lock ensures that if the current instance is in the process
1106 	 * of changing, we will see the new one.
1107 	 */
1108 	mutex_enter(&nsrv4->servinst_lock);
1109 	cp->rc_server_instance = sip;
1110 	mutex_exit(&nsrv4->servinst_lock);
1111 }
1112 
1113 rfs4_servinst_t *
rfs4_servinst(rfs4_client_t * cp)1114 rfs4_servinst(rfs4_client_t *cp)
1115 {
1116 	ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
1117 
1118 	return (cp->rc_server_instance);
1119 }
1120 
1121 /* ARGSUSED */
1122 static void
nullfree(caddr_t resop)1123 nullfree(caddr_t resop)
1124 {
1125 }
1126 
1127 /*
1128  * This is a fall-through for invalid or not implemented (yet) ops
1129  */
1130 /* ARGSUSED */
1131 static void
rfs4_op_inval(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1132 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1133     struct compound_state *cs)
1134 {
1135 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
1136 }
1137 
1138 /*
1139  * Check if the security flavor, nfsnum, is in the flavor_list.
1140  */
1141 bool_t
in_flavor_list(int nfsnum,int * flavor_list,int count)1142 in_flavor_list(int nfsnum, int *flavor_list, int count)
1143 {
1144 	int i;
1145 
1146 	for (i = 0; i < count; i++) {
1147 		if (nfsnum == flavor_list[i])
1148 			return (TRUE);
1149 	}
1150 	return (FALSE);
1151 }
1152 
1153 /*
1154  * Used by rfs4_op_secinfo to get the security information from the
1155  * export structure associated with the component.
1156  */
1157 /* ARGSUSED */
1158 nfsstat4
do_rfs4_op_secinfo(struct compound_state * cs,char * nm,SECINFO4res * resp)1159 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
1160 {
1161 	int error, different_export = 0;
1162 	vnode_t *dvp, *vp;
1163 	struct exportinfo *exi;
1164 	fid_t fid;
1165 	uint_t count, i;
1166 	secinfo4 *resok_val;
1167 	struct secinfo *secp;
1168 	seconfig_t *si;
1169 	bool_t did_traverse = FALSE;
1170 	int dotdot, walk;
1171 	nfs_export_t *ne = nfs_get_export();
1172 
1173 	dvp = cs->vp;
1174 	exi = cs->exi;
1175 	ASSERT(exi != NULL);
1176 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
1177 
1178 	/*
1179 	 * If dotdotting, then need to check whether it's above the
1180 	 * root of a filesystem, or above an export point.
1181 	 */
1182 	if (dotdot) {
1183 		vnode_t *zone_rootvp = ne->exi_root->exi_vp;
1184 
1185 		ASSERT3U(exi->exi_zoneid, ==, ne->exi_root->exi_zoneid);
1186 		/*
1187 		 * If dotdotting at the root of a filesystem, then
1188 		 * need to traverse back to the mounted-on filesystem
1189 		 * and do the dotdot lookup there.
1190 		 */
1191 		if ((dvp->v_flag & VROOT) || VN_CMP(dvp, zone_rootvp)) {
1192 
1193 			/*
1194 			 * If at the system root, then can
1195 			 * go up no further.
1196 			 */
1197 			if (VN_CMP(dvp, zone_rootvp))
1198 				return (puterrno4(ENOENT));
1199 
1200 			/*
1201 			 * Traverse back to the mounted-on filesystem
1202 			 */
1203 			dvp = untraverse(dvp, zone_rootvp);
1204 
1205 			/*
1206 			 * Set the different_export flag so we remember
1207 			 * to pick up a new exportinfo entry for
1208 			 * this new filesystem.
1209 			 */
1210 			different_export = 1;
1211 		} else {
1212 
1213 			/*
1214 			 * If dotdotting above an export point then set
1215 			 * the different_export to get new export info.
1216 			 */
1217 			different_export = nfs_exported(exi, dvp);
1218 		}
1219 	}
1220 
1221 	/*
1222 	 * Get the vnode for the component "nm".
1223 	 */
1224 	error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
1225 	    NULL, NULL, NULL);
1226 	if (error)
1227 		return (puterrno4(error));
1228 
1229 	/*
1230 	 * If the vnode is in a pseudo filesystem, or if the security flavor
1231 	 * used in the request is valid but not an explicitly shared flavor,
1232 	 * or the access bit indicates that this is a limited access,
1233 	 * check whether this vnode is visible.
1234 	 */
1235 	if (!different_export &&
1236 	    (PSEUDO(exi) || !is_exported_sec(cs->nfsflavor, exi) ||
1237 	    cs->access & CS_ACCESS_LIMITED)) {
1238 		if (! nfs_visible(exi, vp, &different_export)) {
1239 			VN_RELE(vp);
1240 			return (puterrno4(ENOENT));
1241 		}
1242 	}
1243 
1244 	/*
1245 	 * If it's a mountpoint, then traverse it.
1246 	 */
1247 	if (vn_ismntpt(vp)) {
1248 		if ((error = traverse(&vp)) != 0) {
1249 			VN_RELE(vp);
1250 			return (puterrno4(error));
1251 		}
1252 		/* remember that we had to traverse mountpoint */
1253 		did_traverse = TRUE;
1254 		different_export = 1;
1255 	} else if (vp->v_vfsp != dvp->v_vfsp) {
1256 		/*
1257 		 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
1258 		 * then vp is probably an LOFS object.  We don't need the
1259 		 * realvp, we just need to know that we might have crossed
1260 		 * a server fs boundary and need to call checkexport4.
1261 		 * (LOFS lookup hides server fs mountpoints, and actually calls
1262 		 * traverse)
1263 		 */
1264 		different_export = 1;
1265 	}
1266 
1267 	/*
1268 	 * Get the export information for it.
1269 	 */
1270 	if (different_export) {
1271 
1272 		bzero(&fid, sizeof (fid));
1273 		fid.fid_len = MAXFIDSZ;
1274 		error = vop_fid_pseudo(vp, &fid);
1275 		if (error) {
1276 			VN_RELE(vp);
1277 			return (puterrno4(error));
1278 		}
1279 
1280 		/* We'll need to reassign "exi". */
1281 		if (dotdot)
1282 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
1283 		else
1284 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
1285 
1286 		if (exi == NULL) {
1287 			if (did_traverse == TRUE) {
1288 				/*
1289 				 * If this vnode is a mounted-on vnode,
1290 				 * but the mounted-on file system is not
1291 				 * exported, send back the secinfo for
1292 				 * the exported node that the mounted-on
1293 				 * vnode lives in.
1294 				 */
1295 				exi = cs->exi;
1296 			} else {
1297 				VN_RELE(vp);
1298 				return (puterrno4(EACCES));
1299 			}
1300 		}
1301 	}
1302 	ASSERT(exi != NULL);
1303 
1304 
1305 	/*
1306 	 * Create the secinfo result based on the security information
1307 	 * from the exportinfo structure (exi).
1308 	 *
1309 	 * Return all flavors for a pseudo node.
1310 	 * For a real export node, return the flavor that the client
1311 	 * has access with.
1312 	 */
1313 	ASSERT(RW_LOCK_HELD(&ne->exported_lock));
1314 	if (PSEUDO(exi)) {
1315 		count = exi->exi_export.ex_seccnt; /* total sec count */
1316 		resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1317 		secp = exi->exi_export.ex_secinfo;
1318 
1319 		for (i = 0; i < count; i++) {
1320 			si = &secp[i].s_secinfo;
1321 			resok_val[i].flavor = si->sc_rpcnum;
1322 			if (resok_val[i].flavor == RPCSEC_GSS) {
1323 				rpcsec_gss_info *info;
1324 
1325 				info = &resok_val[i].flavor_info;
1326 				info->qop = si->sc_qop;
1327 				info->service = (rpc_gss_svc_t)si->sc_service;
1328 
1329 				/* get oid opaque data */
1330 				info->oid.sec_oid4_len =
1331 				    si->sc_gss_mech_type->length;
1332 				info->oid.sec_oid4_val = kmem_alloc(
1333 				    si->sc_gss_mech_type->length, KM_SLEEP);
1334 				bcopy(
1335 				    si->sc_gss_mech_type->elements,
1336 				    info->oid.sec_oid4_val,
1337 				    info->oid.sec_oid4_len);
1338 			}
1339 		}
1340 		resp->SECINFO4resok_len = count;
1341 		resp->SECINFO4resok_val = resok_val;
1342 	} else {
1343 		int ret_cnt = 0, k = 0;
1344 		int *flavor_list;
1345 
1346 		count = exi->exi_export.ex_seccnt; /* total sec count */
1347 		secp = exi->exi_export.ex_secinfo;
1348 
1349 		flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1350 		/* find out which flavors to return */
1351 		for (i = 0; i < count; i ++) {
1352 			int access, flavor, perm;
1353 
1354 			flavor = secp[i].s_secinfo.sc_nfsnum;
1355 			perm = secp[i].s_flags;
1356 
1357 			access = nfsauth4_secinfo_access(exi, cs->req,
1358 			    flavor, perm, cs->basecr);
1359 
1360 			if (! (access & NFSAUTH_DENIED) &&
1361 			    ! (access & NFSAUTH_WRONGSEC)) {
1362 				flavor_list[ret_cnt] = flavor;
1363 				ret_cnt++;
1364 			}
1365 		}
1366 
1367 		/* Create the returning SECINFO value */
1368 		resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1369 
1370 		for (i = 0; i < count; i++) {
1371 			/*
1372 			 * If the flavor is in the flavor list,
1373 			 * fill in resok_val.
1374 			 */
1375 			si = &secp[i].s_secinfo;
1376 			if (in_flavor_list(si->sc_nfsnum,
1377 			    flavor_list, ret_cnt)) {
1378 				resok_val[k].flavor = si->sc_rpcnum;
1379 				if (resok_val[k].flavor == RPCSEC_GSS) {
1380 					rpcsec_gss_info *info;
1381 
1382 					info = &resok_val[k].flavor_info;
1383 					info->qop = si->sc_qop;
1384 					info->service = (rpc_gss_svc_t)
1385 					    si->sc_service;
1386 
1387 					/* get oid opaque data */
1388 					info->oid.sec_oid4_len =
1389 					    si->sc_gss_mech_type->length;
1390 					info->oid.sec_oid4_val = kmem_alloc(
1391 					    si->sc_gss_mech_type->length,
1392 					    KM_SLEEP);
1393 					bcopy(si->sc_gss_mech_type->elements,
1394 					    info->oid.sec_oid4_val,
1395 					    info->oid.sec_oid4_len);
1396 				}
1397 				k++;
1398 			}
1399 			if (k >= ret_cnt)
1400 				break;
1401 		}
1402 		resp->SECINFO4resok_len = ret_cnt;
1403 		resp->SECINFO4resok_val = resok_val;
1404 		kmem_free(flavor_list, count * sizeof (int));
1405 	}
1406 
1407 	VN_RELE(vp);
1408 	return (NFS4_OK);
1409 }
1410 
1411 /*
1412  * SECINFO (Operation 33): Obtain required security information on
1413  * the component name in the format of (security-mechanism-oid, qop, service)
1414  * triplets.
1415  */
1416 /* ARGSUSED */
1417 static void
rfs4_op_secinfo(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1418 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1419     struct compound_state *cs)
1420 {
1421 	SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1422 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1423 	utf8string *utfnm = &args->name;
1424 	uint_t len;
1425 	char *nm;
1426 	struct sockaddr *ca;
1427 	char *name = NULL;
1428 	nfsstat4 status = NFS4_OK;
1429 
1430 	DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1431 	    SECINFO4args *, args);
1432 
1433 	/*
1434 	 * Current file handle (cfh) should have been set before getting
1435 	 * into this function. If not, return error.
1436 	 */
1437 	if (cs->vp == NULL) {
1438 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1439 		goto out;
1440 	}
1441 
1442 	if (cs->vp->v_type != VDIR) {
1443 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1444 		goto out;
1445 	}
1446 
1447 	/*
1448 	 * Verify the component name. If failed, error out, but
1449 	 * do not error out if the component name is a "..".
1450 	 * SECINFO will return its parents secinfo data for SECINFO "..".
1451 	 */
1452 	status = utf8_dir_verify(utfnm);
1453 	if (status != NFS4_OK) {
1454 		if (utfnm->utf8string_len != 2 ||
1455 		    utfnm->utf8string_val[0] != '.' ||
1456 		    utfnm->utf8string_val[1] != '.') {
1457 			*cs->statusp = resp->status = status;
1458 			goto out;
1459 		}
1460 	}
1461 
1462 	nm = utf8_to_str(utfnm, &len, NULL);
1463 	if (nm == NULL) {
1464 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1465 		goto out;
1466 	}
1467 
1468 	if (len > MAXNAMELEN) {
1469 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1470 		kmem_free(nm, len);
1471 		goto out;
1472 	}
1473 
1474 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1475 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1476 	    MAXPATHLEN  + 1);
1477 
1478 	if (name == NULL) {
1479 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1480 		kmem_free(nm, len);
1481 		goto out;
1482 	}
1483 
1484 	*cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1485 
1486 	if (resp->status == NFS4_OK && rfs4_has_session(cs)) {
1487 		/*
1488 		 * See rfc 5661 section 2.6.3.1.1.8 and 18.29.3
1489 		 *
1490 		 * 2.6.3.1.1.8
1491 		 *	SECINFO and SECINFO_NO_NAME consume the current
1492 		 *	filehandle (note that this is a change from NFSv4.0).
1493 		 *
1494 		 * 18.29.3
1495 		 *	On success, the current filehandle is consumed (see
1496 		 *	Section 2.6.3.1.1.8), and if the next operation after
1497 		 *	SECINFO tries to use the current filehandle, that
1498 		 *	operation will fail with the status
1499 		 *	NFS4ERR_NOFILEHANDLE.
1500 		 */
1501 		VN_RELE(cs->vp);
1502 		cs->vp = NULL;
1503 	}
1504 
1505 	if (name != nm)
1506 		kmem_free(name, MAXPATHLEN + 1);
1507 	kmem_free(nm, len);
1508 
1509 out:
1510 	DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1511 	    SECINFO4res *, resp);
1512 }
1513 
1514 /*
1515  * Free SECINFO result.
1516  */
1517 /* ARGSUSED */
1518 static void
rfs4_op_secinfo_free(nfs_resop4 * resop)1519 rfs4_op_secinfo_free(nfs_resop4 *resop)
1520 {
1521 	SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1522 	int count, i;
1523 	secinfo4 *resok_val;
1524 
1525 	/* If this is not an Ok result, nothing to free. */
1526 	if (resp->status != NFS4_OK) {
1527 		return;
1528 	}
1529 
1530 	count = resp->SECINFO4resok_len;
1531 	resok_val = resp->SECINFO4resok_val;
1532 
1533 	for (i = 0; i < count; i++) {
1534 		if (resok_val[i].flavor == RPCSEC_GSS) {
1535 			rpcsec_gss_info *info;
1536 
1537 			info = &resok_val[i].flavor_info;
1538 			kmem_free(info->oid.sec_oid4_val,
1539 			    info->oid.sec_oid4_len);
1540 		}
1541 	}
1542 	kmem_free(resok_val, count * sizeof (secinfo4));
1543 	resp->SECINFO4resok_len = 0;
1544 	resp->SECINFO4resok_val = NULL;
1545 }
1546 
1547 /* ARGSUSED */
1548 static void
rfs4_op_access(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1549 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1550     struct compound_state *cs)
1551 {
1552 	ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1553 	ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1554 	int error;
1555 	vnode_t *vp;
1556 	struct vattr va;
1557 	int checkwriteperm;
1558 	cred_t *cr = cs->cr;
1559 	bslabel_t *clabel, *slabel;
1560 	ts_label_t *tslabel;
1561 	boolean_t admin_low_client;
1562 
1563 	DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1564 	    ACCESS4args *, args);
1565 
1566 #if 0	/* XXX allow access even if !cs->access. Eventually only pseudo fs */
1567 	if (cs->access == CS_ACCESS_DENIED) {
1568 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1569 		goto out;
1570 	}
1571 #endif
1572 	if (cs->vp == NULL) {
1573 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1574 		goto out;
1575 	}
1576 
1577 	ASSERT(cr != NULL);
1578 
1579 	vp = cs->vp;
1580 
1581 	/*
1582 	 * If the file system is exported read only, it is not appropriate
1583 	 * to check write permissions for regular files and directories.
1584 	 * Special files are interpreted by the client, so the underlying
1585 	 * permissions are sent back to the client for interpretation.
1586 	 */
1587 	if (rdonly4(req, cs) &&
1588 	    (vp->v_type == VREG || vp->v_type == VDIR))
1589 		checkwriteperm = 0;
1590 	else
1591 		checkwriteperm = 1;
1592 
1593 	/*
1594 	 * XXX
1595 	 * We need the mode so that we can correctly determine access
1596 	 * permissions relative to a mandatory lock file.  Access to
1597 	 * mandatory lock files is denied on the server, so it might
1598 	 * as well be reflected to the server during the open.
1599 	 */
1600 	va.va_mask = AT_MODE;
1601 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1602 	if (error) {
1603 		*cs->statusp = resp->status = puterrno4(error);
1604 		goto out;
1605 	}
1606 	resp->access = 0;
1607 	resp->supported = 0;
1608 
1609 	if (is_system_labeled()) {
1610 		ASSERT(req->rq_label != NULL);
1611 		clabel = req->rq_label;
1612 		DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1613 		    "got client label from request(1)",
1614 		    struct svc_req *, req);
1615 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
1616 			if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1617 				*cs->statusp = resp->status = puterrno4(EACCES);
1618 				goto out;
1619 			}
1620 			slabel = label2bslabel(tslabel);
1621 			DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1622 			    char *, "got server label(1) for vp(2)",
1623 			    bslabel_t *, slabel, vnode_t *, vp);
1624 
1625 			admin_low_client = B_FALSE;
1626 		} else
1627 			admin_low_client = B_TRUE;
1628 	}
1629 
1630 	if (args->access & ACCESS4_READ) {
1631 		error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1632 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1633 		    (!is_system_labeled() || admin_low_client ||
1634 		    bldominates(clabel, slabel)))
1635 			resp->access |= ACCESS4_READ;
1636 		resp->supported |= ACCESS4_READ;
1637 	}
1638 	if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1639 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1640 		if (!error && (!is_system_labeled() || admin_low_client ||
1641 		    bldominates(clabel, slabel)))
1642 			resp->access |= ACCESS4_LOOKUP;
1643 		resp->supported |= ACCESS4_LOOKUP;
1644 	}
1645 	if (checkwriteperm &&
1646 	    (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1647 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1648 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1649 		    (!is_system_labeled() || admin_low_client ||
1650 		    blequal(clabel, slabel)))
1651 			resp->access |=
1652 			    (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1653 		resp->supported |=
1654 		    resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1655 	}
1656 
1657 	if (checkwriteperm &&
1658 	    (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1659 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1660 		if (!error && (!is_system_labeled() || admin_low_client ||
1661 		    blequal(clabel, slabel)))
1662 			resp->access |= ACCESS4_DELETE;
1663 		resp->supported |= ACCESS4_DELETE;
1664 	}
1665 	if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1666 		error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1667 		if (!error && !MANDLOCK(vp, va.va_mode) &&
1668 		    (!is_system_labeled() || admin_low_client ||
1669 		    bldominates(clabel, slabel)))
1670 			resp->access |= ACCESS4_EXECUTE;
1671 		resp->supported |= ACCESS4_EXECUTE;
1672 	}
1673 
1674 	if (is_system_labeled() && !admin_low_client)
1675 		label_rele(tslabel);
1676 
1677 	*cs->statusp = resp->status = NFS4_OK;
1678 out:
1679 	DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1680 	    ACCESS4res *, resp);
1681 }
1682 
1683 /* ARGSUSED */
1684 static void
rfs4_op_commit(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1685 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1686     struct compound_state *cs)
1687 {
1688 	COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1689 	COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1690 	int error;
1691 	vnode_t *vp = cs->vp;
1692 	cred_t *cr = cs->cr;
1693 	vattr_t va;
1694 	nfs4_srv_t *nsrv4;
1695 
1696 	DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1697 	    COMMIT4args *, args);
1698 
1699 	if (vp == NULL) {
1700 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1701 		goto out;
1702 	}
1703 	if (cs->access == CS_ACCESS_DENIED) {
1704 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1705 		goto out;
1706 	}
1707 
1708 	if (args->offset + args->count < args->offset) {
1709 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1710 		goto out;
1711 	}
1712 
1713 	va.va_mask = AT_UID;
1714 	error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1715 
1716 	/*
1717 	 * If we can't get the attributes, then we can't do the
1718 	 * right access checking.  So, we'll fail the request.
1719 	 */
1720 	if (error) {
1721 		*cs->statusp = resp->status = puterrno4(error);
1722 		goto out;
1723 	}
1724 	if (rdonly4(req, cs)) {
1725 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1726 		goto out;
1727 	}
1728 
1729 	if (vp->v_type != VREG) {
1730 		if (vp->v_type == VDIR)
1731 			resp->status = NFS4ERR_ISDIR;
1732 		else
1733 			resp->status = NFS4ERR_INVAL;
1734 		*cs->statusp = resp->status;
1735 		goto out;
1736 	}
1737 
1738 	if (crgetuid(cr) != va.va_uid &&
1739 	    (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1740 		*cs->statusp = resp->status = puterrno4(error);
1741 		goto out;
1742 	}
1743 
1744 	error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1745 
1746 	if (error) {
1747 		*cs->statusp = resp->status = puterrno4(error);
1748 		goto out;
1749 	}
1750 
1751 	nsrv4 = nfs4_get_srv();
1752 	*cs->statusp = resp->status = NFS4_OK;
1753 	resp->writeverf = nsrv4->write4verf;
1754 out:
1755 	DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1756 	    COMMIT4res *, resp);
1757 }
1758 
1759 /*
1760  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1761  * was completed. It does the nfsv4 create for special files.
1762  */
1763 /* ARGSUSED */
1764 static vnode_t *
do_rfs4_op_mknod(CREATE4args * args,CREATE4res * resp,struct svc_req * req,struct compound_state * cs,vattr_t * vap,char * nm)1765 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1766     struct compound_state *cs, vattr_t *vap, char *nm)
1767 {
1768 	int error;
1769 	cred_t *cr = cs->cr;
1770 	vnode_t *dvp = cs->vp;
1771 	vnode_t *vp = NULL;
1772 	int mode;
1773 	enum vcexcl excl;
1774 
1775 	switch (args->type) {
1776 	case NF4CHR:
1777 	case NF4BLK:
1778 		if (secpolicy_sys_devices(cr) != 0) {
1779 			*cs->statusp = resp->status = NFS4ERR_PERM;
1780 			return (NULL);
1781 		}
1782 		if (args->type == NF4CHR)
1783 			vap->va_type = VCHR;
1784 		else
1785 			vap->va_type = VBLK;
1786 		vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1787 		    args->ftype4_u.devdata.specdata2);
1788 		vap->va_mask |= AT_RDEV;
1789 		break;
1790 	case NF4SOCK:
1791 		vap->va_type = VSOCK;
1792 		break;
1793 	case NF4FIFO:
1794 		vap->va_type = VFIFO;
1795 		break;
1796 	default:
1797 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1798 		return (NULL);
1799 	}
1800 
1801 	/*
1802 	 * Must specify the mode.
1803 	 */
1804 	if (!(vap->va_mask & AT_MODE)) {
1805 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1806 		return (NULL);
1807 	}
1808 
1809 	excl = EXCL;
1810 
1811 	mode = 0;
1812 
1813 	error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1814 	if (error) {
1815 		*cs->statusp = resp->status = puterrno4(error);
1816 		return (NULL);
1817 	}
1818 	return (vp);
1819 }
1820 
1821 /*
1822  * nfsv4 create is used to create non-regular files. For regular files,
1823  * use nfsv4 open.
1824  */
1825 /* ARGSUSED */
1826 static void
rfs4_op_create(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)1827 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1828     struct compound_state *cs)
1829 {
1830 	CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1831 	CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1832 	int error;
1833 	struct vattr bva, iva, iva2, ava, *vap;
1834 	cred_t *cr = cs->cr;
1835 	vnode_t *dvp = cs->vp;
1836 	vnode_t *vp = NULL;
1837 	vnode_t *realvp;
1838 	char *nm, *lnm;
1839 	uint_t len, llen;
1840 	int syncval = 0;
1841 	struct nfs4_svgetit_arg sarg;
1842 	struct nfs4_ntov_table ntov;
1843 	struct statvfs64 sb;
1844 	nfsstat4 status;
1845 	struct sockaddr *ca;
1846 	char *name = NULL;
1847 	char *lname = NULL;
1848 
1849 	DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1850 	    CREATE4args *, args);
1851 
1852 	resp->attrset = 0;
1853 
1854 	if (dvp == NULL) {
1855 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1856 		goto out;
1857 	}
1858 
1859 	/*
1860 	 * If there is an unshared filesystem mounted on this vnode,
1861 	 * do not allow to create an object in this directory.
1862 	 */
1863 	if (vn_ismntpt(dvp)) {
1864 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1865 		goto out;
1866 	}
1867 
1868 	/* Verify that type is correct */
1869 	switch (args->type) {
1870 	case NF4LNK:
1871 	case NF4BLK:
1872 	case NF4CHR:
1873 	case NF4SOCK:
1874 	case NF4FIFO:
1875 	case NF4DIR:
1876 		break;
1877 	default:
1878 		*cs->statusp = resp->status = NFS4ERR_BADTYPE;
1879 		goto out;
1880 	};
1881 
1882 	if (cs->access == CS_ACCESS_DENIED) {
1883 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
1884 		goto out;
1885 	}
1886 	if (dvp->v_type != VDIR) {
1887 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
1888 		goto out;
1889 	}
1890 	status = utf8_dir_verify(&args->objname);
1891 	if (status != NFS4_OK) {
1892 		*cs->statusp = resp->status = status;
1893 		goto out;
1894 	}
1895 
1896 	if (rdonly4(req, cs)) {
1897 		*cs->statusp = resp->status = NFS4ERR_ROFS;
1898 		goto out;
1899 	}
1900 
1901 	/*
1902 	 * Name of newly created object
1903 	 */
1904 	nm = utf8_to_fn(&args->objname, &len, NULL);
1905 	if (nm == NULL) {
1906 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1907 		goto out;
1908 	}
1909 
1910 	if (len > MAXNAMELEN) {
1911 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1912 		kmem_free(nm, len);
1913 		goto out;
1914 	}
1915 
1916 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1917 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1918 	    MAXPATHLEN  + 1);
1919 
1920 	if (name == NULL) {
1921 		*cs->statusp = resp->status = NFS4ERR_INVAL;
1922 		kmem_free(nm, len);
1923 		goto out;
1924 	}
1925 
1926 	resp->attrset = 0;
1927 
1928 	sarg.sbp = &sb;
1929 	sarg.is_referral = B_FALSE;
1930 	nfs4_ntov_table_init(&ntov);
1931 
1932 	status = do_rfs4_set_attrs(&resp->attrset,
1933 	    &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1934 
1935 	if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1936 		status = NFS4ERR_INVAL;
1937 
1938 	if (status != NFS4_OK) {
1939 		*cs->statusp = resp->status = status;
1940 		if (name != nm)
1941 			kmem_free(name, MAXPATHLEN + 1);
1942 		kmem_free(nm, len);
1943 		nfs4_ntov_table_free(&ntov, &sarg);
1944 		resp->attrset = 0;
1945 		goto out;
1946 	}
1947 
1948 	/* Get "before" change value */
1949 	bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1950 	error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1951 	if (error) {
1952 		*cs->statusp = resp->status = puterrno4(error);
1953 		if (name != nm)
1954 			kmem_free(name, MAXPATHLEN + 1);
1955 		kmem_free(nm, len);
1956 		nfs4_ntov_table_free(&ntov, &sarg);
1957 		resp->attrset = 0;
1958 		goto out;
1959 	}
1960 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1961 
1962 	vap = sarg.vap;
1963 
1964 	/*
1965 	 * Set the default initial values for attributes when the parent
1966 	 * directory does not have the VSUID/VSGID bit set and they have
1967 	 * not been specified in createattrs.
1968 	 */
1969 	if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1970 		vap->va_uid = crgetuid(cr);
1971 		vap->va_mask |= AT_UID;
1972 	}
1973 	if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1974 		vap->va_gid = crgetgid(cr);
1975 		vap->va_mask |= AT_GID;
1976 	}
1977 
1978 	vap->va_mask |= AT_TYPE;
1979 	switch (args->type) {
1980 	case NF4DIR:
1981 		vap->va_type = VDIR;
1982 		if ((vap->va_mask & AT_MODE) == 0) {
1983 			vap->va_mode = 0700;	/* default: owner rwx only */
1984 			vap->va_mask |= AT_MODE;
1985 		}
1986 		error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1987 		if (error)
1988 			break;
1989 
1990 		/*
1991 		 * Get the initial "after" sequence number, if it fails,
1992 		 * set to zero
1993 		 */
1994 		iva.va_mask = AT_SEQ;
1995 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1996 			iva.va_seq = 0;
1997 		break;
1998 	case NF4LNK:
1999 		vap->va_type = VLNK;
2000 		if ((vap->va_mask & AT_MODE) == 0) {
2001 			vap->va_mode = 0700;	/* default: owner rwx only */
2002 			vap->va_mask |= AT_MODE;
2003 		}
2004 
2005 		/*
2006 		 * symlink names must be treated as data
2007 		 */
2008 		lnm = utf8_to_str((utf8string *)&args->ftype4_u.linkdata,
2009 		    &llen, NULL);
2010 
2011 		if (lnm == NULL) {
2012 			*cs->statusp = resp->status = NFS4ERR_INVAL;
2013 			if (name != nm)
2014 				kmem_free(name, MAXPATHLEN + 1);
2015 			kmem_free(nm, len);
2016 			nfs4_ntov_table_free(&ntov, &sarg);
2017 			resp->attrset = 0;
2018 			goto out;
2019 		}
2020 
2021 		if (llen > MAXPATHLEN) {
2022 			*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2023 			if (name != nm)
2024 				kmem_free(name, MAXPATHLEN + 1);
2025 			kmem_free(nm, len);
2026 			kmem_free(lnm, llen);
2027 			nfs4_ntov_table_free(&ntov, &sarg);
2028 			resp->attrset = 0;
2029 			goto out;
2030 		}
2031 
2032 		lname = nfscmd_convname(ca, cs->exi, lnm,
2033 		    NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
2034 
2035 		if (lname == NULL) {
2036 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
2037 			if (name != nm)
2038 				kmem_free(name, MAXPATHLEN + 1);
2039 			kmem_free(nm, len);
2040 			kmem_free(lnm, llen);
2041 			nfs4_ntov_table_free(&ntov, &sarg);
2042 			resp->attrset = 0;
2043 			goto out;
2044 		}
2045 
2046 		error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
2047 		if (lname != lnm)
2048 			kmem_free(lname, MAXPATHLEN + 1);
2049 		kmem_free(lnm, llen);
2050 		if (error)
2051 			break;
2052 
2053 		/*
2054 		 * Get the initial "after" sequence number, if it fails,
2055 		 * set to zero
2056 		 */
2057 		iva.va_mask = AT_SEQ;
2058 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2059 			iva.va_seq = 0;
2060 
2061 		error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2062 		    NULL, NULL, NULL);
2063 		if (error)
2064 			break;
2065 
2066 		/*
2067 		 * va_seq is not safe over VOP calls, check it again
2068 		 * if it has changed zero out iva to force atomic = FALSE.
2069 		 */
2070 		iva2.va_mask = AT_SEQ;
2071 		if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
2072 		    iva2.va_seq != iva.va_seq)
2073 			iva.va_seq = 0;
2074 		break;
2075 	default:
2076 		/*
2077 		 * probably a special file.
2078 		 */
2079 		if ((vap->va_mask & AT_MODE) == 0) {
2080 			vap->va_mode = 0600;	/* default: owner rw only */
2081 			vap->va_mask |= AT_MODE;
2082 		}
2083 		syncval = FNODSYNC;
2084 		/*
2085 		 * We know this will only generate one VOP call
2086 		 */
2087 		vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
2088 
2089 		if (vp == NULL) {
2090 			if (name != nm)
2091 				kmem_free(name, MAXPATHLEN + 1);
2092 			kmem_free(nm, len);
2093 			nfs4_ntov_table_free(&ntov, &sarg);
2094 			resp->attrset = 0;
2095 			goto out;
2096 		}
2097 
2098 		/*
2099 		 * Get the initial "after" sequence number, if it fails,
2100 		 * set to zero
2101 		 */
2102 		iva.va_mask = AT_SEQ;
2103 		if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
2104 			iva.va_seq = 0;
2105 
2106 		break;
2107 	}
2108 	if (name != nm)
2109 		kmem_free(name, MAXPATHLEN + 1);
2110 	kmem_free(nm, len);
2111 
2112 	if (error) {
2113 		*cs->statusp = resp->status = puterrno4(error);
2114 	}
2115 
2116 	/*
2117 	 * Force modified data and metadata out to stable storage.
2118 	 */
2119 	(void) VOP_FSYNC(dvp, 0, cr, NULL);
2120 
2121 	if (resp->status != NFS4_OK) {
2122 		if (vp != NULL)
2123 			VN_RELE(vp);
2124 		nfs4_ntov_table_free(&ntov, &sarg);
2125 		resp->attrset = 0;
2126 		goto out;
2127 	}
2128 
2129 	/*
2130 	 * Finish setup of cinfo response, "before" value already set.
2131 	 * Get "after" change value, if it fails, simply return the
2132 	 * before value.
2133 	 */
2134 	ava.va_mask = AT_CTIME|AT_SEQ;
2135 	if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
2136 		ava.va_ctime = bva.va_ctime;
2137 		ava.va_seq = 0;
2138 	}
2139 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
2140 
2141 	/*
2142 	 * True verification that object was created with correct
2143 	 * attrs is impossible.  The attrs could have been changed
2144 	 * immediately after object creation.  If attributes did
2145 	 * not verify, the only recourse for the server is to
2146 	 * destroy the object.  Maybe if some attrs (like gid)
2147 	 * are set incorrectly, the object should be destroyed;
2148 	 * however, seems bad as a default policy.  Do we really
2149 	 * want to destroy an object over one of the times not
2150 	 * verifying correctly?  For these reasons, the server
2151 	 * currently sets bits in attrset for createattrs
2152 	 * that were set; however, no verification is done.
2153 	 *
2154 	 * vmask_to_nmask accounts for vattr bits set on create
2155 	 *	[do_rfs4_set_attrs() only sets resp bits for
2156 	 *	 non-vattr/vfs bits.]
2157 	 * Mask off any bits set by default so as not to return
2158 	 * more attrset bits than were requested in createattrs
2159 	 */
2160 	nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
2161 	resp->attrset &= args->createattrs.attrmask;
2162 	nfs4_ntov_table_free(&ntov, &sarg);
2163 
2164 	error = makefh4(&cs->fh, vp, cs->exi);
2165 	if (error) {
2166 		*cs->statusp = resp->status = puterrno4(error);
2167 	}
2168 
2169 	/*
2170 	 * The cinfo.atomic = TRUE only if we got no errors, we have
2171 	 * non-zero va_seq's, and it has incremented by exactly one
2172 	 * during the creation and it didn't change during the VOP_LOOKUP
2173 	 * or VOP_FSYNC.
2174 	 */
2175 	if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
2176 	    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
2177 		resp->cinfo.atomic = TRUE;
2178 	else
2179 		resp->cinfo.atomic = FALSE;
2180 
2181 	/*
2182 	 * Force modified metadata out to stable storage.
2183 	 *
2184 	 * if a underlying vp exists, pass it to VOP_FSYNC
2185 	 */
2186 	if (VOP_REALVP(vp, &realvp, NULL) == 0)
2187 		(void) VOP_FSYNC(realvp, syncval, cr, NULL);
2188 	else
2189 		(void) VOP_FSYNC(vp, syncval, cr, NULL);
2190 
2191 	if (resp->status != NFS4_OK) {
2192 		VN_RELE(vp);
2193 		goto out;
2194 	}
2195 	if (cs->vp)
2196 		VN_RELE(cs->vp);
2197 
2198 	cs->vp = vp;
2199 	*cs->statusp = resp->status = NFS4_OK;
2200 out:
2201 	DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
2202 	    CREATE4res *, resp);
2203 }
2204 
2205 /*ARGSUSED*/
2206 static void
rfs4_op_delegpurge(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2207 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2208     struct compound_state *cs)
2209 {
2210 	DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
2211 	    DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
2212 
2213 	rfs4_op_inval(argop, resop, req, cs);
2214 
2215 	DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
2216 	    DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
2217 }
2218 
2219 /*ARGSUSED*/
2220 static void
rfs4_op_delegreturn(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2221 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2222     struct compound_state *cs)
2223 {
2224 	DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
2225 	DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
2226 	rfs4_deleg_state_t *dsp;
2227 	nfsstat4 status;
2228 
2229 	DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
2230 	    DELEGRETURN4args *, args);
2231 
2232 	status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
2233 	resp->status = *cs->statusp = status;
2234 	if (status != NFS4_OK)
2235 		goto out;
2236 
2237 	/* Ensure specified filehandle matches */
2238 	if (cs->vp != dsp->rds_finfo->rf_vp) {
2239 		resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
2240 	} else
2241 		rfs4_return_deleg(dsp, FALSE);
2242 
2243 	rfs4_update_lease(dsp->rds_client);
2244 
2245 	rfs4_deleg_state_rele(dsp);
2246 out:
2247 	DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
2248 	    DELEGRETURN4res *, resp);
2249 }
2250 
2251 /*
2252  * Check to see if a given "flavor" is an explicitly shared flavor.
2253  * The assumption of this routine is the "flavor" is already a valid
2254  * flavor in the secinfo list of "exi".
2255  *
2256  *	e.g.
2257  *		# share -o sec=flavor1 /export
2258  *		# share -o sec=flavor2 /export/home
2259  *
2260  *		flavor2 is not an explicitly shared flavor for /export,
2261  *		however it is in the secinfo list for /export thru the
2262  *		server namespace setup.
2263  */
2264 int
is_exported_sec(int flavor,struct exportinfo * exi)2265 is_exported_sec(int flavor, struct exportinfo *exi)
2266 {
2267 	int	i;
2268 	struct secinfo *sp;
2269 
2270 	sp = exi->exi_export.ex_secinfo;
2271 	for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2272 		if (flavor == sp[i].s_secinfo.sc_nfsnum ||
2273 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
2274 			return (SEC_REF_EXPORTED(&sp[i]));
2275 		}
2276 	}
2277 
2278 	/* Should not reach this point based on the assumption */
2279 	return (0);
2280 }
2281 
2282 /*
2283  * Check if the security flavor used in the request matches what is
2284  * required at the export point or at the root pseudo node (exi_root).
2285  *
2286  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
2287  *
2288  */
2289 static int
secinfo_match_or_authnone(struct compound_state * cs)2290 secinfo_match_or_authnone(struct compound_state *cs)
2291 {
2292 	int	i;
2293 	struct secinfo *sp;
2294 
2295 	/*
2296 	 * Check cs->nfsflavor (from the request) against
2297 	 * the current export data in cs->exi.
2298 	 */
2299 	sp = cs->exi->exi_export.ex_secinfo;
2300 	for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
2301 		if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
2302 		    sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
2303 			return (1);
2304 	}
2305 
2306 	return (0);
2307 }
2308 
2309 /*
2310  * Check the access authority for the client and return the correct error.
2311  */
2312 nfsstat4
call_checkauth4(struct compound_state * cs,struct svc_req * req)2313 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2314 {
2315 	int	authres;
2316 
2317 	/*
2318 	 * First, check if the security flavor used in the request
2319 	 * are among the flavors set in the server namespace.
2320 	 */
2321 	if (!secinfo_match_or_authnone(cs)) {
2322 		*cs->statusp = NFS4ERR_WRONGSEC;
2323 		return (*cs->statusp);
2324 	}
2325 
2326 	authres = checkauth4(cs, req);
2327 
2328 	if (authres > 0) {
2329 		*cs->statusp = NFS4_OK;
2330 		if (! (cs->access & CS_ACCESS_LIMITED))
2331 			cs->access = CS_ACCESS_OK;
2332 	} else if (authres == 0) {
2333 		*cs->statusp = NFS4ERR_ACCESS;
2334 	} else if (authres == -2) {
2335 		*cs->statusp = NFS4ERR_WRONGSEC;
2336 	} else {
2337 		*cs->statusp = NFS4ERR_DELAY;
2338 	}
2339 	return (*cs->statusp);
2340 }
2341 
2342 /*
2343  * bitmap4_to_attrmask is called by getattr and readdir.
2344  * It sets up the vattr mask and determines whether vfsstat call is needed
2345  * based on the input bitmap.
2346  * Returns nfsv4 status.
2347  */
2348 static nfsstat4
bitmap4_to_attrmask(bitmap4 breq,struct nfs4_svgetit_arg * sargp)2349 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2350 {
2351 	int i;
2352 	uint_t	va_mask;
2353 	struct statvfs64 *sbp = sargp->sbp;
2354 
2355 	sargp->sbp = NULL;
2356 	sargp->flag = 0;
2357 	sargp->rdattr_error = NFS4_OK;
2358 	sargp->mntdfid_set = FALSE;
2359 	if (sargp->cs->vp)
2360 		sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2361 		    FH4_ATTRDIR | FH4_NAMEDATTR);
2362 	else
2363 		sargp->xattr = 0;
2364 
2365 	/*
2366 	 * Set rdattr_error_req to true if return error per
2367 	 * failed entry rather than fail the readdir.
2368 	 */
2369 	if (breq & FATTR4_RDATTR_ERROR_MASK)
2370 		sargp->rdattr_error_req = 1;
2371 	else
2372 		sargp->rdattr_error_req = 0;
2373 
2374 	/*
2375 	 * generate the va_mask
2376 	 * Handle the easy cases first
2377 	 */
2378 	switch (breq) {
2379 	case NFS4_NTOV_ATTR_MASK:
2380 		sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2381 		return (NFS4_OK);
2382 
2383 	case NFS4_FS_ATTR_MASK:
2384 		sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2385 		sargp->sbp = sbp;
2386 		return (NFS4_OK);
2387 
2388 	case NFS4_NTOV_ATTR_CACHE_MASK:
2389 		sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2390 		return (NFS4_OK);
2391 
2392 	case FATTR4_LEASE_TIME_MASK:
2393 		sargp->vap->va_mask = 0;
2394 		return (NFS4_OK);
2395 
2396 	default:
2397 		va_mask = 0;
2398 		for (i = 0; i < nfs4_ntov_map_size; i++) {
2399 			if ((breq & nfs4_ntov_map[i].fbit) &&
2400 			    nfs4_ntov_map[i].vbit)
2401 				va_mask |= nfs4_ntov_map[i].vbit;
2402 		}
2403 
2404 		/*
2405 		 * Check is vfsstat is needed
2406 		 */
2407 		if (breq & NFS4_FS_ATTR_MASK)
2408 			sargp->sbp = sbp;
2409 
2410 		sargp->vap->va_mask = va_mask;
2411 		return (NFS4_OK);
2412 	}
2413 	/* NOTREACHED */
2414 }
2415 
2416 /*
2417  * bitmap4_get_sysattrs is called by getattr and readdir.
2418  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2419  * Returns nfsv4 status.
2420  */
2421 static nfsstat4
bitmap4_get_sysattrs(struct nfs4_svgetit_arg * sargp)2422 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2423 {
2424 	int error;
2425 	struct compound_state *cs = sargp->cs;
2426 	vnode_t *vp = cs->vp;
2427 
2428 	if (sargp->sbp != NULL) {
2429 		error = VFS_STATVFS(vp->v_vfsp, sargp->sbp);
2430 		if (error != 0) {
2431 			sargp->sbp = NULL;	/* to identify error */
2432 			return (puterrno4(error));
2433 		}
2434 	}
2435 
2436 	return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2437 }
2438 
2439 static void
nfs4_ntov_table_init(struct nfs4_ntov_table * ntovp)2440 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2441 {
2442 	ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2443 	    KM_SLEEP);
2444 	ntovp->attrcnt = 0;
2445 	ntovp->vfsstat = FALSE;
2446 }
2447 
2448 static void
nfs4_ntov_table_free(struct nfs4_ntov_table * ntovp,struct nfs4_svgetit_arg * sargp)2449 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2450     struct nfs4_svgetit_arg *sargp)
2451 {
2452 	int i;
2453 	union nfs4_attr_u *na;
2454 	uint8_t *amap;
2455 
2456 	/*
2457 	 * XXX Should do the same checks for whether the bit is set
2458 	 */
2459 	for (i = 0, na = ntovp->na, amap = ntovp->amap;
2460 	    i < ntovp->attrcnt; i++, na++, amap++) {
2461 		(void) (*nfs4_ntov_map[*amap].sv_getit)(
2462 		    NFS4ATTR_FREEIT, sargp, na);
2463 	}
2464 	if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2465 		/*
2466 		 * xdr_free for getattr will be done later
2467 		 */
2468 		for (i = 0, na = ntovp->na, amap = ntovp->amap;
2469 		    i < ntovp->attrcnt; i++, na++, amap++) {
2470 			xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2471 		}
2472 	}
2473 	kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2474 }
2475 
2476 /*
2477  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2478  */
2479 static nfsstat4
do_rfs4_op_getattr(bitmap4 breq,fattr4 * fattrp,struct nfs4_svgetit_arg * sargp)2480 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2481     struct nfs4_svgetit_arg *sargp)
2482 {
2483 	int error = 0;
2484 	int i, k;
2485 	struct nfs4_ntov_table ntov;
2486 	XDR xdr;
2487 	ulong_t xdr_size;
2488 	char *xdr_attrs;
2489 	nfsstat4 status = NFS4_OK;
2490 	nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2491 	union nfs4_attr_u *na;
2492 	uint8_t *amap;
2493 
2494 	sargp->op = NFS4ATTR_GETIT;
2495 	sargp->flag = 0;
2496 
2497 	fattrp->attrmask = 0;
2498 	/* if no bits requested, then return empty fattr4 */
2499 	if (breq == 0) {
2500 		fattrp->attrlist4_len = 0;
2501 		fattrp->attrlist4 = NULL;
2502 		return (NFS4_OK);
2503 	}
2504 
2505 	/*
2506 	 * return NFS4ERR_INVAL when client requests write-only attrs
2507 	 */
2508 	if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2509 		return (NFS4ERR_INVAL);
2510 
2511 	nfs4_ntov_table_init(&ntov);
2512 	na = ntov.na;
2513 	amap = ntov.amap;
2514 
2515 	/*
2516 	 * Now loop to get or verify the attrs
2517 	 */
2518 	for (i = 0; i < nfs4_ntov_map_size; i++) {
2519 		if (breq & nfs4_ntov_map[i].fbit) {
2520 			if ((*nfs4_ntov_map[i].sv_getit)(
2521 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2522 
2523 				error = (*nfs4_ntov_map[i].sv_getit)(
2524 				    NFS4ATTR_GETIT, sargp, na);
2525 
2526 				/*
2527 				 * Possible error values:
2528 				 * >0 if sv_getit failed to
2529 				 * get the attr; 0 if succeeded;
2530 				 * <0 if rdattr_error and the
2531 				 * attribute cannot be returned.
2532 				 */
2533 				if (error && !(sargp->rdattr_error_req))
2534 					goto done;
2535 				/*
2536 				 * If error then just for entry
2537 				 */
2538 				if (error == 0) {
2539 					fattrp->attrmask |=
2540 					    nfs4_ntov_map[i].fbit;
2541 					*amap++ =
2542 					    (uint8_t)nfs4_ntov_map[i].nval;
2543 					na++;
2544 					(ntov.attrcnt)++;
2545 				} else if ((error > 0) &&
2546 				    (sargp->rdattr_error == NFS4_OK)) {
2547 					sargp->rdattr_error = puterrno4(error);
2548 				}
2549 				error = 0;
2550 			}
2551 		}
2552 	}
2553 
2554 	/*
2555 	 * If rdattr_error was set after the return value for it was assigned,
2556 	 * update it.
2557 	 */
2558 	if (prev_rdattr_error != sargp->rdattr_error) {
2559 		na = ntov.na;
2560 		amap = ntov.amap;
2561 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2562 			k = *amap;
2563 			if (k < FATTR4_RDATTR_ERROR) {
2564 				continue;
2565 			}
2566 			if ((k == FATTR4_RDATTR_ERROR) &&
2567 			    ((*nfs4_ntov_map[k].sv_getit)(
2568 			    NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2569 
2570 				(void) (*nfs4_ntov_map[k].sv_getit)(
2571 				    NFS4ATTR_GETIT, sargp, na);
2572 			}
2573 			break;
2574 		}
2575 	}
2576 
2577 	xdr_size = 0;
2578 	na = ntov.na;
2579 	amap = ntov.amap;
2580 	for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2581 		xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2582 	}
2583 
2584 	fattrp->attrlist4_len = xdr_size;
2585 	if (xdr_size) {
2586 		/* freed by rfs4_op_getattr_free() */
2587 		fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2588 
2589 		xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2590 
2591 		na = ntov.na;
2592 		amap = ntov.amap;
2593 		for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2594 			if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2595 				DTRACE_PROBE1(nfss__e__getattr4_encfail,
2596 				    int, *amap);
2597 				status = NFS4ERR_SERVERFAULT;
2598 				break;
2599 			}
2600 		}
2601 		/* xdrmem_destroy(&xdrs); */	/* NO-OP */
2602 	} else {
2603 		fattrp->attrlist4 = NULL;
2604 	}
2605 done:
2606 
2607 	nfs4_ntov_table_free(&ntov, sargp);
2608 
2609 	if (error != 0)
2610 		status = puterrno4(error);
2611 
2612 	return (status);
2613 }
2614 
2615 /* ARGSUSED */
2616 static void
rfs4_op_getattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2617 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2618     struct compound_state *cs)
2619 {
2620 	GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2621 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2622 	struct nfs4_svgetit_arg sarg;
2623 	struct statvfs64 sb;
2624 	nfsstat4 status;
2625 
2626 	DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2627 	    GETATTR4args *, args);
2628 
2629 	if (cs->vp == NULL) {
2630 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2631 		goto out;
2632 	}
2633 
2634 	if (cs->access == CS_ACCESS_DENIED) {
2635 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2636 		goto out;
2637 	}
2638 
2639 	sarg.sbp = &sb;
2640 	sarg.cs = cs;
2641 	sarg.is_referral = B_FALSE;
2642 
2643 	status = bitmap4_to_attrmask(args->attr_request, &sarg);
2644 	if (status == NFS4_OK) {
2645 
2646 		status = bitmap4_get_sysattrs(&sarg);
2647 		if (status == NFS4_OK) {
2648 
2649 			/* Is this a referral? */
2650 			if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2651 				/* Older V4 Solaris client sees a link */
2652 				if (client_is_downrev(req))
2653 					sarg.vap->va_type = VLNK;
2654 				else
2655 					sarg.is_referral = B_TRUE;
2656 			}
2657 
2658 			status = do_rfs4_op_getattr(args->attr_request,
2659 			    &resp->obj_attributes, &sarg);
2660 		}
2661 	}
2662 	*cs->statusp = resp->status = status;
2663 out:
2664 	DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2665 	    GETATTR4res *, resp);
2666 }
2667 
2668 static void
rfs4_op_getattr_free(nfs_resop4 * resop)2669 rfs4_op_getattr_free(nfs_resop4 *resop)
2670 {
2671 	GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2672 
2673 	nfs4_fattr4_free(&resp->obj_attributes);
2674 }
2675 
2676 /* ARGSUSED */
2677 static void
rfs4_op_getfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2678 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2679     struct compound_state *cs)
2680 {
2681 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2682 
2683 	DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2684 
2685 	if (cs->vp == NULL) {
2686 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2687 		goto out;
2688 	}
2689 	if (cs->access == CS_ACCESS_DENIED) {
2690 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2691 		goto out;
2692 	}
2693 
2694 	/* check for reparse point at the share point */
2695 	if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2696 		/* it's all bad */
2697 		cs->exi->exi_moved = 1;
2698 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2699 		DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2700 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2701 		return;
2702 	}
2703 
2704 	/* check for reparse point at vp */
2705 	if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2706 		/* it's not all bad */
2707 		*cs->statusp = resp->status = NFS4ERR_MOVED;
2708 		DTRACE_PROBE2(nfs4serv__func__referral__moved,
2709 		    vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2710 		return;
2711 	}
2712 
2713 	resp->object.nfs_fh4_val =
2714 	    kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2715 	nfs_fh4_copy(&cs->fh, &resp->object);
2716 	*cs->statusp = resp->status = NFS4_OK;
2717 out:
2718 	DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2719 	    GETFH4res *, resp);
2720 }
2721 
2722 static void
rfs4_op_getfh_free(nfs_resop4 * resop)2723 rfs4_op_getfh_free(nfs_resop4 *resop)
2724 {
2725 	GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2726 
2727 	if (resp->status == NFS4_OK &&
2728 	    resp->object.nfs_fh4_val != NULL) {
2729 		kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2730 		resp->object.nfs_fh4_val = NULL;
2731 		resp->object.nfs_fh4_len = 0;
2732 	}
2733 }
2734 
2735 /*
2736  * illegal: args: void
2737  *	    res : status (NFS4ERR_OP_ILLEGAL)
2738  */
2739 /* ARGSUSED */
2740 static void
rfs4_op_illegal(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2741 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2742     struct svc_req *req, struct compound_state *cs)
2743 {
2744 	ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2745 
2746 	resop->resop = OP_ILLEGAL;
2747 	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2748 }
2749 
2750 /* ARGSUSED */
2751 static void
rfs4_op_notsup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2752 rfs4_op_notsup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2753     struct compound_state *cs)
2754 {
2755 	*cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_NOTSUPP;
2756 }
2757 
2758 /*
2759  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2760  *	 res: status. If success - CURRENT_FH unchanged, return change_info
2761  */
2762 /* ARGSUSED */
2763 static void
rfs4_op_link(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)2764 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2765     struct compound_state *cs)
2766 {
2767 	LINK4args *args = &argop->nfs_argop4_u.oplink;
2768 	LINK4res *resp = &resop->nfs_resop4_u.oplink;
2769 	int error;
2770 	vnode_t *vp;
2771 	vnode_t *dvp;
2772 	struct vattr bdva, idva, adva;
2773 	char *nm;
2774 	uint_t  len;
2775 	struct sockaddr *ca;
2776 	char *name = NULL;
2777 	nfsstat4 status;
2778 
2779 	DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2780 	    LINK4args *, args);
2781 
2782 	/* SAVED_FH: source object */
2783 	vp = cs->saved_vp;
2784 	if (vp == NULL) {
2785 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2786 		goto out;
2787 	}
2788 
2789 	/* CURRENT_FH: target directory */
2790 	dvp = cs->vp;
2791 	if (dvp == NULL) {
2792 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2793 		goto out;
2794 	}
2795 
2796 	/*
2797 	 * If there is a non-shared filesystem mounted on this vnode,
2798 	 * do not allow to link any file in this directory.
2799 	 */
2800 	if (vn_ismntpt(dvp)) {
2801 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2802 		goto out;
2803 	}
2804 
2805 	if (cs->access == CS_ACCESS_DENIED) {
2806 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
2807 		goto out;
2808 	}
2809 
2810 	/* Check source object's type validity */
2811 	if (vp->v_type == VDIR) {
2812 		*cs->statusp = resp->status = NFS4ERR_ISDIR;
2813 		goto out;
2814 	}
2815 
2816 	/* Check target directory's type */
2817 	if (dvp->v_type != VDIR) {
2818 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
2819 		goto out;
2820 	}
2821 
2822 	if (cs->saved_exi != cs->exi) {
2823 		*cs->statusp = resp->status = NFS4ERR_XDEV;
2824 		goto out;
2825 	}
2826 
2827 	status = utf8_dir_verify(&args->newname);
2828 	if (status != NFS4_OK) {
2829 		*cs->statusp = resp->status = status;
2830 		goto out;
2831 	}
2832 
2833 	nm = utf8_to_fn(&args->newname, &len, NULL);
2834 	if (nm == NULL) {
2835 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2836 		goto out;
2837 	}
2838 
2839 	if (len > MAXNAMELEN) {
2840 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2841 		kmem_free(nm, len);
2842 		goto out;
2843 	}
2844 
2845 	if (rdonly4(req, cs)) {
2846 		*cs->statusp = resp->status = NFS4ERR_ROFS;
2847 		kmem_free(nm, len);
2848 		goto out;
2849 	}
2850 
2851 	/* Get "before" change value */
2852 	bdva.va_mask = AT_CTIME|AT_SEQ;
2853 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2854 	if (error) {
2855 		*cs->statusp = resp->status = puterrno4(error);
2856 		kmem_free(nm, len);
2857 		goto out;
2858 	}
2859 
2860 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2861 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2862 	    MAXPATHLEN  + 1);
2863 
2864 	if (name == NULL) {
2865 		*cs->statusp = resp->status = NFS4ERR_INVAL;
2866 		kmem_free(nm, len);
2867 		goto out;
2868 	}
2869 
2870 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2871 
2872 	error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2873 
2874 	if (nm != name)
2875 		kmem_free(name, MAXPATHLEN + 1);
2876 	kmem_free(nm, len);
2877 
2878 	/*
2879 	 * Get the initial "after" sequence number, if it fails, set to zero
2880 	 */
2881 	idva.va_mask = AT_SEQ;
2882 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2883 		idva.va_seq = 0;
2884 
2885 	/*
2886 	 * Force modified data and metadata out to stable storage.
2887 	 */
2888 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2889 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2890 
2891 	if (error) {
2892 		*cs->statusp = resp->status = puterrno4(error);
2893 		goto out;
2894 	}
2895 
2896 	/*
2897 	 * Get "after" change value, if it fails, simply return the
2898 	 * before value.
2899 	 */
2900 	adva.va_mask = AT_CTIME|AT_SEQ;
2901 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2902 		adva.va_ctime = bdva.va_ctime;
2903 		adva.va_seq = 0;
2904 	}
2905 
2906 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2907 
2908 	/*
2909 	 * The cinfo.atomic = TRUE only if we have
2910 	 * non-zero va_seq's, and it has incremented by exactly one
2911 	 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2912 	 */
2913 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2914 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2915 		resp->cinfo.atomic = TRUE;
2916 	else
2917 		resp->cinfo.atomic = FALSE;
2918 
2919 	*cs->statusp = resp->status = NFS4_OK;
2920 out:
2921 	DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2922 	    LINK4res *, resp);
2923 }
2924 
2925 /*
2926  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2927  */
2928 
2929 /* ARGSUSED */
2930 static nfsstat4
do_rfs4_op_lookup(char * nm,struct svc_req * req,struct compound_state * cs)2931 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2932 {
2933 	int error;
2934 	int different_export = 0;
2935 	vnode_t *vp, *pre_tvp = NULL, *oldvp = NULL;
2936 	struct exportinfo *exi = NULL, *pre_exi = NULL;
2937 	nfsstat4 stat;
2938 	fid_t fid;
2939 	int attrdir, dotdot, walk;
2940 	bool_t is_newvp = FALSE;
2941 
2942 	if (cs->vp->v_flag & V_XATTRDIR) {
2943 		attrdir = 1;
2944 		ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2945 	} else {
2946 		attrdir = 0;
2947 		ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2948 	}
2949 
2950 	dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2951 
2952 	/*
2953 	 * If dotdotting, then need to check whether it's
2954 	 * above the root of a filesystem, or above an
2955 	 * export point.
2956 	 */
2957 	if (dotdot) {
2958 		vnode_t *zone_rootvp;
2959 
2960 		ASSERT(cs->exi != NULL);
2961 		zone_rootvp = cs->exi->exi_ne->exi_root->exi_vp;
2962 		/*
2963 		 * If dotdotting at the root of a filesystem, then
2964 		 * need to traverse back to the mounted-on filesystem
2965 		 * and do the dotdot lookup there.
2966 		 */
2967 		if ((cs->vp->v_flag & VROOT) || VN_CMP(cs->vp, zone_rootvp)) {
2968 
2969 			/*
2970 			 * If at the system root, then can
2971 			 * go up no further.
2972 			 */
2973 			if (VN_CMP(cs->vp, zone_rootvp))
2974 				return (puterrno4(ENOENT));
2975 
2976 			/*
2977 			 * Traverse back to the mounted-on filesystem
2978 			 */
2979 			cs->vp = untraverse(cs->vp, zone_rootvp);
2980 
2981 			/*
2982 			 * Set the different_export flag so we remember
2983 			 * to pick up a new exportinfo entry for
2984 			 * this new filesystem.
2985 			 */
2986 			different_export = 1;
2987 		} else {
2988 
2989 			/*
2990 			 * If dotdotting above an export point then set
2991 			 * the different_export to get new export info.
2992 			 */
2993 			different_export = nfs_exported(cs->exi, cs->vp);
2994 		}
2995 	}
2996 
2997 	error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2998 	    NULL, NULL, NULL);
2999 	if (error)
3000 		return (puterrno4(error));
3001 
3002 	/*
3003 	 * If the vnode is in a pseudo filesystem, check whether it is visible.
3004 	 *
3005 	 * XXX if the vnode is a symlink and it is not visible in
3006 	 * a pseudo filesystem, return ENOENT (not following symlink).
3007 	 * V4 client can not mount such symlink. This is a regression
3008 	 * from V2/V3.
3009 	 *
3010 	 * In the same exported filesystem, if the security flavor used
3011 	 * is not an explicitly shared flavor, limit the view to the visible
3012 	 * list entries only. This is not a WRONGSEC case because it's already
3013 	 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
3014 	 */
3015 	if (!different_export &&
3016 	    (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
3017 	    cs->access & CS_ACCESS_LIMITED)) {
3018 		if (! nfs_visible(cs->exi, vp, &different_export)) {
3019 			VN_RELE(vp);
3020 			return (puterrno4(ENOENT));
3021 		}
3022 	}
3023 
3024 	/*
3025 	 * If it's a mountpoint, then traverse it.
3026 	 */
3027 	if (vn_ismntpt(vp)) {
3028 		pre_exi = cs->exi;	/* save pre-traversed exportinfo */
3029 		pre_tvp = vp;		/* save pre-traversed vnode	*/
3030 
3031 		/*
3032 		 * hold pre_tvp to counteract rele by traverse.  We will
3033 		 * need pre_tvp below if checkexport4 fails
3034 		 */
3035 		VN_HOLD(pre_tvp);
3036 		if ((error = traverse(&vp)) != 0) {
3037 			VN_RELE(vp);
3038 			VN_RELE(pre_tvp);
3039 			return (puterrno4(error));
3040 		}
3041 		different_export = 1;
3042 	} else if (vp->v_vfsp != cs->vp->v_vfsp) {
3043 		/*
3044 		 * The vfsp comparison is to handle the case where
3045 		 * a LOFS mount is shared.  lo_lookup traverses mount points,
3046 		 * and NFS is unaware of local fs transistions because
3047 		 * v_vfsmountedhere isn't set.  For this special LOFS case,
3048 		 * the dir and the obj returned by lookup will have different
3049 		 * vfs ptrs.
3050 		 */
3051 		different_export = 1;
3052 	}
3053 
3054 	if (different_export) {
3055 
3056 		bzero(&fid, sizeof (fid));
3057 		fid.fid_len = MAXFIDSZ;
3058 		error = vop_fid_pseudo(vp, &fid);
3059 		if (error) {
3060 			VN_RELE(vp);
3061 			if (pre_tvp)
3062 				VN_RELE(pre_tvp);
3063 			return (puterrno4(error));
3064 		}
3065 
3066 		if (dotdot)
3067 			exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
3068 		else
3069 			exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
3070 
3071 		if (exi == NULL) {
3072 			if (pre_tvp) {
3073 				/*
3074 				 * If this vnode is a mounted-on vnode,
3075 				 * but the mounted-on file system is not
3076 				 * exported, send back the filehandle for
3077 				 * the mounted-on vnode, not the root of
3078 				 * the mounted-on file system.
3079 				 */
3080 				VN_RELE(vp);
3081 				vp = pre_tvp;
3082 				exi = pre_exi;
3083 			} else {
3084 				VN_RELE(vp);
3085 				return (puterrno4(EACCES));
3086 			}
3087 		} else if (pre_tvp) {
3088 			/* we're done with pre_tvp now. release extra hold */
3089 			VN_RELE(pre_tvp);
3090 		}
3091 
3092 		cs->exi = exi;
3093 
3094 		/*
3095 		 * Now we do a checkauth4. The reason is that
3096 		 * this client/user may not have access to the new
3097 		 * exported file system, and if they do,
3098 		 * the client/user may be mapped to a different uid.
3099 		 *
3100 		 * We start with a new cr, because the checkauth4 done
3101 		 * in the PUT*FH operation over wrote the cred's uid,
3102 		 * gid, etc, and we want the real thing before calling
3103 		 * checkauth4()
3104 		 */
3105 		crfree(cs->cr);
3106 		cs->cr = crdup(cs->basecr);
3107 
3108 		oldvp = cs->vp;
3109 		cs->vp = vp;
3110 		is_newvp = TRUE;
3111 
3112 		stat = call_checkauth4(cs, req);
3113 		if (stat != NFS4_OK) {
3114 			VN_RELE(cs->vp);
3115 			cs->vp = oldvp;
3116 			return (stat);
3117 		}
3118 	}
3119 
3120 	/*
3121 	 * After various NFS checks, do a label check on the path
3122 	 * component. The label on this path should either be the
3123 	 * global zone's label or a zone's label. We are only
3124 	 * interested in the zone's label because exported files
3125 	 * in global zone is accessible (though read-only) to
3126 	 * clients. The exportability/visibility check is already
3127 	 * done before reaching this code.
3128 	 */
3129 	if (is_system_labeled()) {
3130 		bslabel_t *clabel;
3131 
3132 		ASSERT(req->rq_label != NULL);
3133 		clabel = req->rq_label;
3134 		DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
3135 		    "got client label from request(1)", struct svc_req *, req);
3136 
3137 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3138 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3139 			    cs->exi)) {
3140 				error = EACCES;
3141 				goto err_out;
3142 			}
3143 		} else {
3144 			/*
3145 			 * We grant access to admin_low label clients
3146 			 * only if the client is trusted, i.e. also
3147 			 * running Solaris Trusted Extension.
3148 			 */
3149 			struct sockaddr	*ca;
3150 			int		addr_type;
3151 			void		*ipaddr;
3152 			tsol_tpc_t	*tp;
3153 
3154 			ca = (struct sockaddr *)svc_getrpccaller(
3155 			    req->rq_xprt)->buf;
3156 			if (ca->sa_family == AF_INET) {
3157 				addr_type = IPV4_VERSION;
3158 				ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
3159 			} else if (ca->sa_family == AF_INET6) {
3160 				addr_type = IPV6_VERSION;
3161 				ipaddr = &((struct sockaddr_in6 *)
3162 				    ca)->sin6_addr;
3163 			}
3164 			tp = find_tpc(ipaddr, addr_type, B_FALSE);
3165 			if (tp == NULL || tp->tpc_tp.tp_doi !=
3166 			    l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
3167 			    SUN_CIPSO) {
3168 				if (tp != NULL)
3169 					TPC_RELE(tp);
3170 				error = EACCES;
3171 				goto err_out;
3172 			}
3173 			TPC_RELE(tp);
3174 		}
3175 	}
3176 
3177 	error = makefh4(&cs->fh, vp, cs->exi);
3178 
3179 err_out:
3180 	if (error) {
3181 		if (is_newvp) {
3182 			VN_RELE(cs->vp);
3183 			cs->vp = oldvp;
3184 		} else
3185 			VN_RELE(vp);
3186 		return (puterrno4(error));
3187 	}
3188 
3189 	if (!is_newvp) {
3190 		if (cs->vp)
3191 			VN_RELE(cs->vp);
3192 		cs->vp = vp;
3193 	} else if (oldvp)
3194 		VN_RELE(oldvp);
3195 
3196 	/*
3197 	 * if did lookup on attrdir and didn't lookup .., set named
3198 	 * attr fh flag
3199 	 */
3200 	if (attrdir && ! dotdot)
3201 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
3202 
3203 	/* Assume false for now, open proc will set this */
3204 	cs->mandlock = FALSE;
3205 
3206 	return (NFS4_OK);
3207 }
3208 
3209 /* ARGSUSED */
3210 static void
rfs4_op_lookup(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3211 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3212     struct compound_state *cs)
3213 {
3214 	LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
3215 	LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
3216 	char *nm;
3217 	uint_t len;
3218 	struct sockaddr *ca;
3219 	char *name = NULL;
3220 	nfsstat4 status;
3221 
3222 	DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
3223 	    LOOKUP4args *, args);
3224 
3225 	if (cs->vp == NULL) {
3226 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3227 		goto out;
3228 	}
3229 
3230 	if (cs->vp->v_type == VLNK) {
3231 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3232 		goto out;
3233 	}
3234 
3235 	if (cs->vp->v_type != VDIR) {
3236 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3237 		goto out;
3238 	}
3239 
3240 	status = utf8_dir_verify(&args->objname);
3241 	if (status != NFS4_OK) {
3242 		*cs->statusp = resp->status = status;
3243 		goto out;
3244 	}
3245 
3246 	nm = utf8_to_str(&args->objname, &len, NULL);
3247 	if (nm == NULL) {
3248 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3249 		goto out;
3250 	}
3251 
3252 	if (len > MAXNAMELEN) {
3253 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
3254 		kmem_free(nm, len);
3255 		goto out;
3256 	}
3257 
3258 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3259 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
3260 	    MAXPATHLEN  + 1);
3261 
3262 	if (name == NULL) {
3263 		*cs->statusp = resp->status = NFS4ERR_INVAL;
3264 		kmem_free(nm, len);
3265 		goto out;
3266 	}
3267 
3268 	*cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
3269 
3270 	if (name != nm)
3271 		kmem_free(name, MAXPATHLEN + 1);
3272 	kmem_free(nm, len);
3273 
3274 out:
3275 	DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
3276 	    LOOKUP4res *, resp);
3277 }
3278 
3279 /* ARGSUSED */
3280 static void
rfs4_op_lookupp(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3281 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3282     struct compound_state *cs)
3283 {
3284 	LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
3285 
3286 	DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
3287 
3288 	if (cs->vp == NULL) {
3289 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3290 		goto out;
3291 	}
3292 
3293 	if (cs->vp->v_type == VLNK) {
3294 		*cs->statusp = resp->status = NFS4ERR_SYMLINK;
3295 		goto out;
3296 	}
3297 
3298 	if (cs->vp->v_type != VDIR) {
3299 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
3300 		goto out;
3301 	}
3302 
3303 	*cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
3304 
3305 	/*
3306 	 * From NFSV4 Specification, LOOKUPP should not check for
3307 	 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
3308 	 */
3309 	if (resp->status == NFS4ERR_WRONGSEC) {
3310 		*cs->statusp = resp->status = NFS4_OK;
3311 	}
3312 
3313 out:
3314 	DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
3315 	    LOOKUPP4res *, resp);
3316 }
3317 
3318 
3319 /*ARGSUSED2*/
3320 static void
rfs4_op_openattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3321 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3322     struct compound_state *cs)
3323 {
3324 	OPENATTR4args	*args = &argop->nfs_argop4_u.opopenattr;
3325 	OPENATTR4res	*resp = &resop->nfs_resop4_u.opopenattr;
3326 	vnode_t		*avp = NULL;
3327 	int		lookup_flags = LOOKUP_XATTR, error;
3328 	int		exp_ro = 0;
3329 
3330 	DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3331 	    OPENATTR4args *, args);
3332 
3333 	if (cs->vp == NULL) {
3334 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3335 		goto out;
3336 	}
3337 
3338 	if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3339 	    !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3340 		*cs->statusp = resp->status = puterrno4(ENOTSUP);
3341 		goto out;
3342 	}
3343 
3344 	/*
3345 	 * If file system supports passing ACE mask to VOP_ACCESS then
3346 	 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3347 	 */
3348 
3349 	if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3350 		error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3351 		    V_ACE_MASK, cs->cr, NULL);
3352 	else
3353 		error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3354 		    (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3355 		    (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3356 
3357 	if (error) {
3358 		*cs->statusp = resp->status = puterrno4(EACCES);
3359 		goto out;
3360 	}
3361 
3362 	/*
3363 	 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3364 	 * the file system is exported read-only -- regardless of
3365 	 * createdir flag.  Otherwise the attrdir would be created
3366 	 * (assuming server fs isn't mounted readonly locally).  If
3367 	 * VOP_LOOKUP returns ENOENT in this case, the error will
3368 	 * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3369 	 * because specfs has no VOP_LOOKUP op, so the macro would
3370 	 * return ENOSYS.  EINVAL is returned by all (current)
3371 	 * Solaris file system implementations when any of their
3372 	 * restrictions are violated (xattr(dir) can't have xattrdir).
3373 	 * Returning NOTSUPP is more appropriate in this case
3374 	 * because the object will never be able to have an attrdir.
3375 	 */
3376 	if (args->createdir && ! (exp_ro = rdonly4(req, cs)))
3377 		lookup_flags |= CREATE_XATTR_DIR;
3378 
3379 	error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3380 	    NULL, NULL, NULL);
3381 
3382 	if (error) {
3383 		if (error == ENOENT && args->createdir && exp_ro)
3384 			*cs->statusp = resp->status = puterrno4(EROFS);
3385 		else if (error == EINVAL || error == ENOSYS)
3386 			*cs->statusp = resp->status = puterrno4(ENOTSUP);
3387 		else
3388 			*cs->statusp = resp->status = puterrno4(error);
3389 		goto out;
3390 	}
3391 
3392 	ASSERT(avp->v_flag & V_XATTRDIR);
3393 
3394 	error = makefh4(&cs->fh, avp, cs->exi);
3395 
3396 	if (error) {
3397 		VN_RELE(avp);
3398 		*cs->statusp = resp->status = puterrno4(error);
3399 		goto out;
3400 	}
3401 
3402 	VN_RELE(cs->vp);
3403 	cs->vp = avp;
3404 
3405 	/*
3406 	 * There is no requirement for an attrdir fh flag
3407 	 * because the attrdir has a vnode flag to distinguish
3408 	 * it from regular (non-xattr) directories.  The
3409 	 * FH4_ATTRDIR flag is set for future sanity checks.
3410 	 */
3411 	set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3412 	*cs->statusp = resp->status = NFS4_OK;
3413 
3414 out:
3415 	DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3416 	    OPENATTR4res *, resp);
3417 }
3418 
3419 static int
do_io(int direction,vnode_t * vp,struct uio * uio,int ioflag,cred_t * cred,caller_context_t * ct)3420 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3421     caller_context_t *ct)
3422 {
3423 	int error;
3424 	int i;
3425 	clock_t delaytime;
3426 
3427 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3428 
3429 	/*
3430 	 * Don't block on mandatory locks. If this routine returns
3431 	 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3432 	 */
3433 	uio->uio_fmode = FNONBLOCK;
3434 
3435 	for (i = 0; i < rfs4_maxlock_tries; i++) {
3436 
3437 
3438 		if (direction == FREAD) {
3439 			(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3440 			error = VOP_READ(vp, uio, ioflag, cred, ct);
3441 			VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3442 		} else {
3443 			(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3444 			error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3445 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3446 		}
3447 
3448 		if (error != EAGAIN)
3449 			break;
3450 
3451 		if (i < rfs4_maxlock_tries - 1) {
3452 			delay(delaytime);
3453 			delaytime *= 2;
3454 		}
3455 	}
3456 
3457 	return (error);
3458 }
3459 
3460 /* ARGSUSED */
3461 static void
rfs4_op_read(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3462 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3463     struct compound_state *cs)
3464 {
3465 	READ4args *args = &argop->nfs_argop4_u.opread;
3466 	READ4res *resp = &resop->nfs_resop4_u.opread;
3467 	int error;
3468 	int verror;
3469 	vnode_t *vp;
3470 	struct vattr va;
3471 	struct iovec iov, *iovp = NULL;
3472 	int iovcnt;
3473 	struct uio uio;
3474 	u_offset_t offset;
3475 	bool_t *deleg = &cs->deleg;
3476 	nfsstat4 stat;
3477 	int in_crit = 0;
3478 	mblk_t *mp = NULL;
3479 	int alloc_err = 0;
3480 	int rdma_used = 0;
3481 	int loaned_buffers;
3482 	caller_context_t ct;
3483 	struct uio *uiop;
3484 
3485 	DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3486 	    READ4args, args);
3487 
3488 	vp = cs->vp;
3489 	if (vp == NULL) {
3490 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3491 		goto out;
3492 	}
3493 	if (cs->access == CS_ACCESS_DENIED) {
3494 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3495 		goto out;
3496 	}
3497 
3498 	get_stateid4(cs, &args->stateid);
3499 
3500 	if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3501 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
3502 		*cs->statusp = resp->status = stat;
3503 		goto out;
3504 	}
3505 
3506 	/*
3507 	 * Enter the critical region before calling VOP_RWLOCK
3508 	 * to avoid a deadlock with write requests.
3509 	 */
3510 	if (nbl_need_check(vp)) {
3511 		nbl_start_crit(vp, RW_READER);
3512 		in_crit = 1;
3513 		if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3514 		    &ct)) {
3515 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
3516 			goto out;
3517 		}
3518 	}
3519 
3520 	if (args->wlist) {
3521 		if (args->count > clist_len(args->wlist)) {
3522 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3523 			goto out;
3524 		}
3525 		rdma_used = 1;
3526 	}
3527 
3528 	/* use loaned buffers for TCP */
3529 	loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3530 
3531 	va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3532 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3533 
3534 	/*
3535 	 * If we can't get the attributes, then we can't do the
3536 	 * right access checking.  So, we'll fail the request.
3537 	 */
3538 	if (verror) {
3539 		*cs->statusp = resp->status = puterrno4(verror);
3540 		goto out;
3541 	}
3542 
3543 	if (vp->v_type != VREG) {
3544 		*cs->statusp = resp->status =
3545 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3546 		goto out;
3547 	}
3548 
3549 	if (crgetuid(cs->cr) != va.va_uid &&
3550 	    (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3551 	    (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3552 		*cs->statusp = resp->status = puterrno4(error);
3553 		goto out;
3554 	}
3555 
3556 	if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3557 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
3558 		goto out;
3559 	}
3560 
3561 	offset = args->offset;
3562 	if (offset >= va.va_size) {
3563 		*cs->statusp = resp->status = NFS4_OK;
3564 		resp->eof = TRUE;
3565 		resp->data_len = 0;
3566 		resp->data_val = NULL;
3567 		resp->mblk = NULL;
3568 		/* RDMA */
3569 		resp->wlist = args->wlist;
3570 		resp->wlist_len = resp->data_len;
3571 		*cs->statusp = resp->status = NFS4_OK;
3572 		if (resp->wlist)
3573 			clist_zero_len(resp->wlist);
3574 		goto out;
3575 	}
3576 
3577 	if (args->count == 0) {
3578 		*cs->statusp = resp->status = NFS4_OK;
3579 		resp->eof = FALSE;
3580 		resp->data_len = 0;
3581 		resp->data_val = NULL;
3582 		resp->mblk = NULL;
3583 		/* RDMA */
3584 		resp->wlist = args->wlist;
3585 		resp->wlist_len = resp->data_len;
3586 		if (resp->wlist)
3587 			clist_zero_len(resp->wlist);
3588 		goto out;
3589 	}
3590 
3591 	/*
3592 	 * Do not allocate memory more than maximum allowed
3593 	 * transfer size
3594 	 */
3595 	if (args->count > rfs4_tsize(req))
3596 		args->count = rfs4_tsize(req);
3597 
3598 	if (loaned_buffers) {
3599 		uiop = (uio_t *)rfs_setup_xuio(vp);
3600 		ASSERT(uiop != NULL);
3601 		uiop->uio_segflg = UIO_SYSSPACE;
3602 		uiop->uio_loffset = args->offset;
3603 		uiop->uio_resid = args->count;
3604 
3605 		/* Jump to do the read if successful */
3606 		if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3607 			/*
3608 			 * Need to hold the vnode until after VOP_RETZCBUF()
3609 			 * is called.
3610 			 */
3611 			VN_HOLD(vp);
3612 			goto doio_read;
3613 		}
3614 
3615 		DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3616 		    uiop->uio_loffset, int, uiop->uio_resid);
3617 
3618 		uiop->uio_extflg = 0;
3619 
3620 		/* failure to setup for zero copy */
3621 		rfs_free_xuio((void *)uiop);
3622 		loaned_buffers = 0;
3623 	}
3624 
3625 	/*
3626 	 * If returning data via RDMA Write, then grab the chunk list. If we
3627 	 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3628 	 */
3629 	if (rdma_used) {
3630 		mp = NULL;
3631 		(void) rdma_get_wchunk(req, &iov, args->wlist);
3632 		uio.uio_iov = &iov;
3633 		uio.uio_iovcnt = 1;
3634 	} else {
3635 		/*
3636 		 * mp will contain the data to be sent out in the read reply.
3637 		 * It will be freed after the reply has been sent.
3638 		 */
3639 		mp = rfs_read_alloc(args->count, &iovp, &iovcnt);
3640 		ASSERT(mp != NULL);
3641 		ASSERT(alloc_err == 0);
3642 		uio.uio_iov = iovp;
3643 		uio.uio_iovcnt = iovcnt;
3644 	}
3645 
3646 	uio.uio_segflg = UIO_SYSSPACE;
3647 	uio.uio_extflg = UIO_COPY_CACHED;
3648 	uio.uio_loffset = args->offset;
3649 	uio.uio_resid = args->count;
3650 	uiop = &uio;
3651 
3652 doio_read:
3653 	error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3654 
3655 	va.va_mask = AT_SIZE;
3656 	verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3657 
3658 	if (error) {
3659 		if (mp)
3660 			freemsg(mp);
3661 		*cs->statusp = resp->status = puterrno4(error);
3662 		goto out;
3663 	}
3664 
3665 	/* make mblk using zc buffers */
3666 	if (loaned_buffers) {
3667 		mp = uio_to_mblk(uiop);
3668 		ASSERT(mp != NULL);
3669 	}
3670 
3671 	*cs->statusp = resp->status = NFS4_OK;
3672 
3673 	ASSERT(uiop->uio_resid >= 0);
3674 	resp->data_len = args->count - uiop->uio_resid;
3675 	if (mp) {
3676 		resp->data_val = (char *)mp->b_datap->db_base;
3677 		rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3678 	} else {
3679 		resp->data_val = (caddr_t)iov.iov_base;
3680 	}
3681 
3682 	resp->mblk = mp;
3683 
3684 	if (!verror && offset + resp->data_len == va.va_size)
3685 		resp->eof = TRUE;
3686 	else
3687 		resp->eof = FALSE;
3688 
3689 	if (rdma_used) {
3690 		if (!rdma_setup_read_data4(args, resp)) {
3691 			*cs->statusp = resp->status = NFS4ERR_INVAL;
3692 		}
3693 	} else {
3694 		resp->wlist = NULL;
3695 	}
3696 
3697 out:
3698 	if (in_crit)
3699 		nbl_end_crit(vp);
3700 
3701 	if (iovp != NULL)
3702 		kmem_free(iovp, iovcnt * sizeof (struct iovec));
3703 
3704 	DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3705 	    READ4res *, resp);
3706 }
3707 
3708 static void
rfs4_op_read_free(nfs_resop4 * resop)3709 rfs4_op_read_free(nfs_resop4 *resop)
3710 {
3711 	READ4res	*resp = &resop->nfs_resop4_u.opread;
3712 
3713 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3714 		freemsg(resp->mblk);
3715 		resp->mblk = NULL;
3716 		resp->data_val = NULL;
3717 		resp->data_len = 0;
3718 	}
3719 }
3720 
3721 static void
rfs4_op_readdir_free(nfs_resop4 * resop)3722 rfs4_op_readdir_free(nfs_resop4 * resop)
3723 {
3724 	READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3725 
3726 	if (resp->status == NFS4_OK && resp->mblk != NULL) {
3727 		freeb(resp->mblk);
3728 		resp->mblk = NULL;
3729 		resp->data_len = 0;
3730 	}
3731 }
3732 
3733 
3734 /* ARGSUSED */
3735 static void
rfs4_op_putpubfh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3736 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3737     struct compound_state *cs)
3738 {
3739 	PUTPUBFH4res	*resp = &resop->nfs_resop4_u.opputpubfh;
3740 	int		error;
3741 	vnode_t		*vp;
3742 	struct exportinfo *exi, *sav_exi;
3743 	nfs_fh4_fmt_t	*fh_fmtp;
3744 	nfs_export_t *ne = nfs_get_export();
3745 
3746 	DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3747 
3748 	if (cs->vp) {
3749 		VN_RELE(cs->vp);
3750 		cs->vp = NULL;
3751 	}
3752 
3753 	if (cs->cr)
3754 		crfree(cs->cr);
3755 
3756 	cs->cr = crdup(cs->basecr);
3757 
3758 	vp = ne->exi_public->exi_vp;
3759 	if (vp == NULL) {
3760 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3761 		goto out;
3762 	}
3763 
3764 	error = makefh4(&cs->fh, vp, ne->exi_public);
3765 	if (error != 0) {
3766 		*cs->statusp = resp->status = puterrno4(error);
3767 		goto out;
3768 	}
3769 	sav_exi = cs->exi;
3770 	if (ne->exi_public == ne->exi_root) {
3771 		/*
3772 		 * No filesystem is actually shared public, so we default
3773 		 * to exi_root. In this case, we must check whether root
3774 		 * is exported.
3775 		 */
3776 		fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3777 
3778 		/*
3779 		 * if root filesystem is exported, the exportinfo struct that we
3780 		 * should use is what checkexport4 returns, because root_exi is
3781 		 * actually a mostly empty struct.
3782 		 */
3783 		exi = checkexport4(&fh_fmtp->fh4_fsid,
3784 		    (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3785 		cs->exi = ((exi != NULL) ? exi : ne->exi_public);
3786 	} else {
3787 		/*
3788 		 * it's a properly shared filesystem
3789 		 */
3790 		cs->exi = ne->exi_public;
3791 	}
3792 
3793 	if (is_system_labeled()) {
3794 		bslabel_t *clabel;
3795 
3796 		ASSERT(req->rq_label != NULL);
3797 		clabel = req->rq_label;
3798 		DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3799 		    "got client label from request(1)",
3800 		    struct svc_req *, req);
3801 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
3802 			if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3803 			    cs->exi)) {
3804 				*cs->statusp = resp->status =
3805 				    NFS4ERR_SERVERFAULT;
3806 				goto out;
3807 			}
3808 		}
3809 	}
3810 
3811 	VN_HOLD(vp);
3812 	cs->vp = vp;
3813 
3814 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3815 		VN_RELE(cs->vp);
3816 		cs->vp = NULL;
3817 		cs->exi = sav_exi;
3818 		goto out;
3819 	}
3820 
3821 	*cs->statusp = resp->status = NFS4_OK;
3822 out:
3823 	DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3824 	    PUTPUBFH4res *, resp);
3825 }
3826 
3827 /*
3828  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3829  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3830  * or joe have restrictive search permissions, then we shouldn't let
3831  * the client get a file handle. This is easy to enforce. However, we
3832  * don't know what security flavor should be used until we resolve the
3833  * path name. Another complication is uid mapping. If root is
3834  * the user, then it will be mapped to the anonymous user by default,
3835  * but we won't know that till we've resolved the path name. And we won't
3836  * know what the anonymous user is.
3837  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that the flavor of
 * the target object matches that of the request, and if root was the
 * caller, check for the root= and anon= options, and if necessary,
 * repeat the lookup using the right cred_t. But that's not done yet.
3842  */
3843 /* ARGSUSED */
3844 static void
rfs4_op_putfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3845 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3846     struct compound_state *cs)
3847 {
3848 	PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3849 	PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3850 	nfs_fh4_fmt_t *fh_fmtp;
3851 
3852 	DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3853 	    PUTFH4args *, args);
3854 
3855 	if (cs->vp) {
3856 		VN_RELE(cs->vp);
3857 		cs->vp = NULL;
3858 	}
3859 
3860 	if (cs->cr) {
3861 		crfree(cs->cr);
3862 		cs->cr = NULL;
3863 	}
3864 
3865 
3866 	if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3867 		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3868 		goto out;
3869 	}
3870 
3871 	fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3872 	cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3873 	    NULL);
3874 
3875 	if (cs->exi == NULL) {
3876 		*cs->statusp = resp->status = NFS4ERR_STALE;
3877 		goto out;
3878 	}
3879 
3880 	cs->cr = crdup(cs->basecr);
3881 
3882 	ASSERT(cs->cr != NULL);
3883 
3884 	if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3885 		*cs->statusp = resp->status;
3886 		goto out;
3887 	}
3888 
3889 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3890 		VN_RELE(cs->vp);
3891 		cs->vp = NULL;
3892 		goto out;
3893 	}
3894 
3895 	nfs_fh4_copy(&args->object, &cs->fh);
3896 	*cs->statusp = resp->status = NFS4_OK;
3897 	cs->deleg = FALSE;
3898 
3899 out:
3900 	DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3901 	    PUTFH4res *, resp);
3902 }
3903 
3904 /* ARGSUSED */
3905 static void
rfs4_op_putrootfh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3906 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3907     struct compound_state *cs)
3908 {
3909 	PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3910 	int error;
3911 	fid_t fid;
3912 	struct exportinfo *exi, *sav_exi;
3913 
3914 	DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3915 
3916 	if (cs->vp) {
3917 		VN_RELE(cs->vp);
3918 		cs->vp = NULL;
3919 	}
3920 
3921 	if (cs->cr)
3922 		crfree(cs->cr);
3923 
3924 	cs->cr = crdup(cs->basecr);
3925 
3926 	/*
3927 	 * Using rootdir, the system root vnode,
3928 	 * get its fid.
3929 	 */
3930 	bzero(&fid, sizeof (fid));
3931 	fid.fid_len = MAXFIDSZ;
3932 	error = vop_fid_pseudo(ZONE_ROOTVP(), &fid);
3933 	if (error != 0) {
3934 		*cs->statusp = resp->status = puterrno4(error);
3935 		goto out;
3936 	}
3937 
3938 	/*
3939 	 * Then use the root fsid & fid it to find out if it's exported
3940 	 *
3941 	 * If the server root isn't exported directly, then
3942 	 * it should at least be a pseudo export based on
3943 	 * one or more exports further down in the server's
3944 	 * file tree.
3945 	 */
3946 	exi = checkexport4(&ZONE_ROOTVP()->v_vfsp->vfs_fsid, &fid, NULL);
3947 	if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3948 		NFS4_DEBUG(rfs4_debug,
3949 		    (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3950 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3951 		goto out;
3952 	}
3953 
3954 	/*
3955 	 * Now make a filehandle based on the root
3956 	 * export and root vnode.
3957 	 */
3958 	error = makefh4(&cs->fh, ZONE_ROOTVP(), exi);
3959 	if (error != 0) {
3960 		*cs->statusp = resp->status = puterrno4(error);
3961 		goto out;
3962 	}
3963 
3964 	sav_exi = cs->exi;
3965 	cs->exi = exi;
3966 
3967 	VN_HOLD(ZONE_ROOTVP());
3968 	cs->vp = ZONE_ROOTVP();
3969 
3970 	if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3971 		VN_RELE(cs->vp);
3972 		cs->vp = NULL;
3973 		cs->exi = sav_exi;
3974 		goto out;
3975 	}
3976 
3977 	*cs->statusp = resp->status = NFS4_OK;
3978 	cs->deleg = FALSE;
3979 out:
3980 	DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3981 	    PUTROOTFH4res *, resp);
3982 }
3983 
3984 /*
3985  * readlink: args: CURRENT_FH.
3986  *	res: status. If success - CURRENT_FH unchanged, return linktext.
3987  */
3988 
3989 /* ARGSUSED */
3990 static void
rfs4_op_readlink(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)3991 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3992     struct compound_state *cs)
3993 {
3994 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3995 	int error;
3996 	vnode_t *vp;
3997 	struct iovec iov;
3998 	struct vattr va;
3999 	struct uio uio;
4000 	char *data;
4001 	struct sockaddr *ca;
4002 	char *name = NULL;
4003 	int is_referral;
4004 
4005 	DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
4006 
4007 	/* CURRENT_FH: directory */
4008 	vp = cs->vp;
4009 	if (vp == NULL) {
4010 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4011 		goto out;
4012 	}
4013 
4014 	if (cs->access == CS_ACCESS_DENIED) {
4015 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4016 		goto out;
4017 	}
4018 
4019 	/* Is it a referral? */
4020 	if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
4021 
4022 		is_referral = 1;
4023 
4024 	} else {
4025 
4026 		is_referral = 0;
4027 
4028 		if (vp->v_type == VDIR) {
4029 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
4030 			goto out;
4031 		}
4032 
4033 		if (vp->v_type != VLNK) {
4034 			*cs->statusp = resp->status = NFS4ERR_INVAL;
4035 			goto out;
4036 		}
4037 
4038 	}
4039 
4040 	va.va_mask = AT_MODE;
4041 	error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
4042 	if (error) {
4043 		*cs->statusp = resp->status = puterrno4(error);
4044 		goto out;
4045 	}
4046 
4047 	if (MANDLOCK(vp, va.va_mode)) {
4048 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4049 		goto out;
4050 	}
4051 
4052 	data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
4053 
4054 	if (is_referral) {
4055 		char *s;
4056 		size_t strsz;
4057 		kstat_named_t *stat =
4058 		    cs->exi->exi_ne->ne_globals->svstat[NFS_V4];
4059 
4060 		/* Get an artificial symlink based on a referral */
4061 		s = build_symlink(vp, cs->cr, &strsz);
4062 		stat[NFS_REFERLINKS].value.ui64++;
4063 		DTRACE_PROBE2(nfs4serv__func__referral__reflink,
4064 		    vnode_t *, vp, char *, s);
4065 		if (s == NULL)
4066 			error = EINVAL;
4067 		else {
4068 			error = 0;
4069 			(void) strlcpy(data, s, MAXPATHLEN + 1);
4070 			kmem_free(s, strsz);
4071 		}
4072 
4073 	} else {
4074 
4075 		iov.iov_base = data;
4076 		iov.iov_len = MAXPATHLEN;
4077 		uio.uio_iov = &iov;
4078 		uio.uio_iovcnt = 1;
4079 		uio.uio_segflg = UIO_SYSSPACE;
4080 		uio.uio_extflg = UIO_COPY_CACHED;
4081 		uio.uio_loffset = 0;
4082 		uio.uio_resid = MAXPATHLEN;
4083 
4084 		error = VOP_READLINK(vp, &uio, cs->cr, NULL);
4085 
4086 		if (!error)
4087 			*(data + MAXPATHLEN - uio.uio_resid) = '\0';
4088 	}
4089 
4090 	if (error) {
4091 		kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4092 		*cs->statusp = resp->status = puterrno4(error);
4093 		goto out;
4094 	}
4095 
4096 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4097 	name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
4098 	    MAXPATHLEN  + 1);
4099 
4100 	if (name == NULL) {
4101 		/*
4102 		 * Even though the conversion failed, we return
4103 		 * something. We just don't translate it.
4104 		 */
4105 		name = data;
4106 	}
4107 
4108 	/*
4109 	 * treat link name as data
4110 	 */
4111 	(void) str_to_utf8(name, (utf8string *)&resp->link);
4112 
4113 	if (name != data)
4114 		kmem_free(name, MAXPATHLEN + 1);
4115 	kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
4116 	*cs->statusp = resp->status = NFS4_OK;
4117 
4118 out:
4119 	DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
4120 	    READLINK4res *, resp);
4121 }
4122 
4123 static void
rfs4_op_readlink_free(nfs_resop4 * resop)4124 rfs4_op_readlink_free(nfs_resop4 *resop)
4125 {
4126 	READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
4127 	utf8string *symlink = (utf8string *)&resp->link;
4128 
4129 	if (symlink->utf8string_val) {
4130 		UTF8STRING_FREE(*symlink)
4131 	}
4132 }
4133 
4134 /*
4135  * release_lockowner:
4136  *	Release any state associated with the supplied
4137  *	lockowner. Note if any lo_state is holding locks we will not
4138  *	rele that lo_state and thus the lockowner will not be destroyed.
4139  *	A client using lock after the lock owner stateid has been released
4140  *	will suffer the consequence of NFS4ERR_BAD_STATEID and would have
4141  *	to reissue the lock with new_lock_owner set to TRUE.
4142  *	args: lock_owner
4143  *	res:  status
4144  */
4145 /* ARGSUSED */
4146 static void
rfs4_op_release_lockowner(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4147 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
4148     struct svc_req *req, struct compound_state *cs)
4149 {
4150 	RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
4151 	RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
4152 	rfs4_lockowner_t *lo;
4153 	rfs4_openowner_t *oo;
4154 	rfs4_state_t *sp;
4155 	rfs4_lo_state_t *lsp;
4156 	rfs4_client_t *cp;
4157 	bool_t create = FALSE;
4158 	locklist_t *llist;
4159 	sysid_t sysid;
4160 
4161 	DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
4162 	    cs, RELEASE_LOCKOWNER4args *, ap);
4163 
4164 	/* Make sure there is a clientid around for this request */
4165 	cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
4166 
4167 	if (cp == NULL) {
4168 		*cs->statusp = resp->status =
4169 		    rfs4_check_clientid(&ap->lock_owner.clientid, 0);
4170 		goto out;
4171 	}
4172 	rfs4_client_rele(cp);
4173 
4174 	lo = rfs4_findlockowner(&ap->lock_owner, &create);
4175 	if (lo == NULL) {
4176 		*cs->statusp = resp->status = NFS4_OK;
4177 		goto out;
4178 	}
4179 	ASSERT(lo->rl_client != NULL);
4180 
4181 	/*
4182 	 * Check for EXPIRED client. If so will reap state with in a lease
4183 	 * period or on next set_clientid_confirm step
4184 	 */
4185 	if (rfs4_lease_expired(lo->rl_client)) {
4186 		rfs4_lockowner_rele(lo);
4187 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
4188 		goto out;
4189 	}
4190 
4191 	/*
4192 	 * If no sysid has been assigned, then no locks exist; just return.
4193 	 */
4194 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4195 	if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
4196 		rfs4_lockowner_rele(lo);
4197 		rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4198 		goto out;
4199 	}
4200 
4201 	sysid = lo->rl_client->rc_sysidt;
4202 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4203 
4204 	/*
4205 	 * Mark the lockowner invalid.
4206 	 */
4207 	rfs4_dbe_hide(lo->rl_dbe);
4208 
4209 	/*
4210 	 * sysid-pid pair should now not be used since the lockowner is
4211 	 * invalid. If the client were to instantiate the lockowner again
4212 	 * it would be assigned a new pid. Thus we can get the list of
4213 	 * current locks.
4214 	 */
4215 
4216 	llist = flk_get_active_locks(sysid, lo->rl_pid);
4217 	/* If we are still holding locks fail */
4218 	if (llist != NULL) {
4219 
4220 		*cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4221 
4222 		flk_free_locklist(llist);
4223 		/*
4224 		 * We need to unhide the lockowner so the client can
4225 		 * try it again. The bad thing here is if the client
4226 		 * has a logic error that took it here in the first place
4227 		 * they probably have lost accounting of the locks that it
4228 		 * is holding. So we may have dangling state until the
4229 		 * open owner state is reaped via close. One scenario
4230 		 * that could possibly occur is that the client has
4231 		 * sent the unlock request(s) in separate threads
4232 		 * and has not waited for the replies before sending the
4233 		 * RELEASE_LOCKOWNER request. Presumably, it would expect
4234 		 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4235 		 * reissuing the request.
4236 		 */
4237 		rfs4_dbe_unhide(lo->rl_dbe);
4238 		rfs4_lockowner_rele(lo);
4239 		goto out;
4240 	}
4241 
4242 	/*
4243 	 * For the corresponding client we need to check each open
4244 	 * owner for any opens that have lockowner state associated
4245 	 * with this lockowner.
4246 	 */
4247 
4248 	rfs4_dbe_lock(lo->rl_client->rc_dbe);
4249 	for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4250 	    oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4251 
4252 		rfs4_dbe_lock(oo->ro_dbe);
4253 		for (sp = list_head(&oo->ro_statelist); sp != NULL;
4254 		    sp = list_next(&oo->ro_statelist, sp)) {
4255 
4256 			rfs4_dbe_lock(sp->rs_dbe);
4257 			for (lsp = list_head(&sp->rs_lostatelist);
4258 			    lsp != NULL;
4259 			    lsp = list_next(&sp->rs_lostatelist, lsp)) {
4260 				if (lsp->rls_locker == lo) {
4261 					rfs4_dbe_lock(lsp->rls_dbe);
4262 					rfs4_dbe_invalidate(lsp->rls_dbe);
4263 					rfs4_dbe_unlock(lsp->rls_dbe);
4264 				}
4265 			}
4266 			rfs4_dbe_unlock(sp->rs_dbe);
4267 		}
4268 		rfs4_dbe_unlock(oo->ro_dbe);
4269 	}
4270 	rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4271 
4272 	rfs4_lockowner_rele(lo);
4273 
4274 	*cs->statusp = resp->status = NFS4_OK;
4275 
4276 out:
4277 	DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4278 	    cs, RELEASE_LOCKOWNER4res *, resp);
4279 }
4280 
4281 /*
4282  * short utility function to lookup a file and recall the delegation
4283  */
4284 static rfs4_file_t *
rfs4_lookup_and_findfile(vnode_t * dvp,char * nm,vnode_t ** vpp,int * lkup_error,cred_t * cr)4285 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4286     int *lkup_error, cred_t *cr)
4287 {
4288 	vnode_t *vp;
4289 	rfs4_file_t *fp = NULL;
4290 	bool_t fcreate = FALSE;
4291 	int error;
4292 
4293 	if (vpp)
4294 		*vpp = NULL;
4295 
4296 	if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4297 	    NULL)) == 0) {
4298 		if (vp->v_type == VREG)
4299 			fp = rfs4_findfile(vp, NULL, &fcreate);
4300 		if (vpp)
4301 			*vpp = vp;
4302 		else
4303 			VN_RELE(vp);
4304 	}
4305 
4306 	if (lkup_error)
4307 		*lkup_error = error;
4308 
4309 	return (fp);
4310 }
4311 
4312 /*
4313  * remove: args: CURRENT_FH: directory; name.
4314  *	res: status. If success - CURRENT_FH unchanged, return change_info
4315  *		for directory.
4316  */
4317 /* ARGSUSED */
4318 static void
rfs4_op_remove(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4319 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4320     struct compound_state *cs)
4321 {
4322 	REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4323 	REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4324 	int error;
4325 	vnode_t *dvp, *vp;
4326 	struct vattr bdva, idva, adva;
4327 	char *nm;
4328 	uint_t len;
4329 	rfs4_file_t *fp;
4330 	int in_crit = 0;
4331 	bslabel_t *clabel;
4332 	struct sockaddr *ca;
4333 	char *name = NULL;
4334 	nfsstat4 status;
4335 
4336 	DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4337 	    REMOVE4args *, args);
4338 
4339 	/* CURRENT_FH: directory */
4340 	dvp = cs->vp;
4341 	if (dvp == NULL) {
4342 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4343 		goto out;
4344 	}
4345 
4346 	if (cs->access == CS_ACCESS_DENIED) {
4347 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4348 		goto out;
4349 	}
4350 
4351 	/*
4352 	 * If there is an unshared filesystem mounted on this vnode,
4353 	 * Do not allow to remove anything in this directory.
4354 	 */
4355 	if (vn_ismntpt(dvp)) {
4356 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4357 		goto out;
4358 	}
4359 
4360 	if (dvp->v_type != VDIR) {
4361 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4362 		goto out;
4363 	}
4364 
4365 	status = utf8_dir_verify(&args->target);
4366 	if (status != NFS4_OK) {
4367 		*cs->statusp = resp->status = status;
4368 		goto out;
4369 	}
4370 
4371 	/*
4372 	 * Lookup the file so that we can check if it's a directory
4373 	 */
4374 	nm = utf8_to_fn(&args->target, &len, NULL);
4375 	if (nm == NULL) {
4376 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4377 		goto out;
4378 	}
4379 
4380 	if (len > MAXNAMELEN) {
4381 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4382 		kmem_free(nm, len);
4383 		goto out;
4384 	}
4385 
4386 	if (rdonly4(req, cs)) {
4387 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4388 		kmem_free(nm, len);
4389 		goto out;
4390 	}
4391 
4392 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4393 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4394 	    MAXPATHLEN  + 1);
4395 
4396 	if (name == NULL) {
4397 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4398 		kmem_free(nm, len);
4399 		goto out;
4400 	}
4401 
4402 	/*
4403 	 * Lookup the file to determine type and while we are see if
4404 	 * there is a file struct around and check for delegation.
4405 	 * We don't need to acquire va_seq before this lookup, if
4406 	 * it causes an update, cinfo.before will not match, which will
4407 	 * trigger a cache flush even if atomic is TRUE.
4408 	 */
4409 	fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr);
4410 	if (fp != NULL) {
4411 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4412 		    NULL)) {
4413 			VN_RELE(vp);
4414 			rfs4_file_rele(fp);
4415 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4416 			if (nm != name)
4417 				kmem_free(name, MAXPATHLEN + 1);
4418 			kmem_free(nm, len);
4419 			goto out;
4420 		}
4421 	}
4422 
4423 	/* Didn't find anything to remove */
4424 	if (vp == NULL) {
4425 		*cs->statusp = resp->status = error;
4426 		if (nm != name)
4427 			kmem_free(name, MAXPATHLEN + 1);
4428 		kmem_free(nm, len);
4429 		goto out;
4430 	}
4431 
4432 	if (nbl_need_check(vp)) {
4433 		nbl_start_crit(vp, RW_READER);
4434 		in_crit = 1;
4435 		if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4436 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4437 			if (nm != name)
4438 				kmem_free(name, MAXPATHLEN + 1);
4439 			kmem_free(nm, len);
4440 			nbl_end_crit(vp);
4441 			VN_RELE(vp);
4442 			if (fp) {
4443 				rfs4_clear_dont_grant(fp);
4444 				rfs4_file_rele(fp);
4445 			}
4446 			goto out;
4447 		}
4448 	}
4449 
4450 	/* check label before allowing removal */
4451 	if (is_system_labeled()) {
4452 		ASSERT(req->rq_label != NULL);
4453 		clabel = req->rq_label;
4454 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4455 		    "got client label from request(1)",
4456 		    struct svc_req *, req);
4457 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4458 			if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4459 			    cs->exi)) {
4460 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4461 				if (name != nm)
4462 					kmem_free(name, MAXPATHLEN + 1);
4463 				kmem_free(nm, len);
4464 				if (in_crit)
4465 					nbl_end_crit(vp);
4466 				VN_RELE(vp);
4467 				if (fp) {
4468 					rfs4_clear_dont_grant(fp);
4469 					rfs4_file_rele(fp);
4470 				}
4471 				goto out;
4472 			}
4473 		}
4474 	}
4475 
4476 	/* Get dir "before" change value */
4477 	bdva.va_mask = AT_CTIME|AT_SEQ;
4478 	error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4479 	if (error) {
4480 		*cs->statusp = resp->status = puterrno4(error);
4481 		if (nm != name)
4482 			kmem_free(name, MAXPATHLEN + 1);
4483 		kmem_free(nm, len);
4484 		if (in_crit)
4485 			nbl_end_crit(vp);
4486 		VN_RELE(vp);
4487 		if (fp) {
4488 			rfs4_clear_dont_grant(fp);
4489 			rfs4_file_rele(fp);
4490 		}
4491 		goto out;
4492 	}
4493 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4494 
4495 	/* Actually do the REMOVE operation */
4496 	if (vp->v_type == VDIR) {
4497 		/*
4498 		 * Can't remove a directory that has a mounted-on filesystem.
4499 		 */
4500 		if (vn_ismntpt(vp)) {
4501 			error = EACCES;
4502 		} else {
4503 			/*
4504 			 * System V defines rmdir to return EEXIST,
4505 			 * not ENOTEMPTY, if the directory is not
4506 			 * empty.  A System V NFS server needs to map
4507 			 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4508 			 * transmit over the wire.
4509 			 */
4510 			if ((error = VOP_RMDIR(dvp, name, ZONE_ROOTVP(), cs->cr,
4511 			    NULL, 0)) == EEXIST)
4512 				error = ENOTEMPTY;
4513 		}
4514 	} else {
4515 		if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4516 		    fp != NULL) {
4517 			struct vattr va;
4518 			vnode_t *tvp;
4519 
4520 			rfs4_dbe_lock(fp->rf_dbe);
4521 			tvp = fp->rf_vp;
4522 			if (tvp)
4523 				VN_HOLD(tvp);
4524 			rfs4_dbe_unlock(fp->rf_dbe);
4525 
4526 			if (tvp) {
4527 				/*
4528 				 * This is va_seq safe because we are not
4529 				 * manipulating dvp.
4530 				 */
4531 				va.va_mask = AT_NLINK;
4532 				if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4533 				    va.va_nlink == 0) {
4534 					/* Remove state on file remove */
4535 					if (in_crit) {
4536 						nbl_end_crit(vp);
4537 						in_crit = 0;
4538 					}
4539 					rfs4_close_all_state(fp);
4540 				}
4541 				VN_RELE(tvp);
4542 			}
4543 		}
4544 	}
4545 
4546 	if (in_crit)
4547 		nbl_end_crit(vp);
4548 	VN_RELE(vp);
4549 
4550 	if (fp) {
4551 		rfs4_clear_dont_grant(fp);
4552 		rfs4_file_rele(fp);
4553 	}
4554 	if (nm != name)
4555 		kmem_free(name, MAXPATHLEN + 1);
4556 	kmem_free(nm, len);
4557 
4558 	if (error) {
4559 		*cs->statusp = resp->status = puterrno4(error);
4560 		goto out;
4561 	}
4562 
4563 	/*
4564 	 * Get the initial "after" sequence number, if it fails, set to zero
4565 	 */
4566 	idva.va_mask = AT_SEQ;
4567 	if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4568 		idva.va_seq = 0;
4569 
4570 	/*
4571 	 * Force modified data and metadata out to stable storage.
4572 	 */
4573 	(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4574 
4575 	/*
4576 	 * Get "after" change value, if it fails, simply return the
4577 	 * before value.
4578 	 */
4579 	adva.va_mask = AT_CTIME|AT_SEQ;
4580 	if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4581 		adva.va_ctime = bdva.va_ctime;
4582 		adva.va_seq = 0;
4583 	}
4584 
4585 	NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4586 
4587 	/*
4588 	 * The cinfo.atomic = TRUE only if we have
4589 	 * non-zero va_seq's, and it has incremented by exactly one
4590 	 * during the VOP_REMOVE/RMDIR and it didn't change during
4591 	 * the VOP_FSYNC.
4592 	 */
4593 	if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4594 	    idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4595 		resp->cinfo.atomic = TRUE;
4596 	else
4597 		resp->cinfo.atomic = FALSE;
4598 
4599 	*cs->statusp = resp->status = NFS4_OK;
4600 
4601 out:
4602 	DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4603 	    REMOVE4res *, resp);
4604 }
4605 
4606 /*
4607  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4608  *		oldname and newname.
4609  *	res: status. If success - CURRENT_FH unchanged, return change_info
4610  *		for both from and target directories.
4611  */
4612 /* ARGSUSED */
4613 static void
rfs4_op_rename(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)4614 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4615     struct compound_state *cs)
4616 {
4617 	RENAME4args *args = &argop->nfs_argop4_u.oprename;
4618 	RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4619 	int error;
4620 	vnode_t *odvp;
4621 	vnode_t *ndvp;
4622 	vnode_t *srcvp, *targvp, *tvp;
4623 	struct vattr obdva, oidva, oadva;
4624 	struct vattr nbdva, nidva, nadva;
4625 	char *onm, *nnm;
4626 	uint_t olen, nlen;
4627 	rfs4_file_t *fp, *sfp;
4628 	int in_crit_src, in_crit_targ;
4629 	int fp_rele_grant_hold, sfp_rele_grant_hold;
4630 	int unlinked;
4631 	bslabel_t *clabel;
4632 	struct sockaddr *ca;
4633 	char *converted_onm = NULL;
4634 	char *converted_nnm = NULL;
4635 	nfsstat4 status;
4636 
4637 	DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4638 	    RENAME4args *, args);
4639 
4640 	fp = sfp = NULL;
4641 	srcvp = targvp = tvp = NULL;
4642 	in_crit_src = in_crit_targ = 0;
4643 	fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4644 	unlinked = 0;
4645 
4646 	/* CURRENT_FH: target directory */
4647 	ndvp = cs->vp;
4648 	if (ndvp == NULL) {
4649 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4650 		goto out;
4651 	}
4652 
4653 	/* SAVED_FH: from directory */
4654 	odvp = cs->saved_vp;
4655 	if (odvp == NULL) {
4656 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4657 		goto out;
4658 	}
4659 
4660 	if (cs->access == CS_ACCESS_DENIED) {
4661 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4662 		goto out;
4663 	}
4664 
4665 	/*
4666 	 * If there is an unshared filesystem mounted on this vnode,
4667 	 * do not allow to rename objects in this directory.
4668 	 */
4669 	if (vn_ismntpt(odvp)) {
4670 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4671 		goto out;
4672 	}
4673 
4674 	/*
4675 	 * If there is an unshared filesystem mounted on this vnode,
4676 	 * do not allow to rename to this directory.
4677 	 */
4678 	if (vn_ismntpt(ndvp)) {
4679 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
4680 		goto out;
4681 	}
4682 
4683 	if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4684 		*cs->statusp = resp->status = NFS4ERR_NOTDIR;
4685 		goto out;
4686 	}
4687 
4688 	if (cs->saved_exi != cs->exi) {
4689 		*cs->statusp = resp->status = NFS4ERR_XDEV;
4690 		goto out;
4691 	}
4692 
4693 	status = utf8_dir_verify(&args->oldname);
4694 	if (status != NFS4_OK) {
4695 		*cs->statusp = resp->status = status;
4696 		goto out;
4697 	}
4698 
4699 	status = utf8_dir_verify(&args->newname);
4700 	if (status != NFS4_OK) {
4701 		*cs->statusp = resp->status = status;
4702 		goto out;
4703 	}
4704 
4705 	onm = utf8_to_fn(&args->oldname, &olen, NULL);
4706 	if (onm == NULL) {
4707 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4708 		goto out;
4709 	}
4710 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4711 	nlen = MAXPATHLEN + 1;
4712 	converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4713 	    nlen);
4714 
4715 	if (converted_onm == NULL) {
4716 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4717 		kmem_free(onm, olen);
4718 		goto out;
4719 	}
4720 
4721 	nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4722 	if (nnm == NULL) {
4723 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4724 		if (onm != converted_onm)
4725 			kmem_free(converted_onm, MAXPATHLEN + 1);
4726 		kmem_free(onm, olen);
4727 		goto out;
4728 	}
4729 	converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4730 	    MAXPATHLEN  + 1);
4731 
4732 	if (converted_nnm == NULL) {
4733 		*cs->statusp = resp->status = NFS4ERR_INVAL;
4734 		kmem_free(nnm, nlen);
4735 		nnm = NULL;
4736 		if (onm != converted_onm)
4737 			kmem_free(converted_onm, MAXPATHLEN + 1);
4738 		kmem_free(onm, olen);
4739 		goto out;
4740 	}
4741 
4742 
4743 	if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4744 		*cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4745 		kmem_free(onm, olen);
4746 		kmem_free(nnm, nlen);
4747 		goto out;
4748 	}
4749 
4750 
4751 	if (rdonly4(req, cs)) {
4752 		*cs->statusp = resp->status = NFS4ERR_ROFS;
4753 		if (onm != converted_onm)
4754 			kmem_free(converted_onm, MAXPATHLEN + 1);
4755 		kmem_free(onm, olen);
4756 		if (nnm != converted_nnm)
4757 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4758 		kmem_free(nnm, nlen);
4759 		goto out;
4760 	}
4761 
4762 	/* check label of the target dir */
4763 	if (is_system_labeled()) {
4764 		ASSERT(req->rq_label != NULL);
4765 		clabel = req->rq_label;
4766 		DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4767 		    "got client label from request(1)",
4768 		    struct svc_req *, req);
4769 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
4770 			if (!do_rfs_label_check(clabel, ndvp,
4771 			    EQUALITY_CHECK, cs->exi)) {
4772 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
4773 				goto err_out;
4774 			}
4775 		}
4776 	}
4777 
4778 	/*
4779 	 * Is the source a file and have a delegation?
4780 	 * We don't need to acquire va_seq before these lookups, if
4781 	 * it causes an update, cinfo.before will not match, which will
4782 	 * trigger a cache flush even if atomic is TRUE.
4783 	 */
4784 	sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4785 	    &error, cs->cr);
4786 	if (sfp != NULL) {
4787 		if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4788 		    NULL)) {
4789 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4790 			goto err_out;
4791 		}
4792 	}
4793 
4794 	if (srcvp == NULL) {
4795 		*cs->statusp = resp->status = puterrno4(error);
4796 		if (onm != converted_onm)
4797 			kmem_free(converted_onm, MAXPATHLEN + 1);
4798 		kmem_free(onm, olen);
4799 		if (nnm != converted_nnm)
4800 			kmem_free(converted_nnm, MAXPATHLEN + 1);
4801 		kmem_free(nnm, nlen);
4802 		goto out;
4803 	}
4804 
4805 	sfp_rele_grant_hold = 1;
4806 
4807 	/* Does the destination exist and a file and have a delegation? */
4808 	fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp, NULL,
4809 	    cs->cr);
4810 	if (fp != NULL) {
4811 		if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4812 		    NULL)) {
4813 			*cs->statusp = resp->status = NFS4ERR_DELAY;
4814 			goto err_out;
4815 		}
4816 	}
4817 	fp_rele_grant_hold = 1;
4818 
4819 	/* Check for NBMAND lock on both source and target */
4820 	if (nbl_need_check(srcvp)) {
4821 		nbl_start_crit(srcvp, RW_READER);
4822 		in_crit_src = 1;
4823 		if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4824 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4825 			goto err_out;
4826 		}
4827 	}
4828 
4829 	if (targvp && nbl_need_check(targvp)) {
4830 		nbl_start_crit(targvp, RW_READER);
4831 		in_crit_targ = 1;
4832 		if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4833 			*cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4834 			goto err_out;
4835 		}
4836 	}
4837 
4838 	/* Get source "before" change value */
4839 	obdva.va_mask = AT_CTIME|AT_SEQ;
4840 	error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4841 	if (!error) {
4842 		nbdva.va_mask = AT_CTIME|AT_SEQ;
4843 		error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4844 	}
4845 	if (error) {
4846 		*cs->statusp = resp->status = puterrno4(error);
4847 		goto err_out;
4848 	}
4849 
4850 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4851 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4852 
4853 	error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm, cs->cr,
4854 	    NULL, 0);
4855 
4856 	/*
4857 	 * If target existed and was unlinked by VOP_RENAME, state will need
4858 	 * closed. To avoid deadlock, rfs4_close_all_state will be done after
4859 	 * any necessary nbl_end_crit on srcvp and tgtvp.
4860 	 */
4861 	if (error == 0 && fp != NULL) {
4862 		rfs4_dbe_lock(fp->rf_dbe);
4863 		tvp = fp->rf_vp;
4864 		if (tvp)
4865 			VN_HOLD(tvp);
4866 		rfs4_dbe_unlock(fp->rf_dbe);
4867 
4868 		if (tvp) {
4869 			struct vattr va;
4870 			va.va_mask = AT_NLINK;
4871 
4872 			if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4873 			    va.va_nlink == 0) {
4874 				unlinked = 1;
4875 
4876 				/* DEBUG data */
4877 				if ((srcvp == targvp) || (tvp != targvp)) {
4878 					cmn_err(CE_WARN, "rfs4_op_rename: "
4879 					    "srcvp %p, targvp: %p, tvp: %p",
4880 					    (void *)srcvp, (void *)targvp,
4881 					    (void *)tvp);
4882 				}
4883 			} else {
4884 				VN_RELE(tvp);
4885 			}
4886 		}
4887 	}
4888 	if (error == 0)
4889 		vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4890 
4891 	if (in_crit_src)
4892 		nbl_end_crit(srcvp);
4893 	if (srcvp)
4894 		VN_RELE(srcvp);
4895 	if (in_crit_targ)
4896 		nbl_end_crit(targvp);
4897 	if (targvp)
4898 		VN_RELE(targvp);
4899 
4900 	if (unlinked) {
4901 		ASSERT(fp != NULL);
4902 		ASSERT(tvp != NULL);
4903 
4904 		/* DEBUG data */
4905 		if (RW_READ_HELD(&tvp->v_nbllock)) {
4906 			cmn_err(CE_WARN, "rfs4_op_rename: "
4907 			    "RW_READ_HELD(%p)", (void *)tvp);
4908 		}
4909 
4910 		/* The file is gone and so should the state */
4911 		rfs4_close_all_state(fp);
4912 		VN_RELE(tvp);
4913 	}
4914 
4915 	if (sfp) {
4916 		rfs4_clear_dont_grant(sfp);
4917 		rfs4_file_rele(sfp);
4918 	}
4919 	if (fp) {
4920 		rfs4_clear_dont_grant(fp);
4921 		rfs4_file_rele(fp);
4922 	}
4923 
4924 	if (converted_onm != onm)
4925 		kmem_free(converted_onm, MAXPATHLEN + 1);
4926 	kmem_free(onm, olen);
4927 	if (converted_nnm != nnm)
4928 		kmem_free(converted_nnm, MAXPATHLEN + 1);
4929 	kmem_free(nnm, nlen);
4930 
4931 	/*
4932 	 * Get the initial "after" sequence number, if it fails, set to zero
4933 	 */
4934 	oidva.va_mask = AT_SEQ;
4935 	if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4936 		oidva.va_seq = 0;
4937 
4938 	nidva.va_mask = AT_SEQ;
4939 	if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4940 		nidva.va_seq = 0;
4941 
4942 	/*
4943 	 * Force modified data and metadata out to stable storage.
4944 	 */
4945 	(void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4946 	(void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4947 
4948 	if (error) {
4949 		*cs->statusp = resp->status = puterrno4(error);
4950 		goto out;
4951 	}
4952 
4953 	/*
4954 	 * Get "after" change values, if it fails, simply return the
4955 	 * before value.
4956 	 */
4957 	oadva.va_mask = AT_CTIME|AT_SEQ;
4958 	if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4959 		oadva.va_ctime = obdva.va_ctime;
4960 		oadva.va_seq = 0;
4961 	}
4962 
4963 	nadva.va_mask = AT_CTIME|AT_SEQ;
4964 	if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4965 		nadva.va_ctime = nbdva.va_ctime;
4966 		nadva.va_seq = 0;
4967 	}
4968 
4969 	NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4970 	NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4971 
4972 	/*
4973 	 * The cinfo.atomic = TRUE only if we have
4974 	 * non-zero va_seq's, and it has incremented by exactly one
4975 	 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4976 	 */
4977 	if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4978 	    oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4979 		resp->source_cinfo.atomic = TRUE;
4980 	else
4981 		resp->source_cinfo.atomic = FALSE;
4982 
4983 	if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4984 	    nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4985 		resp->target_cinfo.atomic = TRUE;
4986 	else
4987 		resp->target_cinfo.atomic = FALSE;
4988 
4989 #ifdef	VOLATILE_FH_TEST
4990 	{
4991 	extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4992 
4993 	/*
4994 	 * Add the renamed file handle to the volatile rename list
4995 	 */
4996 	if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4997 		/* file handles may expire on rename */
4998 		vnode_t *vp;
4999 
5000 		nnm = utf8_to_fn(&args->newname, &nlen, NULL);
5001 		/*
5002 		 * Already know that nnm will be a valid string
5003 		 */
5004 		error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
5005 		    NULL, NULL, NULL);
5006 		kmem_free(nnm, nlen);
5007 		if (!error) {
5008 			add_volrnm_fh(cs->exi, vp);
5009 			VN_RELE(vp);
5010 		}
5011 	}
5012 	}
5013 #endif	/* VOLATILE_FH_TEST */
5014 
5015 	*cs->statusp = resp->status = NFS4_OK;
5016 out:
5017 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
5018 	    RENAME4res *, resp);
5019 	return;
5020 
5021 err_out:
5022 	if (onm != converted_onm)
5023 		kmem_free(converted_onm, MAXPATHLEN + 1);
5024 	if (onm != NULL)
5025 		kmem_free(onm, olen);
5026 	if (nnm != converted_nnm)
5027 		kmem_free(converted_nnm, MAXPATHLEN + 1);
5028 	if (nnm != NULL)
5029 		kmem_free(nnm, nlen);
5030 
5031 	if (in_crit_src) nbl_end_crit(srcvp);
5032 	if (in_crit_targ) nbl_end_crit(targvp);
5033 	if (targvp) VN_RELE(targvp);
5034 	if (srcvp) VN_RELE(srcvp);
5035 	if (sfp) {
5036 		if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
5037 		rfs4_file_rele(sfp);
5038 	}
5039 	if (fp) {
5040 		if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
5041 		rfs4_file_rele(fp);
5042 	}
5043 
5044 	DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
5045 	    RENAME4res *, resp);
5046 }
5047 
5048 /* ARGSUSED */
5049 static void
rfs4_op_renew(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5050 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5051     struct compound_state *cs)
5052 {
5053 	RENEW4args *args = &argop->nfs_argop4_u.oprenew;
5054 	RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
5055 	rfs4_client_t *cp;
5056 
5057 	DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
5058 	    RENEW4args *, args);
5059 
5060 	if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
5061 		*cs->statusp = resp->status =
5062 		    rfs4_check_clientid(&args->clientid, 0);
5063 		goto out;
5064 	}
5065 
5066 	if (rfs4_lease_expired(cp)) {
5067 		rfs4_client_rele(cp);
5068 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
5069 		goto out;
5070 	}
5071 
5072 	rfs4_update_lease(cp);
5073 
5074 	mutex_enter(cp->rc_cbinfo.cb_lock);
5075 	if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
5076 		cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
5077 		*cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
5078 	} else {
5079 		*cs->statusp = resp->status = NFS4_OK;
5080 	}
5081 	mutex_exit(cp->rc_cbinfo.cb_lock);
5082 
5083 	rfs4_client_rele(cp);
5084 
5085 out:
5086 	DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
5087 	    RENEW4res *, resp);
5088 }
5089 
5090 /* ARGSUSED */
5091 static void
rfs4_op_restorefh(nfs_argop4 * args,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5092 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
5093     struct compound_state *cs)
5094 {
5095 	RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
5096 
5097 	DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
5098 
5099 	/* No need to check cs->access - we are not accessing any object */
5100 	if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
5101 		*cs->statusp = resp->status = NFS4ERR_RESTOREFH;
5102 		goto out;
5103 	}
5104 	if (cs->vp != NULL) {
5105 		VN_RELE(cs->vp);
5106 	}
5107 	cs->vp = cs->saved_vp;
5108 	cs->saved_vp = NULL;
5109 	cs->exi = cs->saved_exi;
5110 	nfs_fh4_copy(&cs->saved_fh, &cs->fh);
5111 	*cs->statusp = resp->status = NFS4_OK;
5112 	cs->deleg = FALSE;
5113 
5114 	if (cs->cs_flags & RFS4_SAVED_STATEID) {
5115 		cs->current_stateid = cs->save_stateid;
5116 		cs->cs_flags |= RFS4_CURRENT_STATEID;
5117 	}
5118 out:
5119 	DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
5120 	    RESTOREFH4res *, resp);
5121 }
5122 
5123 /* ARGSUSED */
5124 static void
rfs4_op_savefh(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5125 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5126     struct compound_state *cs)
5127 {
5128 	SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
5129 
5130 	DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
5131 
5132 	/* No need to check cs->access - we are not accessing any object */
5133 	if (cs->vp == NULL) {
5134 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5135 		goto out;
5136 	}
5137 	if (cs->saved_vp != NULL) {
5138 		VN_RELE(cs->saved_vp);
5139 	}
5140 	cs->saved_vp = cs->vp;
5141 	VN_HOLD(cs->saved_vp);
5142 	cs->saved_exi = cs->exi;
5143 	/*
5144 	 * since SAVEFH is fairly rare, don't alloc space for its fh
5145 	 * unless necessary.
5146 	 */
5147 	if (cs->saved_fh.nfs_fh4_val == NULL) {
5148 		cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
5149 	}
5150 	nfs_fh4_copy(&cs->fh, &cs->saved_fh);
5151 	*cs->statusp = resp->status = NFS4_OK;
5152 
5153 	if (cs->cs_flags & RFS4_CURRENT_STATEID) {
5154 		cs->save_stateid = cs->current_stateid;
5155 		cs->cs_flags |= RFS4_SAVED_STATEID;
5156 	}
5157 out:
5158 	DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
5159 	    SAVEFH4res *, resp);
5160 }
5161 
5162 /*
5163  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
5164  * return the bitmap of attrs that were set successfully. It is also
5165  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
5166  * always be called only after rfs4_do_set_attrs().
5167  *
5168  * Verify that the attributes are same as the expected ones. sargp->vap
5169  * and sargp->sbp contain the input attributes as translated from fattr4.
5170  *
5171  * This function verifies only the attrs that correspond to a vattr or
5172  * vfsstat struct. That is because of the extra step needed to get the
5173  * corresponding system structs. Other attributes have already been set or
5174  * verified by do_rfs4_set_attrs.
5175  *
5176  * Return 0 if all attrs match, -1 if some don't, error if error processing.
5177  */
5178 static int
rfs4_verify_attr(struct nfs4_svgetit_arg * sargp,bitmap4 * resp,struct nfs4_ntov_table * ntovp)5179 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
5180     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
5181 {
5182 	int error, ret_error = 0;
5183 	int i, k;
5184 	uint_t sva_mask = sargp->vap->va_mask;
5185 	uint_t vbit;
5186 	union nfs4_attr_u *na;
5187 	uint8_t *amap;
5188 	bool_t getsb = ntovp->vfsstat;
5189 
5190 	if (sva_mask != 0) {
5191 		/*
5192 		 * Okay to overwrite sargp->vap because we verify based
5193 		 * on the incoming values.
5194 		 */
5195 		ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
5196 		    sargp->cs->cr, NULL);
5197 		if (ret_error) {
5198 			if (resp == NULL)
5199 				return (ret_error);
5200 			/*
5201 			 * Must return bitmap of successful attrs
5202 			 */
5203 			sva_mask = 0;	/* to prevent checking vap later */
5204 		} else {
5205 			/*
5206 			 * Some file systems clobber va_mask. it is probably
5207 			 * wrong of them to do so, nonethless we practice
5208 			 * defensive coding.
5209 			 * See bug id 4276830.
5210 			 */
5211 			sargp->vap->va_mask = sva_mask;
5212 		}
5213 	}
5214 
5215 	if (getsb) {
5216 		/*
5217 		 * Now get the superblock and loop on the bitmap, as there is
5218 		 * no simple way of translating from superblock to bitmap4.
5219 		 */
5220 		ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
5221 		if (ret_error) {
5222 			if (resp == NULL)
5223 				goto errout;
5224 			getsb = FALSE;
5225 		}
5226 	}
5227 
5228 	/*
5229 	 * Now loop and verify each attribute which getattr returned
5230 	 * whether it's the same as the input.
5231 	 */
5232 	if (resp == NULL && !getsb && (sva_mask == 0))
5233 		goto errout;
5234 
5235 	na = ntovp->na;
5236 	amap = ntovp->amap;
5237 	k = 0;
5238 	for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
5239 		k = *amap;
5240 		ASSERT(nfs4_ntov_map[k].nval == k);
5241 		vbit = nfs4_ntov_map[k].vbit;
5242 
5243 		/*
5244 		 * If vattr attribute but VOP_GETATTR failed, or it's
5245 		 * superblock attribute but VFS_STATVFS failed, skip
5246 		 */
5247 		if (vbit) {
5248 			if ((vbit & sva_mask) == 0)
5249 				continue;
5250 		} else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
5251 			continue;
5252 		}
5253 		error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
5254 		if (resp != NULL) {
5255 			if (error)
5256 				ret_error = -1;	/* not all match */
5257 			else	/* update response bitmap */
5258 				*resp |= nfs4_ntov_map[k].fbit;
5259 			continue;
5260 		}
5261 		if (error) {
5262 			ret_error = -1;	/* not all match */
5263 			break;
5264 		}
5265 	}
5266 errout:
5267 	return (ret_error);
5268 }
5269 
5270 /*
5271  * Decode the attribute to be set/verified. If the attr requires a sys op
5272  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5273  * call the sv_getit function for it, because the sys op hasn't yet been done.
5274  * Return 0 for success, error code if failed.
5275  *
5276  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5277  */
5278 static int
decode_fattr4_attr(nfs4_attr_cmd_t cmd,struct nfs4_svgetit_arg * sargp,int k,XDR * xdrp,bitmap4 * resp_bval,union nfs4_attr_u * nap)5279 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5280     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5281 {
5282 	int error = 0;
5283 	bool_t set_later;
5284 
5285 	sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5286 
5287 	if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5288 		set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5289 		/*
5290 		 * don't verify yet if a vattr or sb dependent attr,
5291 		 * because we don't have their sys values yet.
5292 		 * Will be done later.
5293 		 */
5294 		if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5295 			/*
5296 			 * ACLs are a special case, since setting the MODE
5297 			 * conflicts with setting the ACL.  We delay setting
5298 			 * the ACL until all other attributes have been set.
5299 			 * The ACL gets set in do_rfs4_op_setattr().
5300 			 */
5301 			if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5302 				error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5303 				    sargp, nap);
5304 				if (error) {
5305 					xdr_free(nfs4_ntov_map[k].xfunc,
5306 					    (caddr_t)nap);
5307 				}
5308 			}
5309 		}
5310 	} else {
5311 #ifdef  DEBUG
5312 		cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5313 		    "decoding attribute %d\n", k);
5314 #endif
5315 		error = EINVAL;
5316 	}
5317 	if (!error && resp_bval && !set_later) {
5318 		*resp_bval |= nfs4_ntov_map[k].fbit;
5319 	}
5320 
5321 	return (error);
5322 }
5323 
5324 /*
5325  * Set vattr based on incoming fattr4 attrs - used by setattr.
5326  * Set response mask. Ignore any values that are not writable vattr attrs.
5327  */
5328 static nfsstat4
do_rfs4_set_attrs(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,struct nfs4_svgetit_arg * sargp,struct nfs4_ntov_table * ntovp,nfs4_attr_cmd_t cmd)5329 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5330     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5331     nfs4_attr_cmd_t cmd)
5332 {
5333 	int error = 0;
5334 	int i;
5335 	char *attrs = fattrp->attrlist4;
5336 	uint32_t attrslen = fattrp->attrlist4_len;
5337 	XDR xdr;
5338 	nfsstat4 status = NFS4_OK;
5339 	vnode_t *vp = cs->vp;
5340 	union nfs4_attr_u *na;
5341 	uint8_t *amap;
5342 
5343 #ifndef lint
5344 	/*
5345 	 * Make sure that maximum attribute number can be expressed as an
5346 	 * 8 bit quantity.
5347 	 */
5348 	ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5349 #endif
5350 
5351 	if (vp == NULL) {
5352 		if (resp)
5353 			*resp = 0;
5354 		return (NFS4ERR_NOFILEHANDLE);
5355 	}
5356 	if (cs->access == CS_ACCESS_DENIED) {
5357 		if (resp)
5358 			*resp = 0;
5359 		return (NFS4ERR_ACCESS);
5360 	}
5361 
5362 	sargp->op = cmd;
5363 	sargp->cs = cs;
5364 	sargp->flag = 0;	/* may be set later */
5365 	sargp->vap->va_mask = 0;
5366 	sargp->rdattr_error = NFS4_OK;
5367 	sargp->rdattr_error_req = FALSE;
5368 	/* sargp->sbp is set by the caller */
5369 
5370 	xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5371 
5372 	na = ntovp->na;
5373 	amap = ntovp->amap;
5374 
5375 	/*
5376 	 * The following loop iterates on the nfs4_ntov_map checking
5377 	 * if the fbit is set in the requested bitmap.
5378 	 * If set then we process the arguments using the
5379 	 * rfs4_fattr4 conversion functions to populate the setattr
5380 	 * vattr and va_mask. Any settable attrs that are not using vattr
5381 	 * will be set in this loop.
5382 	 */
5383 	for (i = 0; i < nfs4_ntov_map_size; i++) {
5384 		if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5385 			continue;
5386 		}
5387 		/*
5388 		 * If setattr, must be a writable attr.
5389 		 * If verify/nverify, must be a readable attr.
5390 		 */
5391 		if ((error = (*nfs4_ntov_map[i].sv_getit)(
5392 		    NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5393 			/*
5394 			 * Client tries to set/verify an
5395 			 * unsupported attribute, tries to set
5396 			 * a read only attr or verify a write
5397 			 * only one - error!
5398 			 */
5399 			break;
5400 		}
5401 		/*
5402 		 * Decode the attribute to set/verify
5403 		 */
5404 		error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5405 		    &xdr, resp ? resp : NULL, na);
5406 		if (error)
5407 			break;
5408 		*amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5409 		na++;
5410 		(ntovp->attrcnt)++;
5411 		if (nfs4_ntov_map[i].vfsstat)
5412 			ntovp->vfsstat = TRUE;
5413 	}
5414 
5415 	if (error != 0)
5416 		status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5417 		    puterrno4(error));
5418 	/* xdrmem_destroy(&xdrs); */	/* NO-OP */
5419 	return (status);
5420 }
5421 
5422 static nfsstat4
do_rfs4_op_setattr(bitmap4 * resp,fattr4 * fattrp,struct compound_state * cs,stateid4 * stateid)5423 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5424     stateid4 *stateid)
5425 {
5426 	int error = 0;
5427 	struct nfs4_svgetit_arg sarg;
5428 	bool_t trunc;
5429 
5430 	nfsstat4 status = NFS4_OK;
5431 	cred_t *cr = cs->cr;
5432 	vnode_t *vp = cs->vp;
5433 	struct nfs4_ntov_table ntov;
5434 	struct statvfs64 sb;
5435 	struct vattr bva;
5436 	struct flock64 bf;
5437 	int in_crit = 0;
5438 	uint_t saved_mask = 0;
5439 	caller_context_t ct;
5440 
5441 	*resp = 0;
5442 	sarg.sbp = &sb;
5443 	sarg.is_referral = B_FALSE;
5444 	nfs4_ntov_table_init(&ntov);
5445 	status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5446 	    NFS4ATTR_SETIT);
5447 	if (status != NFS4_OK) {
5448 		/*
5449 		 * failed set attrs
5450 		 */
5451 		goto done;
5452 	}
5453 
5454 	if ((sarg.vap->va_mask == 0) &&
5455 	    (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5456 		/*
5457 		 * no further work to be done
5458 		 */
5459 		goto done;
5460 	}
5461 
5462 	/*
5463 	 * If we got a request to set the ACL and the MODE, only
5464 	 * allow changing VSUID, VSGID, and VSVTX.  Attempting
5465 	 * to change any other bits, along with setting an ACL,
5466 	 * gives NFS4ERR_INVAL.
5467 	 */
5468 	if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5469 	    (fattrp->attrmask & FATTR4_MODE_MASK)) {
5470 		vattr_t va;
5471 
5472 		va.va_mask = AT_MODE;
5473 		error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5474 		if (error) {
5475 			status = puterrno4(error);
5476 			goto done;
5477 		}
5478 		if ((sarg.vap->va_mode ^ va.va_mode) &
5479 		    ~(VSUID | VSGID | VSVTX)) {
5480 			status = NFS4ERR_INVAL;
5481 			goto done;
5482 		}
5483 	}
5484 
5485 	/* Check stateid only if size has been set */
5486 	if (sarg.vap->va_mask & AT_SIZE) {
5487 		trunc = (sarg.vap->va_size == 0);
5488 		status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5489 		    trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct, cs);
5490 		if (status != NFS4_OK)
5491 			goto done;
5492 	} else {
5493 		ct.cc_sysid = 0;
5494 		ct.cc_pid = 0;
5495 		ct.cc_caller_id = nfs4_srv_caller_id;
5496 		ct.cc_flags = CC_DONTBLOCK;
5497 	}
5498 
5499 	/* XXX start of possible race with delegations */
5500 
5501 	/*
5502 	 * We need to specially handle size changes because it is
5503 	 * possible for the client to create a file with read-only
5504 	 * modes, but with the file opened for writing. If the client
5505 	 * then tries to set the file size, e.g. ftruncate(3C),
5506 	 * fcntl(F_FREESP), the normal access checking done in
5507 	 * VOP_SETATTR would prevent the client from doing it even though
5508 	 * it should be allowed to do so.  To get around this, we do the
5509 	 * access checking for ourselves and use VOP_SPACE which doesn't
5510 	 * do the access checking.
5511 	 * Also the client should not be allowed to change the file
5512 	 * size if there is a conflicting non-blocking mandatory lock in
5513 	 * the region of the change.
5514 	 */
5515 	if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5516 		u_offset_t offset;
5517 		ssize_t length;
5518 
5519 		/*
5520 		 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5521 		 * before returning, sarg.vap->va_mask is used to
5522 		 * generate the setattr reply bitmap.  We also clear
5523 		 * AT_SIZE below before calling VOP_SPACE.  For both
5524 		 * of these cases, the va_mask needs to be saved here
5525 		 * and restored after calling VOP_SETATTR.
5526 		 */
5527 		saved_mask = sarg.vap->va_mask;
5528 
5529 		/*
5530 		 * Check any possible conflict due to NBMAND locks.
5531 		 * Get into critical region before VOP_GETATTR, so the
5532 		 * size attribute is valid when checking conflicts.
5533 		 */
5534 		if (nbl_need_check(vp)) {
5535 			nbl_start_crit(vp, RW_READER);
5536 			in_crit = 1;
5537 		}
5538 
5539 		bva.va_mask = AT_UID|AT_SIZE;
5540 		error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5541 		if (error != 0) {
5542 			status = puterrno4(error);
5543 			goto done;
5544 		}
5545 
5546 		if (in_crit) {
5547 			if (sarg.vap->va_size < bva.va_size) {
5548 				offset = sarg.vap->va_size;
5549 				length = bva.va_size - sarg.vap->va_size;
5550 			} else {
5551 				offset = bva.va_size;
5552 				length = sarg.vap->va_size - bva.va_size;
5553 			}
5554 			if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5555 			    &ct)) {
5556 				status = NFS4ERR_LOCKED;
5557 				goto done;
5558 			}
5559 		}
5560 
5561 		if (crgetuid(cr) == bva.va_uid) {
5562 			sarg.vap->va_mask &= ~AT_SIZE;
5563 			bf.l_type = F_WRLCK;
5564 			bf.l_whence = 0;
5565 			bf.l_start = (off64_t)sarg.vap->va_size;
5566 			bf.l_len = 0;
5567 			bf.l_sysid = 0;
5568 			bf.l_pid = 0;
5569 			error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5570 			    (offset_t)sarg.vap->va_size, cr, &ct);
5571 		}
5572 	}
5573 
5574 	if (!error && sarg.vap->va_mask != 0)
5575 		error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5576 
5577 	/* restore va_mask -- ufs_setattr clears AT_SIZE */
5578 	if (saved_mask & AT_SIZE)
5579 		sarg.vap->va_mask |= AT_SIZE;
5580 
5581 	/*
5582 	 * If an ACL was being set, it has been delayed until now,
5583 	 * in order to set the mode (via the VOP_SETATTR() above) first.
5584 	 */
5585 	if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5586 		int i;
5587 
5588 		for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5589 			if (ntov.amap[i] == FATTR4_ACL)
5590 				break;
5591 		if (i < NFS4_MAXNUM_ATTRS) {
5592 			error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5593 			    NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5594 			if (error == 0) {
5595 				*resp |= FATTR4_ACL_MASK;
5596 			} else if (error == ENOTSUP) {
5597 				(void) rfs4_verify_attr(&sarg, resp, &ntov);
5598 				status = NFS4ERR_ATTRNOTSUPP;
5599 				goto done;
5600 			}
5601 		} else {
5602 			NFS4_DEBUG(rfs4_debug,
5603 			    (CE_NOTE, "do_rfs4_op_setattr: "
5604 			    "unable to find ACL in fattr4"));
5605 			error = EINVAL;
5606 		}
5607 	}
5608 
5609 	if (error) {
5610 		/* check if a monitor detected a delegation conflict */
5611 		if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5612 			status = NFS4ERR_DELAY;
5613 		else
5614 			status = puterrno4(error);
5615 
5616 		/*
5617 		 * Set the response bitmap when setattr failed.
5618 		 * If VOP_SETATTR partially succeeded, test by doing a
5619 		 * VOP_GETATTR on the object and comparing the data
5620 		 * to the setattr arguments.
5621 		 */
5622 		(void) rfs4_verify_attr(&sarg, resp, &ntov);
5623 	} else {
5624 		/*
5625 		 * Force modified metadata out to stable storage.
5626 		 */
5627 		(void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5628 		/*
5629 		 * Set response bitmap
5630 		 */
5631 		nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5632 	}
5633 
5634 /* Return early and already have a NFSv4 error */
5635 done:
5636 	/*
5637 	 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5638 	 * conversion sets both readable and writeable NFS4 attrs
5639 	 * for AT_MTIME and AT_ATIME.  The line below masks out
5640 	 * unrequested attrs from the setattr result bitmap.  This
5641 	 * is placed after the done: label to catch the ATTRNOTSUP
5642 	 * case.
5643 	 */
5644 	*resp &= fattrp->attrmask;
5645 
5646 	if (in_crit)
5647 		nbl_end_crit(vp);
5648 
5649 	nfs4_ntov_table_free(&ntov, &sarg);
5650 
5651 	return (status);
5652 }
5653 
5654 /* ARGSUSED */
5655 static void
rfs4_op_setattr(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5656 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5657     struct compound_state *cs)
5658 {
5659 	SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5660 	SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5661 	bslabel_t *clabel;
5662 
5663 	DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5664 	    SETATTR4args *, args);
5665 
5666 	if (cs->vp == NULL) {
5667 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5668 		goto out;
5669 	}
5670 
5671 	/*
5672 	 * If there is an unshared filesystem mounted on this vnode,
5673 	 * do not allow to setattr on this vnode.
5674 	 */
5675 	if (vn_ismntpt(cs->vp)) {
5676 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5677 		goto out;
5678 	}
5679 
5680 	resp->attrsset = 0;
5681 
5682 	if (rdonly4(req, cs)) {
5683 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5684 		goto out;
5685 	}
5686 
5687 	/* check label before setting attributes */
5688 	if (is_system_labeled()) {
5689 		ASSERT(req->rq_label != NULL);
5690 		clabel = req->rq_label;
5691 		DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5692 		    "got client label from request(1)",
5693 		    struct svc_req *, req);
5694 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
5695 			if (!do_rfs_label_check(clabel, cs->vp,
5696 			    EQUALITY_CHECK, cs->exi)) {
5697 				*cs->statusp = resp->status = NFS4ERR_ACCESS;
5698 				goto out;
5699 			}
5700 		}
5701 	}
5702 
5703 	get_stateid4(cs, &args->stateid);
5704 	*cs->statusp = resp->status =
5705 	    do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5706 	    &args->stateid);
5707 
5708 out:
5709 	DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5710 	    SETATTR4res *, resp);
5711 }
5712 
5713 /* ARGSUSED */
5714 static void
rfs4_op_verify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5715 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5716     struct compound_state *cs)
5717 {
5718 	/*
5719 	 * verify and nverify are exactly the same, except that nverify
5720 	 * succeeds when some argument changed, and verify succeeds when
5721 	 * when none changed.
5722 	 */
5723 
5724 	VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5725 	VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5726 
5727 	int error;
5728 	struct nfs4_svgetit_arg sarg;
5729 	struct statvfs64 sb;
5730 	struct nfs4_ntov_table ntov;
5731 
5732 	DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5733 	    VERIFY4args *, args);
5734 
5735 	if (cs->vp == NULL) {
5736 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5737 		goto out;
5738 	}
5739 
5740 	sarg.sbp = &sb;
5741 	sarg.is_referral = B_FALSE;
5742 	nfs4_ntov_table_init(&ntov);
5743 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5744 	    &sarg, &ntov, NFS4ATTR_VERIT);
5745 	if (resp->status != NFS4_OK) {
5746 		/*
5747 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5748 		 * so could return -1 for "no match".
5749 		 */
5750 		if (resp->status == -1)
5751 			resp->status = NFS4ERR_NOT_SAME;
5752 		goto done;
5753 	}
5754 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5755 	switch (error) {
5756 	case 0:
5757 		resp->status = NFS4_OK;
5758 		break;
5759 	case -1:
5760 		resp->status = NFS4ERR_NOT_SAME;
5761 		break;
5762 	default:
5763 		resp->status = puterrno4(error);
5764 		break;
5765 	}
5766 done:
5767 	*cs->statusp = resp->status;
5768 	nfs4_ntov_table_free(&ntov, &sarg);
5769 out:
5770 	DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5771 	    VERIFY4res *, resp);
5772 }
5773 
5774 /* ARGSUSED */
5775 static void
rfs4_op_nverify(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5776 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5777     struct compound_state *cs)
5778 {
5779 	/*
5780 	 * verify and nverify are exactly the same, except that nverify
5781 	 * succeeds when some argument changed, and verify succeeds when
5782 	 * when none changed.
5783 	 */
5784 
5785 	NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5786 	NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5787 
5788 	int error;
5789 	struct nfs4_svgetit_arg sarg;
5790 	struct statvfs64 sb;
5791 	struct nfs4_ntov_table ntov;
5792 
5793 	DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5794 	    NVERIFY4args *, args);
5795 
5796 	if (cs->vp == NULL) {
5797 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5798 		DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5799 		    NVERIFY4res *, resp);
5800 		return;
5801 	}
5802 	sarg.sbp = &sb;
5803 	sarg.is_referral = B_FALSE;
5804 	nfs4_ntov_table_init(&ntov);
5805 	resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5806 	    &sarg, &ntov, NFS4ATTR_VERIT);
5807 	if (resp->status != NFS4_OK) {
5808 		/*
5809 		 * do_rfs4_set_attrs will try to verify systemwide attrs,
5810 		 * so could return -1 for "no match".
5811 		 */
5812 		if (resp->status == -1)
5813 			resp->status = NFS4_OK;
5814 		goto done;
5815 	}
5816 	error = rfs4_verify_attr(&sarg, NULL, &ntov);
5817 	switch (error) {
5818 	case 0:
5819 		resp->status = NFS4ERR_SAME;
5820 		break;
5821 	case -1:
5822 		resp->status = NFS4_OK;
5823 		break;
5824 	default:
5825 		resp->status = puterrno4(error);
5826 		break;
5827 	}
5828 done:
5829 	*cs->statusp = resp->status;
5830 	nfs4_ntov_table_free(&ntov, &sarg);
5831 
5832 	DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5833 	    NVERIFY4res *, resp);
5834 }
5835 
5836 /*
5837  * XXX - This should live in an NFS header file.
5838  */
5839 #define	MAX_IOVECS	12
5840 
5841 /* ARGSUSED */
5842 static void
rfs4_op_write(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)5843 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5844     struct compound_state *cs)
5845 {
5846 	WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5847 	WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5848 	int error;
5849 	vnode_t *vp;
5850 	struct vattr bva;
5851 	u_offset_t rlimit;
5852 	struct uio uio;
5853 	struct iovec iov[MAX_IOVECS];
5854 	struct iovec *iovp;
5855 	int iovcnt;
5856 	int ioflag;
5857 	cred_t *savecred, *cr;
5858 	bool_t *deleg = &cs->deleg;
5859 	nfsstat4 stat;
5860 	int in_crit = 0;
5861 	caller_context_t ct;
5862 	nfs4_srv_t *nsrv4;
5863 
5864 	DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5865 	    WRITE4args *, args);
5866 
5867 	vp = cs->vp;
5868 	if (vp == NULL) {
5869 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5870 		goto out;
5871 	}
5872 
5873 	if (cs->access == CS_ACCESS_DENIED) {
5874 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5875 		goto out;
5876 	}
5877 
5878 	get_stateid4(cs, &args->stateid);
5879 
5880 	if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5881 	    deleg, TRUE, &ct, cs)) != NFS4_OK) {
5882 		*cs->statusp = resp->status = stat;
5883 		goto out;
5884 	}
5885 
5886 	/*
5887 	 * We have to enter the critical region before calling VOP_RWLOCK
5888 	 * to avoid a deadlock with ufs.
5889 	 */
5890 	if (nbl_need_check(vp)) {
5891 		nbl_start_crit(vp, RW_READER);
5892 		in_crit = 1;
5893 		if (nbl_conflict(vp, NBL_WRITE,
5894 		    args->offset, args->data_len, 0, &ct)) {
5895 			*cs->statusp = resp->status = NFS4ERR_LOCKED;
5896 			goto out;
5897 		}
5898 	}
5899 
5900 	cr = cs->cr;
5901 	bva.va_mask = AT_MODE | AT_UID;
5902 	error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5903 
5904 	/*
5905 	 * If we can't get the attributes, then we can't do the
5906 	 * right access checking.  So, we'll fail the request.
5907 	 */
5908 	if (error) {
5909 		*cs->statusp = resp->status = puterrno4(error);
5910 		goto out;
5911 	}
5912 
5913 	if (rdonly4(req, cs)) {
5914 		*cs->statusp = resp->status = NFS4ERR_ROFS;
5915 		goto out;
5916 	}
5917 
5918 	if (vp->v_type != VREG) {
5919 		*cs->statusp = resp->status =
5920 		    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5921 		goto out;
5922 	}
5923 
5924 	if (crgetuid(cr) != bva.va_uid &&
5925 	    (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5926 		*cs->statusp = resp->status = puterrno4(error);
5927 		goto out;
5928 	}
5929 
5930 	if (MANDLOCK(vp, bva.va_mode)) {
5931 		*cs->statusp = resp->status = NFS4ERR_ACCESS;
5932 		goto out;
5933 	}
5934 
5935 	nsrv4 = nfs4_get_srv();
5936 	if (args->data_len == 0) {
5937 		*cs->statusp = resp->status = NFS4_OK;
5938 		resp->count = 0;
5939 		resp->committed = args->stable;
5940 		resp->writeverf = nsrv4->write4verf;
5941 		goto out;
5942 	}
5943 
5944 	if (args->mblk != NULL) {
5945 		mblk_t *m;
5946 		uint_t bytes, round_len;
5947 
5948 		iovcnt = 0;
5949 		bytes = 0;
5950 		round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5951 		for (m = args->mblk;
5952 		    m != NULL && bytes < round_len;
5953 		    m = m->b_cont) {
5954 			iovcnt++;
5955 			bytes += MBLKL(m);
5956 		}
5957 #ifdef DEBUG
5958 		/* should have ended on an mblk boundary */
5959 		if (bytes != round_len) {
5960 			printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5961 			    bytes, round_len, args->data_len);
5962 			printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5963 			    (void *)args->mblk, (void *)m);
5964 			ASSERT(bytes == round_len);
5965 		}
5966 #endif
5967 		if (iovcnt <= MAX_IOVECS) {
5968 			iovp = iov;
5969 		} else {
5970 			iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5971 		}
5972 		mblk_to_iov(args->mblk, iovcnt, iovp);
5973 	} else if (args->rlist != NULL) {
5974 		iovcnt = 1;
5975 		iovp = iov;
5976 		iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5977 		iovp->iov_len = args->data_len;
5978 	} else {
5979 		iovcnt = 1;
5980 		iovp = iov;
5981 		iovp->iov_base = args->data_val;
5982 		iovp->iov_len = args->data_len;
5983 	}
5984 
5985 	uio.uio_iov = iovp;
5986 	uio.uio_iovcnt = iovcnt;
5987 
5988 	uio.uio_segflg = UIO_SYSSPACE;
5989 	uio.uio_extflg = UIO_COPY_DEFAULT;
5990 	uio.uio_loffset = args->offset;
5991 	uio.uio_resid = args->data_len;
5992 	uio.uio_llimit = curproc->p_fsz_ctl;
5993 	rlimit = uio.uio_llimit - args->offset;
5994 	if (rlimit < (u_offset_t)uio.uio_resid)
5995 		uio.uio_resid = (int)rlimit;
5996 
5997 	if (args->stable == UNSTABLE4)
5998 		ioflag = 0;
5999 	else if (args->stable == FILE_SYNC4)
6000 		ioflag = FSYNC;
6001 	else if (args->stable == DATA_SYNC4)
6002 		ioflag = FDSYNC;
6003 	else {
6004 		if (iovp != iov)
6005 			kmem_free(iovp, sizeof (*iovp) * iovcnt);
6006 		*cs->statusp = resp->status = NFS4ERR_INVAL;
6007 		goto out;
6008 	}
6009 
6010 	/*
6011 	 * We're changing creds because VM may fault and we need
6012 	 * the cred of the current thread to be used if quota
6013 	 * checking is enabled.
6014 	 */
6015 	savecred = curthread->t_cred;
6016 	curthread->t_cred = cr;
6017 	error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
6018 	curthread->t_cred = savecred;
6019 
6020 	if (iovp != iov)
6021 		kmem_free(iovp, sizeof (*iovp) * iovcnt);
6022 
6023 	if (error) {
6024 		*cs->statusp = resp->status = puterrno4(error);
6025 		goto out;
6026 	}
6027 
6028 	*cs->statusp = resp->status = NFS4_OK;
6029 	resp->count = args->data_len - uio.uio_resid;
6030 
6031 	if (ioflag == 0)
6032 		resp->committed = UNSTABLE4;
6033 	else
6034 		resp->committed = FILE_SYNC4;
6035 
6036 	resp->writeverf = nsrv4->write4verf;
6037 
6038 out:
6039 	if (in_crit)
6040 		nbl_end_crit(vp);
6041 
6042 	DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
6043 	    WRITE4res *, resp);
6044 }
6045 
6046 static inline int
rfs4_opnum_in_range(const compound_state_t * cs,int opnum)6047 rfs4_opnum_in_range(const compound_state_t *cs, int opnum)
6048 {
6049 	if (opnum < FIRST_NFS4_OP || opnum > LAST_NFS4_OP)
6050 		return (0);
6051 	else if (cs->minorversion == 0 && opnum > LAST_NFS40_OP)
6052 		return (0);
6053 	else if (cs->minorversion == 1 && opnum > LAST_NFS41_OP)
6054 		return (0);
6055 	else if (cs->minorversion == 2 && opnum > LAST_NFS42_OP)
6056 		return (0);
6057 	return (1);
6058 }
6059 
6060 void
rfs4_compound(COMPOUND4args * args,COMPOUND4res * resp,compound_state_t * cs,struct svc_req * req,int * rv)6061 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, compound_state_t *cs,
6062     struct svc_req *req, int *rv)
6063 {
6064 	uint_t i;
6065 	cred_t *cr;
6066 	nfs4_srv_t *nsrv4;
6067 	nfs_export_t *ne = nfs_get_export();
6068 
6069 	if (rv != NULL)
6070 		*rv = 0;
6071 	/*
6072 	 * Form a reply tag by copying over the request tag.
6073 	 */
6074 	resp->tag.utf8string_len = args->tag.utf8string_len;
6075 	if (args->tag.utf8string_len != 0) {
6076 		resp->tag.utf8string_val =
6077 		    kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
6078 		bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
6079 		    resp->tag.utf8string_len);
6080 	} else {
6081 		resp->tag.utf8string_val = NULL;
6082 	}
6083 
6084 	cs->statusp = &resp->status;
6085 	cs->req = req;
6086 	cs->minorversion = args->minorversion;
6087 	resp->array = NULL;
6088 	resp->array_len = 0;
6089 
6090 	if (args->array_len == 0) {
6091 		resp->status = NFS4_OK;
6092 		return;
6093 	}
6094 
6095 	cr = svc_xprt_cred(req->rq_xprt);
6096 	ASSERT(cr != NULL);
6097 
6098 	if (sec_svc_getcred(req, cr, &cs->principal, &cs->nfsflavor) == 0) {
6099 		DTRACE_NFSV4_2(compound__start, struct compound_state *,
6100 		    cs, COMPOUND4args *, args);
6101 		DTRACE_NFSV4_2(compound__done, struct compound_state *,
6102 		    cs, COMPOUND4res *, resp);
6103 		svcerr_badcred(req->rq_xprt);
6104 		if (rv != NULL)
6105 			*rv = 1;
6106 		return;
6107 	}
6108 
6109 	resp->array_len = args->array_len;
6110 	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
6111 	    KM_SLEEP);
6112 
6113 	cs->op_len = args->array_len;
6114 	cs->basecr = cr;
6115 	nsrv4 = nfs4_get_srv();
6116 
6117 	DTRACE_NFSV4_2(compound__start, struct compound_state *, cs,
6118 	    COMPOUND4args *, args);
6119 
6120 	/*
6121 	 * For now, NFS4 compound processing must be protected by
6122 	 * exported_lock because it can access more than one exportinfo
6123 	 * per compound and share/unshare can now change multiple
6124 	 * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
6125 	 * per proc (excluding public exinfo), and exi_count design
6126 	 * is sufficient to protect concurrent execution of NFS2/3
6127 	 * ops along with unexport.  This lock will be removed as
6128 	 * part of the NFSv4 phase 2 namespace redesign work.
6129 	 */
6130 	rw_enter(&ne->exported_lock, RW_READER);
6131 
6132 	/*
6133 	 * If this is the first compound we've seen, we need to start all
6134 	 * new instances' grace periods.
6135 	 */
6136 	if (nsrv4->seen_first_compound == 0) {
6137 		rfs4_grace_start_new(nsrv4);
6138 		/*
6139 		 * This must be set after rfs4_grace_start_new(), otherwise
6140 		 * another thread could proceed past here before the former
6141 		 * is finished.
6142 		 */
6143 		nsrv4->seen_first_compound = 1;
6144 	}
6145 
6146 	for (i = 0; i < args->array_len && cs->cont; i++) {
6147 		nfs_argop4 *argop;
6148 		nfs_resop4 *resop;
6149 		uint_t op;
6150 		kstat_named_t *stat = ne->ne_globals->rfsproccnt[NFS_V4];
6151 
6152 		argop = &args->array[i];
6153 		resop = &resp->array[i];
6154 		resop->resop = argop->argop;
6155 		op = (uint_t)resop->resop;
6156 
6157 		cs->op_pos = i;
6158 		if (op < rfsv4disp_cnt && rfs4_opnum_in_range(cs, op)) {
6159 			/*
6160 			 * Count the individual ops here; NULL and COMPOUND
6161 			 * are counted in common_dispatch()
6162 			 */
6163 			stat[op].value.ui64++;
6164 
6165 			NFS4_DEBUG(rfs4_debug > 1,
6166 			    (CE_NOTE, "Executing %s", rfs4_op_string[op]));
6167 			(*rfsv4disptab[op].dis_proc)(argop, resop, req, cs);
6168 			NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
6169 			    rfs4_op_string[op], *cs->statusp));
6170 			if (*cs->statusp != NFS4_OK)
6171 				cs->cont = FALSE;
6172 			if (rfsv4disptab[op].dis_flags & OP_CLEAR_STATEID)
6173 				cs->cs_flags &= ~RFS4_CURRENT_STATEID;
6174 		} else {
6175 			/*
6176 			 * This is effectively dead code since XDR code
6177 			 * will have already returned BADXDR if op doesn't
6178 			 * decode to legal value.  This only done for a
6179 			 * day when XDR code doesn't verify v4 opcodes.
6180 			 */
6181 			op = OP_ILLEGAL;
6182 			stat[OP_ILLEGAL_IDX].value.ui64++;
6183 
6184 			rfs4_op_illegal(argop, resop, req, cs);
6185 			cs->cont = FALSE;
6186 		}
6187 
6188 		/*
6189 		 * If not at last op, and if we are to stop, then
6190 		 * compact the results array.
6191 		 */
6192 		if ((i + 1) < args->array_len && !cs->cont) {
6193 			nfs_resop4 *new_res = kmem_alloc(
6194 			    (i+1) * sizeof (nfs_resop4), KM_SLEEP);
6195 			bcopy(resp->array,
6196 			    new_res, (i+1) * sizeof (nfs_resop4));
6197 			kmem_free(resp->array,
6198 			    args->array_len * sizeof (nfs_resop4));
6199 
6200 			resp->array_len =  i + 1;
6201 			resp->array = new_res;
6202 		}
6203 	}
6204 
6205 	rw_exit(&ne->exported_lock);
6206 
6207 	DTRACE_NFSV4_2(compound__done, struct compound_state *, cs,
6208 	    COMPOUND4res *, resp);
6209 
6210 	/*
6211 	 * done with this compound request, free the label
6212 	 */
6213 
6214 	if (req->rq_label != NULL) {
6215 		kmem_free(req->rq_label, sizeof (bslabel_t));
6216 		req->rq_label = NULL;
6217 	}
6218 }
6219 
6220 /*
6221  * XXX because of what appears to be duplicate calls to rfs4_compound_free
6222  * XXX zero out the tag and array values. Need to investigate why the
6223  * XXX calls occur, but at least prevent the panic for now.
6224  */
6225 void
rfs4_compound_free(COMPOUND4res * resp)6226 rfs4_compound_free(COMPOUND4res *resp)
6227 {
6228 	uint_t i;
6229 
6230 	if (resp->tag.utf8string_val) {
6231 		UTF8STRING_FREE(resp->tag)
6232 	}
6233 
6234 	for (i = 0; i < resp->array_len; i++) {
6235 		nfs_resop4 *resop;
6236 		uint_t op;
6237 
6238 		resop = &resp->array[i];
6239 		op = (uint_t)resop->resop;
6240 		if (op < rfsv4disp_cnt) {
6241 			(*rfsv4disptab[op].dis_resfree)(resop);
6242 		}
6243 	}
6244 	if (resp->array != NULL) {
6245 		kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
6246 	}
6247 }
6248 
6249 /*
6250  * Check if entire requst is idempotent
6251  */
6252 bool_t
rfs4_idempotent_req(const COMPOUND4args * args)6253 rfs4_idempotent_req(const COMPOUND4args *args)
6254 {
6255 	int i;
6256 
6257 	for (i = 0; i < args->array_len; i++) {
6258 		uint_t op;
6259 
6260 		op = (uint_t)args->array[i].argop;
6261 
6262 		if (op >= rfsv4disp_cnt ||
6263 		    !(rfsv4disptab[op].dis_flags & OP_IDEMPOTENT)) {
6264 			return (FALSE);
6265 		}
6266 	}
6267 	return (TRUE);
6268 }
6269 
6270 nfsstat4
rfs4_client_sysid(rfs4_client_t * cp,sysid_t * sp)6271 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6272 {
6273 	nfsstat4 e;
6274 
6275 	rfs4_dbe_lock(cp->rc_dbe);
6276 
6277 	if (cp->rc_sysidt != LM_NOSYSID) {
6278 		*sp = cp->rc_sysidt;
6279 		e = NFS4_OK;
6280 
6281 	} else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6282 		*sp = cp->rc_sysidt;
6283 		e = NFS4_OK;
6284 
6285 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6286 		    "rfs4_client_sysid: allocated 0x%x\n", *sp));
6287 	} else
6288 		e = NFS4ERR_DELAY;
6289 
6290 	rfs4_dbe_unlock(cp->rc_dbe);
6291 	return (e);
6292 }
6293 
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log one line through cmn_err() describing a lock
 * request.  'str' is a caller supplied prefix, 'operation' the fcntl
 * style command and 'flk' the lock description.  A zero l_len is
 * printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	const char *cmd_name = "F_UNKNOWN";
	const char *type_name = "F_UNKNOWN";

	if (operation == F_GETLK)
		cmd_name = "F_GETLK";
	else if (operation == F_SETLK)
		cmd_name = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		cmd_name = "F_SETLK_NBMAND";

	if (flk->l_type == F_UNLCK)
		type_name = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type_name = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type_name = "F_WRLCK";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, cmd_name, type_name, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6330 
6331 /*ARGSUSED*/
6332 static bool_t
creds_ok(cred_set_t * cr_set,struct svc_req * req,struct compound_state * cs)6333 creds_ok(cred_set_t *cr_set, struct svc_req *req, struct compound_state *cs)
6334 {
6335 	return (TRUE);
6336 }
6337 
6338 /*
6339  * Look up the pathname using the vp in cs as the directory vnode.
6340  * cs->vp will be the vnode for the file on success
6341  */
6342 
6343 static nfsstat4
rfs4_lookup(component4 * component,struct svc_req * req,struct compound_state * cs)6344 rfs4_lookup(component4 *component, struct svc_req *req,
6345     struct compound_state *cs)
6346 {
6347 	char *nm;
6348 	uint32_t len;
6349 	nfsstat4 status;
6350 	struct sockaddr *ca;
6351 	char *name;
6352 
6353 	if (cs->vp == NULL) {
6354 		return (NFS4ERR_NOFILEHANDLE);
6355 	}
6356 	if (cs->vp->v_type != VDIR) {
6357 		return (NFS4ERR_NOTDIR);
6358 	}
6359 
6360 	status = utf8_dir_verify(component);
6361 	if (status != NFS4_OK)
6362 		return (status);
6363 
6364 	nm = utf8_to_fn(component, &len, NULL);
6365 	if (nm == NULL) {
6366 		return (NFS4ERR_INVAL);
6367 	}
6368 
6369 	if (len > MAXNAMELEN) {
6370 		kmem_free(nm, len);
6371 		return (NFS4ERR_NAMETOOLONG);
6372 	}
6373 
6374 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6375 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6376 	    MAXPATHLEN + 1);
6377 
6378 	if (name == NULL) {
6379 		kmem_free(nm, len);
6380 		return (NFS4ERR_INVAL);
6381 	}
6382 
6383 	status = do_rfs4_op_lookup(name, req, cs);
6384 
6385 	if (name != nm)
6386 		kmem_free(name, MAXPATHLEN + 1);
6387 
6388 	kmem_free(nm, len);
6389 
6390 	return (status);
6391 }
6392 
6393 static nfsstat4
rfs4_lookupfile(component4 * component,struct svc_req * req,struct compound_state * cs,uint32_t access,change_info4 * cinfo)6394 rfs4_lookupfile(component4 *component, struct svc_req *req,
6395     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6396 {
6397 	nfsstat4 status;
6398 	vnode_t *dvp = cs->vp;
6399 	vattr_t bva, ava, fva;
6400 	int error;
6401 
6402 	/* Get "before" change value */
6403 	bva.va_mask = AT_CTIME|AT_SEQ;
6404 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6405 	if (error)
6406 		return (puterrno4(error));
6407 
6408 	/* rfs4_lookup may VN_RELE directory */
6409 	VN_HOLD(dvp);
6410 
6411 	status = rfs4_lookup(component, req, cs);
6412 	if (status != NFS4_OK) {
6413 		VN_RELE(dvp);
6414 		return (status);
6415 	}
6416 
6417 	/*
6418 	 * Get "after" change value, if it fails, simply return the
6419 	 * before value.
6420 	 */
6421 	ava.va_mask = AT_CTIME|AT_SEQ;
6422 	if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6423 		ava.va_ctime = bva.va_ctime;
6424 		ava.va_seq = 0;
6425 	}
6426 	VN_RELE(dvp);
6427 
6428 	/*
6429 	 * Validate the file is a file
6430 	 */
6431 	fva.va_mask = AT_TYPE|AT_MODE;
6432 	error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6433 	if (error)
6434 		return (puterrno4(error));
6435 
6436 	if (fva.va_type != VREG) {
6437 		if (fva.va_type == VDIR)
6438 			return (NFS4ERR_ISDIR);
6439 		if (fva.va_type == VLNK)
6440 			return (NFS4ERR_SYMLINK);
6441 		return (NFS4ERR_INVAL);
6442 	}
6443 
6444 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6445 	NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6446 
6447 	/*
6448 	 * It is undefined if VOP_LOOKUP will change va_seq, so
6449 	 * cinfo.atomic = TRUE only if we have
6450 	 * non-zero va_seq's, and they have not changed.
6451 	 */
6452 	if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6453 		cinfo->atomic = TRUE;
6454 	else
6455 		cinfo->atomic = FALSE;
6456 
6457 	/* Check for mandatory locking */
6458 	cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6459 	return (check_open_access(access, cs, req));
6460 }
6461 
6462 static nfsstat4
create_vnode(vnode_t * dvp,char * nm,vattr_t * vap,createmode4 mode,cred_t * cr,vnode_t ** vpp,bool_t * created)6463 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6464     cred_t *cr, vnode_t **vpp, bool_t *created)
6465 {
6466 	int error;
6467 	nfsstat4 status = NFS4_OK;
6468 	vattr_t va;
6469 
6470 tryagain:
6471 
6472 	/*
6473 	 * The file open mode used is VWRITE.  If the client needs
6474 	 * some other semantic, then it should do the access checking
6475 	 * itself.  It would have been nice to have the file open mode
6476 	 * passed as part of the arguments.
6477 	 */
6478 
6479 	*created = TRUE;
6480 	error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6481 
6482 	if (error) {
6483 		*created = FALSE;
6484 
6485 		/*
6486 		 * If we got something other than file already exists
6487 		 * then just return this error.  Otherwise, we got
6488 		 * EEXIST.  If we were doing a GUARDED create, then
6489 		 * just return this error.  Otherwise, we need to
6490 		 * make sure that this wasn't a duplicate of an
6491 		 * exclusive create request.
6492 		 *
6493 		 * The assumption is made that a non-exclusive create
6494 		 * request will never return EEXIST.
6495 		 */
6496 
6497 		if (error != EEXIST || mode == GUARDED4) {
6498 			status = puterrno4(error);
6499 			return (status);
6500 		}
6501 		error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6502 		    NULL, NULL, NULL);
6503 
6504 		if (error) {
6505 			/*
6506 			 * We couldn't find the file that we thought that
6507 			 * we just created.  So, we'll just try creating
6508 			 * it again.
6509 			 */
6510 			if (error == ENOENT)
6511 				goto tryagain;
6512 
6513 			status = puterrno4(error);
6514 			return (status);
6515 		}
6516 
6517 		if (mode == UNCHECKED4) {
6518 			/* existing object must be regular file */
6519 			if ((*vpp)->v_type != VREG) {
6520 				if ((*vpp)->v_type == VDIR)
6521 					status = NFS4ERR_ISDIR;
6522 				else if ((*vpp)->v_type == VLNK)
6523 					status = NFS4ERR_SYMLINK;
6524 				else
6525 					status = NFS4ERR_INVAL;
6526 				VN_RELE(*vpp);
6527 				return (status);
6528 			}
6529 
6530 			return (NFS4_OK);
6531 		}
6532 
6533 		/* Check for duplicate request */
6534 		va.va_mask = AT_MTIME;
6535 		error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6536 		if (!error) {
6537 			/* We found the file */
6538 			const timestruc_t *mtime = &vap->va_mtime;
6539 
6540 			if (va.va_mtime.tv_sec != mtime->tv_sec ||
6541 			    va.va_mtime.tv_nsec != mtime->tv_nsec) {
6542 				/* but its not our creation */
6543 				VN_RELE(*vpp);
6544 				return (NFS4ERR_EXIST);
6545 			}
6546 			*created = TRUE; /* retrans of create == created */
6547 			return (NFS4_OK);
6548 		}
6549 		VN_RELE(*vpp);
6550 		return (NFS4ERR_EXIST);
6551 	}
6552 
6553 	return (NFS4_OK);
6554 }
6555 
6556 static nfsstat4
check_open_access(uint32_t access,struct compound_state * cs,struct svc_req * req)6557 check_open_access(uint32_t access, struct compound_state *cs,
6558     struct svc_req *req)
6559 {
6560 	int error;
6561 	vnode_t *vp;
6562 	bool_t readonly;
6563 	cred_t *cr = cs->cr;
6564 
6565 	/* For now we don't allow mandatory locking as per V2/V3 */
6566 	if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6567 		return (NFS4ERR_ACCESS);
6568 	}
6569 
6570 	vp = cs->vp;
6571 	ASSERT(cr != NULL && vp->v_type == VREG);
6572 
6573 	/*
6574 	 * If the file system is exported read only and we are trying
6575 	 * to open for write, then return NFS4ERR_ROFS
6576 	 */
6577 
6578 	readonly = rdonly4(req, cs);
6579 
6580 	if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6581 		return (NFS4ERR_ROFS);
6582 
6583 	if (access & OPEN4_SHARE_ACCESS_READ) {
6584 		if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6585 		    (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6586 			return (NFS4ERR_ACCESS);
6587 		}
6588 	}
6589 
6590 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
6591 		error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6592 		if (error)
6593 			return (NFS4ERR_ACCESS);
6594 	}
6595 
6596 	return (NFS4_OK);
6597 }
6598 
6599 static void
rfs4_verifier_to_mtime(verifier4 v,timestruc_t * mtime)6600 rfs4_verifier_to_mtime(verifier4 v, timestruc_t *mtime)
6601 {
6602 	timespec32_t *time = (timespec32_t *)&v;
6603 
6604 	/*
6605 	 * Ensure no time overflows. Assumes underlying
6606 	 * filesystem supports at least 32 bits.
6607 	 * Truncate nsec to usec resolution to allow valid
6608 	 * compares even if the underlying filesystem truncates.
6609 	 */
6610 	mtime->tv_sec = time->tv_sec % TIME32_MAX;
6611 	mtime->tv_nsec = (time->tv_nsec / 1000) * 1000;
6612 }
6613 
6614 static nfsstat4
rfs4_createfile(OPEN4args * args,struct svc_req * req,struct compound_state * cs,change_info4 * cinfo,bitmap4 * attrset,clientid4 clientid)6615 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6616     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6617 {
6618 	struct nfs4_svgetit_arg sarg;
6619 	struct nfs4_ntov_table ntov;
6620 
6621 	bool_t ntov_table_init = FALSE;
6622 	struct statvfs64 sb;
6623 	nfsstat4 status;
6624 	vnode_t *vp;
6625 	vattr_t bva, ava, iva, cva, *vap;
6626 	vnode_t *dvp;
6627 	char *nm = NULL;
6628 	uint_t buflen;
6629 	bool_t created;
6630 	bool_t setsize = FALSE;
6631 	len_t reqsize;
6632 	int error;
6633 	bool_t trunc;
6634 	caller_context_t ct;
6635 	component4 *component;
6636 	bslabel_t *clabel;
6637 	struct sockaddr *ca;
6638 	char *name = NULL;
6639 	fattr4 *fattr = NULL;
6640 
6641 	ASSERT(*attrset == 0);
6642 
6643 	sarg.sbp = &sb;
6644 	sarg.is_referral = B_FALSE;
6645 
6646 	dvp = cs->vp;
6647 
6648 	/* Check if the file system is read only */
6649 	if (rdonly4(req, cs))
6650 		return (NFS4ERR_ROFS);
6651 
6652 	/* check the label of including directory */
6653 	if (is_system_labeled()) {
6654 		ASSERT(req->rq_label != NULL);
6655 		clabel = req->rq_label;
6656 		DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6657 		    "got client label from request(1)",
6658 		    struct svc_req *, req);
6659 		if (!blequal(&l_admin_low->tsl_label, clabel)) {
6660 			if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6661 			    cs->exi)) {
6662 				return (NFS4ERR_ACCESS);
6663 			}
6664 		}
6665 	}
6666 
6667 	if ((args->mode == EXCLUSIVE4 || args->mode == EXCLUSIVE4_1) &&
6668 	    dvp->v_flag & V_XATTRDIR) {
6669 		/* prohibit EXCL create of named attributes */
6670 		return (NFS4ERR_INVAL);
6671 	}
6672 
6673 	/*
6674 	 * Get the last component of path name in nm. cs will reference
6675 	 * the including directory on success.
6676 	 */
6677 	component = &args->claim.open_claim4_u.file;
6678 	status = utf8_dir_verify(component);
6679 	if (status != NFS4_OK)
6680 		return (status);
6681 
6682 	nm = utf8_to_fn(component, &buflen, NULL);
6683 
6684 	if (nm == NULL)
6685 		return (NFS4ERR_RESOURCE);
6686 
6687 	if (buflen > MAXNAMELEN) {
6688 		kmem_free(nm, buflen);
6689 		return (NFS4ERR_NAMETOOLONG);
6690 	}
6691 
6692 	bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6693 	error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6694 	if (error) {
6695 		kmem_free(nm, buflen);
6696 		return (puterrno4(error));
6697 	}
6698 
6699 	if (bva.va_type != VDIR) {
6700 		kmem_free(nm, buflen);
6701 		return (NFS4ERR_NOTDIR);
6702 	}
6703 
6704 	NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6705 
6706 	switch (args->mode) {
6707 	case GUARDED4:
6708 		/*FALLTHROUGH*/
6709 	case UNCHECKED4:
6710 	case EXCLUSIVE4_1:
6711 		nfs4_ntov_table_init(&ntov);
6712 		ntov_table_init = TRUE;
6713 
6714 		if (args->mode == EXCLUSIVE4_1)
6715 			fattr = &args->createhow4_u.ch_createboth.cva_attrs;
6716 		else
6717 			fattr = &args->createhow4_u.createattrs;
6718 
6719 		status = do_rfs4_set_attrs(attrset,
6720 		    fattr,
6721 		    cs, &sarg, &ntov, NFS4ATTR_SETIT);
6722 
6723 		if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6724 		    sarg.vap->va_type != VREG) {
6725 			if (sarg.vap->va_type == VDIR)
6726 				status = NFS4ERR_ISDIR;
6727 			else if (sarg.vap->va_type == VLNK)
6728 				status = NFS4ERR_SYMLINK;
6729 			else
6730 				status = NFS4ERR_INVAL;
6731 		}
6732 
6733 		if (status != NFS4_OK) {
6734 			kmem_free(nm, buflen);
6735 			nfs4_ntov_table_free(&ntov, &sarg);
6736 			*attrset = 0;
6737 			return (status);
6738 		}
6739 
6740 		vap = sarg.vap;
6741 		vap->va_type = VREG;
6742 		vap->va_mask |= AT_TYPE;
6743 
6744 		if ((vap->va_mask & AT_MODE) == 0) {
6745 			vap->va_mask |= AT_MODE;
6746 			vap->va_mode = (mode_t)0600;
6747 		}
6748 
6749 		if (vap->va_mask & AT_SIZE) {
6750 
6751 			/* Disallow create with a non-zero size */
6752 
6753 			if ((reqsize = sarg.vap->va_size) != 0) {
6754 				kmem_free(nm, buflen);
6755 				nfs4_ntov_table_free(&ntov, &sarg);
6756 				*attrset = 0;
6757 				return (NFS4ERR_INVAL);
6758 			}
6759 			setsize = TRUE;
6760 		}
6761 		if (args->mode == EXCLUSIVE4_1) {
6762 			rfs4_verifier_to_mtime(
6763 			    args->createhow4_u.ch_createboth.cva_verf,
6764 			    &vap->va_mtime);
6765 			/* attrset will be set later */
6766 			fattr->attrmask |= FATTR4_TIME_MODIFY_MASK;
6767 			vap->va_mask |= AT_MTIME;
6768 		}
6769 		break;
6770 
6771 	case EXCLUSIVE4:
6772 		cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6773 		cva.va_type = VREG;
6774 		cva.va_mode = (mode_t)0;
6775 
6776 		rfs4_verifier_to_mtime(args->createhow4_u.createverf,
6777 		    &cva.va_mtime);
6778 
6779 		vap = &cva;
6780 
6781 		/*
6782 		 * For EXCL create, attrset is set to the server attr
6783 		 * used to cache the client's verifier.
6784 		 */
6785 		*attrset = FATTR4_TIME_MODIFY_MASK;
6786 		break;
6787 	}
6788 
6789 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6790 	name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6791 	    MAXPATHLEN  + 1);
6792 
6793 	if (name == NULL) {
6794 		kmem_free(nm, buflen);
6795 		return (NFS4ERR_SERVERFAULT);
6796 	}
6797 
6798 	status = create_vnode(dvp, name, vap, args->mode,
6799 	    cs->cr, &vp, &created);
6800 	if (nm != name)
6801 		kmem_free(name, MAXPATHLEN + 1);
6802 	kmem_free(nm, buflen);
6803 
6804 	if (status != NFS4_OK) {
6805 		if (ntov_table_init)
6806 			nfs4_ntov_table_free(&ntov, &sarg);
6807 		*attrset = 0;
6808 		return (status);
6809 	}
6810 
6811 	trunc = (setsize && !created);
6812 
6813 	if (args->mode != EXCLUSIVE4) {
6814 		bitmap4 createmask = fattr->attrmask;
6815 
6816 		/*
6817 		 * True verification that object was created with correct
6818 		 * attrs is impossible.  The attrs could have been changed
6819 		 * immediately after object creation.  If attributes did
6820 		 * not verify, the only recourse for the server is to
6821 		 * destroy the object.  Maybe if some attrs (like gid)
6822 		 * are set incorrectly, the object should be destroyed;
6823 		 * however, seems bad as a default policy.  Do we really
6824 		 * want to destroy an object over one of the times not
6825 		 * verifying correctly?  For these reasons, the server
6826 		 * currently sets bits in attrset for createattrs
6827 		 * that were set; however, no verification is done.
6828 		 *
6829 		 * vmask_to_nmask accounts for vattr bits set on create
6830 		 *	[do_rfs4_set_attrs() only sets resp bits for
6831 		 *	 non-vattr/vfs bits.]
6832 		 * Mask off any bits we set by default so as not to return
6833 		 * more attrset bits than were requested in createattrs
6834 		 */
6835 		if (created) {
6836 			nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6837 			*attrset &= createmask;
6838 		} else {
6839 			/*
6840 			 * We did not create the vnode (we tried but it
6841 			 * already existed).  In this case, the only createattr
6842 			 * that the spec allows the server to set is size,
6843 			 * and even then, it can only be set if it is 0.
6844 			 */
6845 			*attrset = 0;
6846 			if (trunc)
6847 				*attrset = FATTR4_SIZE_MASK;
6848 		}
6849 	}
6850 	if (ntov_table_init)
6851 		nfs4_ntov_table_free(&ntov, &sarg);
6852 
6853 	/*
6854 	 * Get the initial "after" sequence number, if it fails,
6855 	 * set to zero, time to before.
6856 	 */
6857 	iva.va_mask = AT_CTIME|AT_SEQ;
6858 	if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6859 		iva.va_seq = 0;
6860 		iva.va_ctime = bva.va_ctime;
6861 	}
6862 
6863 	/*
6864 	 * create_vnode attempts to create the file exclusive,
6865 	 * if it already exists the VOP_CREATE will fail and
6866 	 * may not increase va_seq. It is atomic if
6867 	 * we haven't changed the directory, but if it has changed
6868 	 * we don't know what changed it.
6869 	 */
6870 	if (!created) {
6871 		if (bva.va_seq && iva.va_seq &&
6872 		    bva.va_seq == iva.va_seq)
6873 			cinfo->atomic = TRUE;
6874 		else
6875 			cinfo->atomic = FALSE;
6876 		NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6877 	} else {
6878 		/*
6879 		 * The entry was created, we need to sync the
6880 		 * directory metadata.
6881 		 */
6882 		(void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6883 
6884 		/*
6885 		 * Get "after" change value, if it fails, simply return the
6886 		 * before value.
6887 		 */
6888 		ava.va_mask = AT_CTIME|AT_SEQ;
6889 		if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6890 			ava.va_ctime = bva.va_ctime;
6891 			ava.va_seq = 0;
6892 		}
6893 
6894 		NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6895 
6896 		/*
6897 		 * The cinfo->atomic = TRUE only if we have
6898 		 * non-zero va_seq's, and it has incremented by exactly one
6899 		 * during the create_vnode and it didn't
6900 		 * change during the VOP_FSYNC.
6901 		 */
6902 		if (bva.va_seq && iva.va_seq && ava.va_seq &&
6903 		    iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6904 			cinfo->atomic = TRUE;
6905 		else
6906 			cinfo->atomic = FALSE;
6907 	}
6908 
6909 	/* Check for mandatory locking and that the size gets set. */
6910 	cva.va_mask = AT_MODE;
6911 	if (setsize)
6912 		cva.va_mask |= AT_SIZE;
6913 
6914 	/* Assume the worst */
6915 	cs->mandlock = TRUE;
6916 
6917 	if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6918 		cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6919 
6920 		/*
6921 		 * Truncate the file if necessary; this would be
6922 		 * the case for create over an existing file.
6923 		 */
6924 
6925 		if (trunc) {
6926 			int in_crit = 0;
6927 			rfs4_file_t *fp;
6928 			nfs4_srv_t *nsrv4;
6929 			bool_t create = FALSE;
6930 
6931 			/*
6932 			 * We are writing over an existing file.
6933 			 * Check to see if we need to recall a delegation.
6934 			 */
6935 			nsrv4 = nfs4_get_srv();
6936 			rfs4_hold_deleg_policy(nsrv4);
6937 			if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6938 				if (rfs4_check_delegated_byfp(FWRITE, fp,
6939 				    (reqsize == 0), FALSE, FALSE, &clientid)) {
6940 					rfs4_file_rele(fp);
6941 					rfs4_rele_deleg_policy(nsrv4);
6942 					VN_RELE(vp);
6943 					*attrset = 0;
6944 					return (NFS4ERR_DELAY);
6945 				}
6946 				rfs4_file_rele(fp);
6947 			}
6948 			rfs4_rele_deleg_policy(nsrv4);
6949 
6950 			if (nbl_need_check(vp)) {
6951 				in_crit = 1;
6952 
6953 				ASSERT(reqsize == 0);
6954 
6955 				nbl_start_crit(vp, RW_READER);
6956 				if (nbl_conflict(vp, NBL_WRITE, 0,
6957 				    cva.va_size, 0, NULL)) {
6958 					in_crit = 0;
6959 					nbl_end_crit(vp);
6960 					VN_RELE(vp);
6961 					*attrset = 0;
6962 					return (NFS4ERR_ACCESS);
6963 				}
6964 			}
6965 			ct.cc_sysid = 0;
6966 			ct.cc_pid = 0;
6967 			ct.cc_caller_id = nfs4_srv_caller_id;
6968 			ct.cc_flags = CC_DONTBLOCK;
6969 
6970 			cva.va_mask = AT_SIZE;
6971 			cva.va_size = reqsize;
6972 			(void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6973 			if (in_crit)
6974 				nbl_end_crit(vp);
6975 		}
6976 	}
6977 
6978 	error = makefh4(&cs->fh, vp, cs->exi);
6979 
6980 	/*
6981 	 * Force modified data and metadata out to stable storage.
6982 	 */
6983 	(void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6984 
6985 	if (error) {
6986 		VN_RELE(vp);
6987 		*attrset = 0;
6988 		return (puterrno4(error));
6989 	}
6990 
6991 	/* if parent dir is attrdir, set namedattr fh flag */
6992 	if (dvp->v_flag & V_XATTRDIR)
6993 		set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6994 
6995 	if (cs->vp)
6996 		VN_RELE(cs->vp);
6997 
6998 	cs->vp = vp;
6999 
7000 	/*
7001 	 * if we did not create the file, we will need to check
7002 	 * the access bits on the file
7003 	 */
7004 
7005 	if (!created) {
7006 		if (setsize)
7007 			args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
7008 		status = check_open_access(args->share_access, cs, req);
7009 		if (status != NFS4_OK)
7010 			*attrset = 0;
7011 	}
7012 	return (status);
7013 }
7014 
7015 static void
close_expired_state(rfs4_entry_t u_entry)7016 close_expired_state(rfs4_entry_t u_entry)
7017 {
7018 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
7019 
7020 	if (sp->rs_closed)
7021 		return;
7022 
7023 	/* not expired ? */
7024 	if (gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
7025 	    <= rfs4_lease_time)
7026 		return;
7027 
7028 	rfs4_state_close(sp, TRUE, TRUE, CRED());
7029 	rfs4_dbe_invalidate(sp->rs_dbe);
7030 }
7031 
7032 /*ARGSUSED*/
7033 static void
rfs4_do_open(struct compound_state * cs,struct svc_req * req,rfs4_openowner_t * oo,delegreq_t deleg,uint32_t access,uint32_t deny,OPEN4res * resp,int deleg_cur)7034 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
7035     rfs4_openowner_t *oo, delegreq_t deleg,
7036     uint32_t access, uint32_t deny,
7037     OPEN4res *resp, int deleg_cur)
7038 {
7039 	/* XXX Currently not using req  */
7040 	rfs4_state_t *sp;
7041 	rfs4_file_t *fp;
7042 	bool_t screate = TRUE;
7043 	bool_t fcreate = TRUE;
7044 	uint32_t open_a, share_a;
7045 	uint32_t open_d, share_d;
7046 	rfs4_deleg_state_t *dsp;
7047 	sysid_t sysid;
7048 	nfsstat4 status;
7049 	caller_context_t ct;
7050 	int fflags = 0;
7051 	int recall = 0;
7052 	int err;
7053 	int first_open;
7054 	int tries = 0;
7055 
7056 	/* get the file struct and hold a lock on it during initial open */
7057 	fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
7058 	if (fp == NULL) {
7059 		resp->status = NFS4ERR_RESOURCE;
7060 		DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
7061 		return;
7062 	}
7063 
7064 	sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
7065 	if (sp == NULL) {
7066 		resp->status = NFS4ERR_RESOURCE;
7067 		DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
7068 		/* No need to keep any reference */
7069 		rw_exit(&fp->rf_file_rwlock);
7070 		rfs4_file_rele(fp);
7071 		return;
7072 	}
7073 
7074 	/* try to get the sysid before continuing */
7075 	if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
7076 		resp->status = status;
7077 		rfs4_file_rele(fp);
7078 		/* Not a fully formed open; "close" it */
7079 		if (screate == TRUE)
7080 			rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7081 		rfs4_state_rele(sp);
7082 		return;
7083 	}
7084 
7085 	/* Calculate the fflags for this OPEN. */
7086 	if (access & OPEN4_SHARE_ACCESS_READ)
7087 		fflags |= FREAD;
7088 	if (access & OPEN4_SHARE_ACCESS_WRITE)
7089 		fflags |= FWRITE;
7090 
7091 again:
7092 	rfs4_dbe_lock(sp->rs_dbe);
7093 
7094 	/*
7095 	 * Calculate the new deny and access mode that this open is adding to
7096 	 * the file for this open owner;
7097 	 */
7098 	open_d = (deny & ~sp->rs_open_deny);
7099 	open_a = (access & ~sp->rs_open_access);
7100 
7101 	/*
7102 	 * Calculate the new share access and share deny modes that this open
7103 	 * is adding to the file for this open owner;
7104 	 */
7105 	share_a = (access & ~sp->rs_share_access);
7106 	share_d = (deny & ~sp->rs_share_deny);
7107 
7108 	first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
7109 
7110 	/*
7111 	 * Check to see the client has already sent an open for this
7112 	 * open owner on this file with the same share/deny modes.
7113 	 * If so, we don't need to check for a conflict and we don't
7114 	 * need to add another shrlock.  If not, then we need to
7115 	 * check for conflicts in deny and access before checking for
7116 	 * conflicts in delegation.  We don't want to recall a
7117 	 * delegation based on an open that will eventually fail based
7118 	 * on shares modes.
7119 	 */
7120 
7121 	if (share_a || share_d) {
7122 		if ((err = rfs4_share(sp, access, deny)) != 0) {
7123 			rfs4_dbe_unlock(sp->rs_dbe);
7124 			if (err == NFS4ERR_SHARE_DENIED && ++tries < 2) {
7125 				/*
7126 				 * Cleanup recently expired (not yet cleaned by
7127 				 * reaper thread) and re-try.
7128 				 */
7129 				nfs4_srv_t *nsrv4 = nfs4_get_srv();
7130 
7131 				rfs4_dbsearch_cb(nsrv4->rfs4_state_file_idx,
7132 				    sp->rs_finfo, rfs4_lookup_exp_state_max,
7133 				    close_expired_state);
7134 				goto again;
7135 			}
7136 
7137 			resp->status = err;
7138 
7139 			rfs4_file_rele(fp);
7140 			/* Not a fully formed open; "close" it */
7141 			if (screate == TRUE)
7142 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7143 			rfs4_state_rele(sp);
7144 			return;
7145 		}
7146 	}
7147 
7148 	rfs4_dbe_lock(fp->rf_dbe);
7149 
7150 	/*
7151 	 * Check to see if this file is delegated and if so, if a
7152 	 * recall needs to be done.
7153 	 */
7154 	if (rfs4_check_recall(sp, access)) {
7155 		rfs4_dbe_unlock(fp->rf_dbe);
7156 		rfs4_dbe_unlock(sp->rs_dbe);
7157 		rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
7158 		delay(NFS4_DELEGATION_CONFLICT_DELAY);
7159 		rfs4_dbe_lock(sp->rs_dbe);
7160 
7161 		/* if state closed while lock was dropped */
7162 		if (sp->rs_closed) {
7163 			if (share_a || share_d)
7164 				(void) rfs4_unshare(sp);
7165 			rfs4_dbe_unlock(sp->rs_dbe);
7166 			rfs4_file_rele(fp);
7167 			/* Not a fully formed open; "close" it */
7168 			if (screate == TRUE)
7169 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7170 			rfs4_state_rele(sp);
7171 			resp->status = NFS4ERR_OLD_STATEID;
7172 			return;
7173 		}
7174 
7175 		rfs4_dbe_lock(fp->rf_dbe);
7176 		/* Let's see if the delegation was returned */
7177 		if (rfs4_check_recall(sp, access)) {
7178 			rfs4_dbe_unlock(fp->rf_dbe);
7179 			if (share_a || share_d)
7180 				(void) rfs4_unshare(sp);
7181 			rfs4_dbe_unlock(sp->rs_dbe);
7182 			rfs4_file_rele(fp);
7183 			rfs4_update_lease(sp->rs_owner->ro_client);
7184 
7185 			/* Not a fully formed open; "close" it */
7186 			if (screate == TRUE)
7187 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7188 			rfs4_state_rele(sp);
7189 			resp->status = NFS4ERR_DELAY;
7190 			return;
7191 		}
7192 	}
7193 	/*
7194 	 * the share check passed and any delegation conflict has been
7195 	 * taken care of, now call vop_open.
7196 	 * if this is the first open then call vop_open with fflags.
7197 	 * if not, call vn_open_upgrade with just the upgrade flags.
7198 	 *
7199 	 * if the file has been opened already, it will have the current
7200 	 * access mode in the state struct.  if it has no share access, then
7201 	 * this is a new open.
7202 	 *
7203 	 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
7204 	 * call VOP_OPEN(), just do the open upgrade.
7205 	 */
7206 	if (first_open && !deleg_cur) {
7207 		ct.cc_sysid = sysid;
7208 		ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
7209 		ct.cc_caller_id = nfs4_srv_caller_id;
7210 		ct.cc_flags = CC_DONTBLOCK;
7211 		err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
7212 		if (err) {
7213 			rfs4_dbe_unlock(fp->rf_dbe);
7214 			if (share_a || share_d)
7215 				(void) rfs4_unshare(sp);
7216 			rfs4_dbe_unlock(sp->rs_dbe);
7217 			rfs4_file_rele(fp);
7218 
7219 			/* Not a fully formed open; "close" it */
7220 			if (screate == TRUE)
7221 				rfs4_state_close(sp, FALSE, FALSE, cs->cr);
7222 			rfs4_state_rele(sp);
7223 			/* check if a monitor detected a delegation conflict */
7224 			if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
7225 				resp->status = NFS4ERR_DELAY;
7226 			else
7227 				resp->status = NFS4ERR_SERVERFAULT;
7228 			return;
7229 		}
7230 	} else { /* open upgrade */
7231 		/*
7232 		 * calculate the fflags for the new mode that is being added
7233 		 * by this upgrade.
7234 		 */
7235 		fflags = 0;
7236 		if (open_a & OPEN4_SHARE_ACCESS_READ)
7237 			fflags |= FREAD;
7238 		if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7239 			fflags |= FWRITE;
7240 		vn_open_upgrade(cs->vp, fflags);
7241 	}
7242 	sp->rs_open_access |= access;
7243 	sp->rs_open_deny |= deny;
7244 
7245 	if (open_d & OPEN4_SHARE_DENY_READ)
7246 		fp->rf_deny_read++;
7247 	if (open_d & OPEN4_SHARE_DENY_WRITE)
7248 		fp->rf_deny_write++;
7249 	fp->rf_share_deny |= deny;
7250 
7251 	if (open_a & OPEN4_SHARE_ACCESS_READ)
7252 		fp->rf_access_read++;
7253 	if (open_a & OPEN4_SHARE_ACCESS_WRITE)
7254 		fp->rf_access_write++;
7255 	fp->rf_share_access |= access;
7256 
7257 	/*
7258 	 * Check for delegation here. if the deleg argument is not
7259 	 * DELEG_ANY, then this is a reclaim from a client and
7260 	 * we must honor the delegation requested. If necessary we can
7261 	 * set the recall flag.
7262 	 */
7263 
7264 	dsp = rfs4_grant_delegation(deleg, sp, &recall);
7265 
7266 	cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
7267 
7268 	next_stateid(&sp->rs_stateid);
7269 
7270 	resp->stateid = sp->rs_stateid.stateid;
7271 
7272 	rfs4_dbe_unlock(fp->rf_dbe);
7273 	rfs4_dbe_unlock(sp->rs_dbe);
7274 
7275 	if (dsp) {
7276 		rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
7277 		rfs4_deleg_state_rele(dsp);
7278 	}
7279 
7280 	rfs4_file_rele(fp);
7281 	rfs4_state_rele(sp);
7282 
7283 	resp->status = NFS4_OK;
7284 }
7285 
7286 /*ARGSUSED*/
7287 static void
rfs4_do_openfh(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7288 rfs4_do_openfh(struct compound_state *cs, struct svc_req *req, OPEN4args *args,
7289     rfs4_openowner_t *oo, OPEN4res *resp)
7290 {
7291 	/* cs->vp and cs->fh have been updated by putfh. */
7292 	rfs4_do_open(cs, req, oo, DELEG_ANY,
7293 	    (args->share_access & 0xff), args->share_deny, resp, 0);
7294 }
7295 
7296 /*ARGSUSED*/
7297 static void
rfs4_do_opennull(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7298 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
7299     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7300 {
7301 	change_info4 *cinfo = &resp->cinfo;
7302 	bitmap4 *attrset = &resp->attrset;
7303 
7304 	if (args->opentype == OPEN4_NOCREATE)
7305 		resp->status = rfs4_lookupfile(&args->claim.open_claim4_u.file,
7306 		    req, cs, args->share_access, cinfo);
7307 	else {
7308 		/* inhibit delegation grants during exclusive create */
7309 
7310 		if (args->mode == EXCLUSIVE4)
7311 			rfs4_disable_delegation();
7312 
7313 		resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
7314 		    oo->ro_client->rc_clientid);
7315 	}
7316 
7317 	if (resp->status == NFS4_OK) {
7318 
7319 		/* cs->vp cs->fh now reference the desired file */
7320 
7321 		rfs4_do_open(cs, req, oo,
7322 		    oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7323 		    args->share_access, args->share_deny, resp, 0);
7324 
7325 		/*
7326 		 * If rfs4_createfile set attrset, we must
7327 		 * clear this attrset before the response is copied.
7328 		 */
7329 		if (resp->status != NFS4_OK && resp->attrset) {
7330 			resp->attrset = 0;
7331 		}
7332 	}
7333 	else
7334 		*cs->statusp = resp->status;
7335 
7336 	if (args->mode == EXCLUSIVE4)
7337 		rfs4_enable_delegation();
7338 }
7339 
7340 /*ARGSUSED*/
7341 static void
rfs4_do_openprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7342 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7343     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7344 {
7345 	change_info4 *cinfo = &resp->cinfo;
7346 	vattr_t va;
7347 	vtype_t v_type = cs->vp->v_type;
7348 	int error = 0;
7349 
7350 	/* Verify that we have a regular file */
7351 	if (v_type != VREG) {
7352 		if (v_type == VDIR)
7353 			resp->status = NFS4ERR_ISDIR;
7354 		else if (v_type == VLNK)
7355 			resp->status = NFS4ERR_SYMLINK;
7356 		else
7357 			resp->status = NFS4ERR_INVAL;
7358 		return;
7359 	}
7360 
7361 	va.va_mask = AT_MODE|AT_UID;
7362 	error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7363 	if (error) {
7364 		resp->status = puterrno4(error);
7365 		return;
7366 	}
7367 
7368 	cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7369 
7370 	/*
7371 	 * Check if we have access to the file, Note the the file
7372 	 * could have originally been open UNCHECKED or GUARDED
7373 	 * with mode bits that will now fail, but there is nothing
7374 	 * we can really do about that except in the case that the
7375 	 * owner of the file is the one requesting the open.
7376 	 */
7377 	if (crgetuid(cs->cr) != va.va_uid) {
7378 		resp->status = check_open_access(args->share_access, cs, req);
7379 		if (resp->status != NFS4_OK) {
7380 			return;
7381 		}
7382 	}
7383 
7384 	/*
7385 	 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7386 	 */
7387 	cinfo->before = 0;
7388 	cinfo->after = 0;
7389 	cinfo->atomic = FALSE;
7390 
7391 	rfs4_do_open(cs, req, oo,
7392 	    NFS4_DELEG4TYPE2REQTYPE(args->claim.open_claim4_u.delegate_type),
7393 	    args->share_access, args->share_deny, resp, 0);
7394 }
7395 
7396 static void
rfs4_do_opendelcur(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7397 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7398     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7399 {
7400 	int error;
7401 	nfsstat4 status;
7402 	stateid4 stateid =
7403 	    args->claim.open_claim4_u.delegate_cur_info.delegate_stateid;
7404 	rfs4_deleg_state_t *dsp;
7405 
7406 	/*
7407 	 * Find the state info from the stateid and confirm that the
7408 	 * file is delegated.  If the state openowner is the same as
7409 	 * the supplied openowner we're done. If not, get the file
7410 	 * info from the found state info. Use that file info to
7411 	 * create the state for this lock owner. Note solaris doen't
7412 	 * really need the pathname to find the file. We may want to
7413 	 * lookup the pathname and make sure that the vp exist and
7414 	 * matches the vp in the file structure. However it is
7415 	 * possible that the pathname nolonger exists (local process
7416 	 * unlinks the file), so this may not be that useful.
7417 	 */
7418 
7419 	status = rfs4_get_deleg_state(&stateid, &dsp);
7420 	if (status != NFS4_OK) {
7421 		resp->status = status;
7422 		return;
7423 	}
7424 
7425 	ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7426 
7427 	/*
7428 	 * New lock owner, create state. Since this was probably called
7429 	 * in response to a CB_RECALL we set deleg to DELEG_NONE
7430 	 */
7431 
7432 	ASSERT(cs->vp != NULL);
7433 	VN_RELE(cs->vp);
7434 	VN_HOLD(dsp->rds_finfo->rf_vp);
7435 	cs->vp = dsp->rds_finfo->rf_vp;
7436 
7437 	error = makefh4(&cs->fh, cs->vp, cs->exi);
7438 	if (error != 0) {
7439 		rfs4_deleg_state_rele(dsp);
7440 		*cs->statusp = resp->status = puterrno4(error);
7441 		return;
7442 	}
7443 
7444 	/* Mark progress for delegation returns */
7445 	dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7446 	rfs4_deleg_state_rele(dsp);
7447 	rfs4_do_open(cs, req, oo, DELEG_NONE,
7448 	    args->share_access, args->share_deny, resp, 1);
7449 }
7450 
7451 /*ARGSUSED*/
7452 static void
rfs4_do_opendelprev(struct compound_state * cs,struct svc_req * req,OPEN4args * args,rfs4_openowner_t * oo,OPEN4res * resp)7453 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7454     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7455 {
7456 	/*
7457 	 * Lookup the pathname, it must already exist since this file
7458 	 * was delegated.
7459 	 *
7460 	 * Find the file and state info for this vp and open owner pair.
7461 	 *	check that they are in fact delegated.
7462 	 *	check that the state access and deny modes are the same.
7463 	 *
7464 	 * Return the delgation possibly seting the recall flag.
7465 	 */
7466 	rfs4_file_t *fp;
7467 	rfs4_state_t *sp;
7468 	bool_t create = FALSE;
7469 	bool_t dcreate = FALSE;
7470 	rfs4_deleg_state_t *dsp;
7471 	nfsace4 *ace;
7472 
7473 	/* Note we ignore oflags */
7474 	resp->status = rfs4_lookupfile(
7475 	    &args->claim.open_claim4_u.file_delegate_prev,
7476 	    req, cs, args->share_access, &resp->cinfo);
7477 
7478 	if (resp->status != NFS4_OK) {
7479 		return;
7480 	}
7481 
7482 	/* get the file struct and hold a lock on it during initial open */
7483 	fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7484 	if (fp == NULL) {
7485 		resp->status = NFS4ERR_RESOURCE;
7486 		DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7487 		return;
7488 	}
7489 
7490 	sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7491 	if (sp == NULL) {
7492 		resp->status = NFS4ERR_SERVERFAULT;
7493 		DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7494 		rw_exit(&fp->rf_file_rwlock);
7495 		rfs4_file_rele(fp);
7496 		return;
7497 	}
7498 
7499 	rfs4_dbe_lock(sp->rs_dbe);
7500 	rfs4_dbe_lock(fp->rf_dbe);
7501 	if (args->share_access != sp->rs_share_access ||
7502 	    args->share_deny != sp->rs_share_deny ||
7503 	    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7504 		NFS4_DEBUG(rfs4_debug,
7505 		    (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7506 		rfs4_dbe_unlock(fp->rf_dbe);
7507 		rfs4_dbe_unlock(sp->rs_dbe);
7508 		rfs4_file_rele(fp);
7509 		rfs4_state_rele(sp);
7510 		resp->status = NFS4ERR_SERVERFAULT;
7511 		return;
7512 	}
7513 	rfs4_dbe_unlock(fp->rf_dbe);
7514 	rfs4_dbe_unlock(sp->rs_dbe);
7515 
7516 	dsp = rfs4_finddeleg(sp, &dcreate);
7517 	if (dsp == NULL) {
7518 		rfs4_state_rele(sp);
7519 		rfs4_file_rele(fp);
7520 		resp->status = NFS4ERR_SERVERFAULT;
7521 		return;
7522 	}
7523 
7524 	next_stateid(&sp->rs_stateid);
7525 
7526 	resp->stateid = sp->rs_stateid.stateid;
7527 
7528 	resp->delegation.delegation_type = dsp->rds_dtype;
7529 
7530 	if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7531 		open_read_delegation4 *rv =
7532 		    &resp->delegation.open_delegation4_u.read;
7533 
7534 		rv->stateid = dsp->rds_delegid.stateid;
7535 		rv->recall = FALSE; /* no policy in place to set to TRUE */
7536 		ace = &rv->permissions;
7537 	} else {
7538 		open_write_delegation4 *rv =
7539 		    &resp->delegation.open_delegation4_u.write;
7540 
7541 		rv->stateid = dsp->rds_delegid.stateid;
7542 		rv->recall = FALSE;  /* no policy in place to set to TRUE */
7543 		ace = &rv->permissions;
7544 		rv->space_limit.limitby = NFS_LIMIT_SIZE;
7545 		rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7546 	}
7547 
7548 	/* XXX For now */
7549 	ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7550 	ace->flag = 0;
7551 	ace->access_mask = 0;
7552 	ace->who.utf8string_len = 0;
7553 	ace->who.utf8string_val = 0;
7554 
7555 	rfs4_deleg_state_rele(dsp);
7556 	rfs4_state_rele(sp);
7557 	rfs4_file_rele(fp);
7558 }
7559 
/*
 * Verdict of a sequence id check.  The numeric values are fixed.
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* seqid is acceptable; proceed */
	NFS4_CHKSEQ_REPLAY = 1,	/* retransmission of the last request */
	NFS4_CHKSEQ_BAD = 2	/* seqid is out of sequence */
} rfs4_chkseq_t;
7565 
7566 /*
7567  * Generic function for sequence number checks.
7568  */
7569 static rfs4_chkseq_t
rfs4_check_seqid(seqid4 seqid,nfs_resop4 * lastop,seqid4 rqst_seq,nfs_resop4 * resop,bool_t copyres)7570 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7571     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7572 {
7573 	/* Same sequence ids and matching operations? */
7574 	if (seqid == rqst_seq && resop->resop == lastop->resop) {
7575 		if (copyres == TRUE) {
7576 			rfs4_free_reply(resop);
7577 			rfs4_copy_reply(resop, lastop);
7578 		}
7579 		NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7580 		    "Replayed SEQID %d\n", seqid));
7581 		return (NFS4_CHKSEQ_REPLAY);
7582 	}
7583 
7584 	/* If the incoming sequence is not the next expected then it is bad */
7585 	if (rqst_seq != seqid + 1) {
7586 		if (rqst_seq == seqid) {
7587 			NFS4_DEBUG(rfs4_debug,
7588 			    (CE_NOTE, "BAD SEQID: Replayed sequence id "
7589 			    "but last op was %d current op is %d\n",
7590 			    lastop->resop, resop->resop));
7591 			return (NFS4_CHKSEQ_BAD);
7592 		}
7593 		NFS4_DEBUG(rfs4_debug,
7594 		    (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7595 		    rqst_seq, seqid));
7596 		return (NFS4_CHKSEQ_BAD);
7597 	}
7598 
7599 	/* Everything okay -- next expected */
7600 	return (NFS4_CHKSEQ_OKAY);
7601 }
7602 
7603 
7604 static rfs4_chkseq_t
rfs4_check_open_seqid(seqid4 seqid,rfs4_openowner_t * op,nfs_resop4 * resop,const compound_state_t * cs)7605 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop,
7606     const compound_state_t *cs)
7607 {
7608 	rfs4_chkseq_t rc;
7609 
7610 	if (rfs4_has_session(cs))
7611 		return (NFS4_CHKSEQ_OKAY);
7612 
7613 	rfs4_dbe_lock(op->ro_dbe);
7614 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7615 	    TRUE);
7616 	rfs4_dbe_unlock(op->ro_dbe);
7617 
7618 	if (rc == NFS4_CHKSEQ_OKAY)
7619 		rfs4_update_lease(op->ro_client);
7620 
7621 	return (rc);
7622 }
7623 
7624 static rfs4_chkseq_t
rfs4_check_olo_seqid(seqid4 olo_seqid,rfs4_openowner_t * op,nfs_resop4 * resop)7625 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7626 {
7627 	rfs4_chkseq_t rc;
7628 
7629 	rfs4_dbe_lock(op->ro_dbe);
7630 	rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7631 	    olo_seqid, resop, FALSE);
7632 	rfs4_dbe_unlock(op->ro_dbe);
7633 
7634 	return (rc);
7635 }
7636 
7637 static rfs4_chkseq_t
rfs4_check_lock_seqid(seqid4 seqid,rfs4_lo_state_t * lsp,nfs_resop4 * resop)7638 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7639 {
7640 	rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7641 
7642 	rfs4_dbe_lock(lsp->rls_dbe);
7643 	if (!lsp->rls_skip_seqid_check)
7644 		rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7645 		    resop, TRUE);
7646 	rfs4_dbe_unlock(lsp->rls_dbe);
7647 
7648 	return (rc);
7649 }
7650 
/*
 * OPEN operation.
 *
 * Outline:
 *  1. Require a current vnode; resolve the client from the open owner's
 *     clientid (taken from cs->client for session requests) and reject an
 *     expired lease.
 *  2. Find or create the open owner.  Without a session, serialize on the
 *     owner's ro_sw and, for a pre-existing owner, check the request
 *     seqid (bad seqid, replay, or unconfirmed owner -> retry).
 *  3. Apply the grace-period rules for the claim type, validate the share
 *     access/deny bits, and dispatch to the per-claim helper.
 *  4. At "out", decide from the resulting status whether the open owner's
 *     sequence id and saved reply are updated; on a replay of a
 *     successful OPEN, restore the current filehandle/vnode from the
 *     saved reply.
 *
 * Exit labels: "end" only fires the done probe (no open owner is held);
 * "finish" publishes the status and releases the open owner; "out"
 * additionally releases the client and does the seqid bookkeeping.
 */
static void
rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
    struct svc_req *req, struct compound_state *cs)
{
	OPEN4args *args = &argop->nfs_argop4_u.opopen;
	OPEN4res *resp = &resop->nfs_resop4_u.opopen;
	open_owner4 *owner = &args->owner;
	open_claim_type4 claim = args->claim.claim;
	rfs4_client_t *cp;
	rfs4_openowner_t *oo;
	bool_t create;
	bool_t replay = FALSE;
	int can_reclaim;

	DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
	    OPEN4args *, args);

	if (cs->vp == NULL) {
		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
		goto end;
	}

	/* rfc5661 section 18.16.3 */
	if (rfs4_has_session(cs))
		owner->clientid = cs->client->rc_clientid;

	/*
	 * Need to check clientid and lease expiration first based on
	 * error ordering and incrementing sequence id.
	 */
	cp = rfs4_findclient_by_id(owner->clientid, FALSE);
	if (cp == NULL) {
		*cs->statusp = resp->status =
		    rfs4_check_clientid(&owner->clientid, 0);
		goto end;
	}

	if (rfs4_lease_expired(cp)) {
		rfs4_client_close(cp);
		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
		goto end;
	}
	can_reclaim = cp->rc_can_reclaim;

	/*
	 * RFC8881 18.51.3
	 * If non-reclaim locking operations are done before the
	 * RECLAIM_COMPLETE, error NFS4ERR_GRACE will be returned
	 */
	if (rfs4_has_session(cs) && !cp->rc_reclaim_completed &&
	    claim != CLAIM_PREVIOUS) {
		rfs4_client_rele(cp);
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto end;
	}

	/*
	 * Find the open_owner for use from this point forward.  Take
	 * care in updating the sequence id based on the type of error
	 * being returned.
	 */
retry:
	/* create is in/out: TRUE on return means the owner was just created */
	create = TRUE;
	oo = rfs4_findopenowner(owner, &create, args->seqid);
	if (oo == NULL) {
		*cs->statusp = resp->status = NFS4ERR_RESOURCE;
		rfs4_client_rele(cp);
		goto end;
	}

	/*
	 * OPEN_CONFIRM must not be implemented in v4.1
	 */
	if (rfs4_has_session(cs)) {
		oo->ro_need_confirm = FALSE;
	}

	/* Hold off access to the sequence space while the open is done */
	/* Workaround to avoid deadlock */
	if (!rfs4_has_session(cs))
		rfs4_sw_enter(&oo->ro_sw);

	/*
	 * If the open_owner existed before at the server, then check
	 * the sequence id.
	 */
	if (!create && !oo->ro_postpone_confirm) {
		switch (rfs4_check_open_seqid(args->seqid, oo, resop, cs)) {
		case NFS4_CHKSEQ_BAD:
			ASSERT(!rfs4_has_session(cs));
			/*
			 * A seqid ahead of ours on a still-unconfirmed owner:
			 * discard the owner's opens and start over.
			 */
			if ((args->seqid > oo->ro_open_seqid) &&
			    oo->ro_need_confirm) {
				rfs4_free_opens(oo, TRUE, FALSE);
				rfs4_sw_exit(&oo->ro_sw);
				rfs4_openowner_rele(oo);
				goto retry;
			}
			resp->status = NFS4ERR_BAD_SEQID;
			goto out;
		case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
			replay = TRUE;
			goto out;
		default:
			break;
		}

		/*
		 * Sequence was ok and open owner exists
		 * check to see if we have yet to see an
		 * open_confirm.
		 */
		if (oo->ro_need_confirm) {
			rfs4_free_opens(oo, TRUE, FALSE);
			ASSERT(!rfs4_has_session(cs));
			rfs4_sw_exit(&oo->ro_sw);
			rfs4_openowner_rele(oo);
			goto retry;
		}
	}
	/* Grace only applies to regular-type OPENs */
	if (rfs4_clnt_in_grace(cp) &&
	    (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR ||
	    claim == CLAIM_FH)) {
		*cs->statusp = resp->status = NFS4ERR_GRACE;
		goto out;
	}

	/*
	 * If previous state at the server existed then can_reclaim
	 * will be set. If not reply NFS4ERR_NO_GRACE to the
	 * client.
	 */
	if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}


	/*
	 * Reject the open if the client has missed the grace period
	 */
	if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
		*cs->statusp = resp->status = NFS4ERR_NO_GRACE;
		goto out;
	}

	/* Couple of up-front bookkeeping items */
	if (oo->ro_need_confirm) {
		/*
		 * If this is a reclaim OPEN then we should not ask
		 * for a confirmation of the open_owner per the
		 * protocol specification.
		 */
		if (claim == CLAIM_PREVIOUS)
			oo->ro_need_confirm = FALSE;
		else
			resp->rflags |= OPEN4_RESULT_CONFIRM;
	}
	resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;

	/*
	 * If there is an unshared filesystem mounted on this vnode,
	 * do not allow to open/create in this directory.
	 */
	if (vn_ismntpt(cs->vp)) {
		*cs->statusp = resp->status = NFS4ERR_ACCESS;
		goto out;
	}

	/*
	 * access must be READ, WRITE, or BOTH.  No access is invalid.
	 * deny can be READ, WRITE, BOTH, or NONE.
	 * bits not defined for access/deny are invalid.
	 */
	if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
	    (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
		*cs->statusp = resp->status = NFS4ERR_INVAL;
		goto out;
	}


	/*
	 * make sure attrset is zero before response is built.
	 */
	resp->attrset = 0;

	/* Each helper leaves its result in resp->status */
	switch (claim) {
	case CLAIM_NULL:
		rfs4_do_opennull(cs, req, args, oo, resp);
		break;
	case CLAIM_PREVIOUS:
		rfs4_do_openprev(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_CUR:
		rfs4_do_opendelcur(cs, req, args, oo, resp);
		break;
	case CLAIM_DELEGATE_PREV:
		rfs4_do_opendelprev(cs, req, args, oo, resp);
		break;
	case CLAIM_FH:
		rfs4_do_openfh(cs, req, args, oo, resp);
		break;
	default:
		resp->status = NFS4ERR_INVAL;
		break;
	}

out:
	rfs4_client_rele(cp);

	/* Catch sequence id handling here to make it a little easier */
	switch (resp->status) {
	case NFS4ERR_BADXDR:
	case NFS4ERR_BAD_SEQID:
	case NFS4ERR_BAD_STATEID:
	case NFS4ERR_NOFILEHANDLE:
	case NFS4ERR_RESOURCE:
	case NFS4ERR_STALE_CLIENTID:
	case NFS4ERR_STALE_STATEID:
		/*
		 * The protocol states that if any of these errors are
		 * being returned, the sequence id should not be
		 * incremented.  Any other return requires an
		 * increment.
		 */
		break;
	default:
		/* Always update the lease in this case */
		rfs4_update_lease(oo->ro_client);

		/* Regular response - copy the result */
		if (!replay)
			rfs4_update_open_resp(oo, resop, &cs->fh);

		/*
		 * REPLAY case: Only if the previous response was OK
		 * do we copy the filehandle.  If not OK, no
		 * filehandle to copy.
		 */
		if (replay == TRUE &&
		    resp->status == NFS4_OK &&
		    oo->ro_reply_fh.nfs_fh4_val) {
			/*
			 * If this is a replay, we must restore the
			 * current filehandle/vp to that of what was
			 * returned originally.  Try our best to do
			 * it.
			 */
			nfs_fh4_fmt_t *fh_fmtp =
			    (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;

			cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
			    (fid_t *)&fh_fmtp->fh4_xlen, NULL);

			if (cs->exi == NULL) {
				resp->status = NFS4ERR_STALE;
				goto finish;
			}

			VN_RELE(cs->vp);

			/* On failure nfs4_fhtovp() is given resp->status to set */
			cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
			    &resp->status);

			if (cs->vp == NULL)
				goto finish;

			nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
		}

		/*
		 * If this was a replay, no need to update the
		 * sequence id. If the open_owner was not created on
		 * this pass, then update.  The first use of an
		 * open_owner will not bump the sequence id.
		 */
		if (replay == FALSE && !create)
			rfs4_update_open_sequence(oo);
		/*
		 * If the client is receiving an error and the
		 * open_owner needs to be confirmed, there is no way
		 * to notify the client of this fact ignoring the fact
		 * that the server has no method of returning a
		 * stateid to confirm.  Therefore, the server needs to
		 * mark this open_owner in a way as to avoid the
		 * sequence id checking the next time the client uses
		 * this open_owner.
		 */
		if (resp->status != NFS4_OK && oo->ro_need_confirm)
			oo->ro_postpone_confirm = TRUE;
		/*
		 * If OK response then clear the postpone flag and
		 * reset the sequence id to keep in sync with the
		 * client.
		 */
		if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
			oo->ro_postpone_confirm = FALSE;
			oo->ro_open_seqid = args->seqid;
		}
		break;
	}

finish:
	*cs->statusp = resp->status;

	/* ro_sw was only entered for non-session requests */
	if (!rfs4_has_session(cs))
		rfs4_sw_exit(&oo->ro_sw);
	rfs4_openowner_rele(oo);

	put_stateid4(cs, &resp->stateid);
end:
	DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
	    OPEN4res *, resp);
}
7966 
7967 /*ARGSUSED*/
7968 void
rfs4_op_open_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)7969 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7970     struct svc_req *req, struct compound_state *cs)
7971 {
7972 	OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7973 	OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7974 	rfs4_state_t *sp;
7975 	nfsstat4 status;
7976 
7977 	DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7978 	    OPEN_CONFIRM4args *, args);
7979 
7980 	ASSERT(!rfs4_has_session(cs));
7981 
7982 	if (cs->vp == NULL) {
7983 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7984 		goto out;
7985 	}
7986 
7987 	if (cs->vp->v_type != VREG) {
7988 		*cs->statusp = resp->status =
7989 		    cs->vp->v_type == VDIR ? NFS4ERR_ISDIR : NFS4ERR_INVAL;
7990 		return;
7991 	}
7992 
7993 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7994 	if (status != NFS4_OK) {
7995 		*cs->statusp = resp->status = status;
7996 		goto out;
7997 	}
7998 
7999 	/* Ensure specified filehandle matches */
8000 	if (cs->vp != sp->rs_finfo->rf_vp) {
8001 		rfs4_state_rele(sp);
8002 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8003 		goto out;
8004 	}
8005 
8006 	/* hold off other access to open_owner while we tinker */
8007 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8008 
8009 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8010 	case NFS4_CHECK_STATEID_OKAY:
8011 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8012 		    resop, cs) != 0) {
8013 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8014 			break;
8015 		}
8016 		/*
8017 		 * If it is the appropriate stateid and determined to
8018 		 * be "OKAY" then this means that the stateid does not
8019 		 * need to be confirmed and the client is in error for
8020 		 * sending an OPEN_CONFIRM.
8021 		 */
8022 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8023 		break;
8024 	case NFS4_CHECK_STATEID_OLD:
8025 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8026 		break;
8027 	case NFS4_CHECK_STATEID_BAD:
8028 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8029 		break;
8030 	case NFS4_CHECK_STATEID_EXPIRED:
8031 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8032 		break;
8033 	case NFS4_CHECK_STATEID_CLOSED:
8034 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8035 		break;
8036 	case NFS4_CHECK_STATEID_REPLAY:
8037 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8038 		    resop, cs)) {
8039 		case NFS4_CHKSEQ_OKAY:
8040 			/*
8041 			 * This is replayed stateid; if seqid matches
8042 			 * next expected, then client is using wrong seqid.
8043 			 */
8044 			/* fall through */
8045 		case NFS4_CHKSEQ_BAD:
8046 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8047 			break;
8048 		case NFS4_CHKSEQ_REPLAY:
8049 			/*
8050 			 * Note this case is the duplicate case so
8051 			 * resp->status is already set.
8052 			 */
8053 			*cs->statusp = resp->status;
8054 			rfs4_update_lease(sp->rs_owner->ro_client);
8055 			break;
8056 		}
8057 		break;
8058 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8059 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8060 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8061 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8062 			break;
8063 		}
8064 		*cs->statusp = resp->status = NFS4_OK;
8065 
8066 		next_stateid(&sp->rs_stateid);
8067 		resp->open_stateid = sp->rs_stateid.stateid;
8068 		sp->rs_owner->ro_need_confirm = FALSE;
8069 		rfs4_update_lease(sp->rs_owner->ro_client);
8070 		rfs4_update_open_sequence(sp->rs_owner);
8071 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8072 		break;
8073 	default:
8074 		ASSERT(FALSE);
8075 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8076 		break;
8077 	}
8078 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8079 	rfs4_state_rele(sp);
8080 
8081 out:
8082 	DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
8083 	    OPEN_CONFIRM4res *, resp);
8084 }
8085 
8086 /*ARGSUSED*/
8087 void
rfs4_op_open_downgrade(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8088 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
8089     struct svc_req *req, struct compound_state *cs)
8090 {
8091 	OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
8092 	OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
8093 	uint32_t access = args->share_access;
8094 	uint32_t deny = args->share_deny;
8095 	nfsstat4 status;
8096 	rfs4_state_t *sp;
8097 	rfs4_file_t *fp;
8098 	int fflags = 0;
8099 
8100 	DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
8101 	    OPEN_DOWNGRADE4args *, args);
8102 
8103 	if (cs->vp == NULL) {
8104 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8105 		goto out;
8106 	}
8107 
8108 	if (cs->vp->v_type != VREG) {
8109 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8110 		return;
8111 	}
8112 
8113 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
8114 	if (status != NFS4_OK) {
8115 		*cs->statusp = resp->status = status;
8116 		goto out;
8117 	}
8118 
8119 	/* Ensure specified filehandle matches */
8120 	if (cs->vp != sp->rs_finfo->rf_vp) {
8121 		rfs4_state_rele(sp);
8122 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8123 		goto out;
8124 	}
8125 
8126 	/* hold off other access to open_owner while we tinker */
8127 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8128 
8129 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8130 	case NFS4_CHECK_STATEID_OKAY:
8131 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8132 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8133 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8134 			goto end;
8135 		}
8136 		break;
8137 	case NFS4_CHECK_STATEID_OLD:
8138 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8139 		goto end;
8140 	case NFS4_CHECK_STATEID_BAD:
8141 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8142 		goto end;
8143 	case NFS4_CHECK_STATEID_EXPIRED:
8144 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8145 		goto end;
8146 	case NFS4_CHECK_STATEID_CLOSED:
8147 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8148 		goto end;
8149 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8150 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8151 		goto end;
8152 	case NFS4_CHECK_STATEID_REPLAY:
8153 		ASSERT(!rfs4_has_session(cs));
8154 
8155 		/* Check the sequence id for the open owner */
8156 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8157 		    resop, cs)) {
8158 		case NFS4_CHKSEQ_OKAY:
8159 			/*
8160 			 * This is replayed stateid; if seqid matches
8161 			 * next expected, then client is using wrong seqid.
8162 			 */
8163 			/* fall through */
8164 		case NFS4_CHKSEQ_BAD:
8165 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8166 			goto end;
8167 		case NFS4_CHKSEQ_REPLAY:
8168 			/*
8169 			 * Note this case is the duplicate case so
8170 			 * resp->status is already set.
8171 			 */
8172 			*cs->statusp = resp->status;
8173 			rfs4_update_lease(sp->rs_owner->ro_client);
8174 			goto end;
8175 		}
8176 		break;
8177 	default:
8178 		ASSERT(FALSE);
8179 		break;
8180 	}
8181 
8182 	rfs4_dbe_lock(sp->rs_dbe);
8183 	/*
8184 	 * Check that the new access modes and deny modes are valid.
8185 	 * Check that no invalid bits are set.
8186 	 */
8187 	if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
8188 	    (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
8189 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8190 		rfs4_update_open_sequence(sp->rs_owner);
8191 		rfs4_dbe_unlock(sp->rs_dbe);
8192 		goto end;
8193 	}
8194 
8195 	/*
8196 	 * The new modes must be a subset of the current modes and
8197 	 * the access must specify at least one mode. To test that
8198 	 * the new mode is a subset of the current modes we bitwise
8199 	 * AND them together and check that the result equals the new
8200 	 * mode. For example:
8201 	 * New mode, access == R and current mode, sp->rs_open_access  == RW
8202 	 * access & sp->rs_open_access == R == access, so the new access mode
8203 	 * is valid. Consider access == RW, sp->rs_open_access = R
8204 	 * access & sp->rs_open_access == R != access, so the new access mode
8205 	 * is invalid.
8206 	 */
8207 	if ((access & sp->rs_open_access) != access ||
8208 	    (deny & sp->rs_open_deny) != deny ||
8209 	    (access &
8210 	    (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
8211 		*cs->statusp = resp->status = NFS4ERR_INVAL;
8212 		rfs4_update_open_sequence(sp->rs_owner);
8213 		rfs4_dbe_unlock(sp->rs_dbe);
8214 		goto end;
8215 	}
8216 
8217 	/*
8218 	 * Release any share locks associated with this stateID.
8219 	 * Strictly speaking, this violates the spec because the
8220 	 * spec effectively requires that open downgrade be atomic.
8221 	 * At present, fs_shrlock does not have this capability.
8222 	 */
8223 	(void) rfs4_unshare(sp);
8224 
8225 	status = rfs4_share(sp, access, deny);
8226 	if (status != NFS4_OK) {
8227 		*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8228 		rfs4_update_open_sequence(sp->rs_owner);
8229 		rfs4_dbe_unlock(sp->rs_dbe);
8230 		goto end;
8231 	}
8232 
8233 	fp = sp->rs_finfo;
8234 	rfs4_dbe_lock(fp->rf_dbe);
8235 
8236 	/*
8237 	 * If the current mode has deny read and the new mode
8238 	 * does not, decrement the number of deny read mode bits
8239 	 * and if it goes to zero turn off the deny read bit
8240 	 * on the file.
8241 	 */
8242 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
8243 	    (deny & OPEN4_SHARE_DENY_READ) == 0) {
8244 		fp->rf_deny_read--;
8245 		if (fp->rf_deny_read == 0)
8246 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8247 	}
8248 
8249 	/*
8250 	 * If the current mode has deny write and the new mode
8251 	 * does not, decrement the number of deny write mode bits
8252 	 * and if it goes to zero turn off the deny write bit
8253 	 * on the file.
8254 	 */
8255 	if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
8256 	    (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
8257 		fp->rf_deny_write--;
8258 		if (fp->rf_deny_write == 0)
8259 			fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8260 	}
8261 
8262 	/*
8263 	 * If the current mode has access read and the new mode
8264 	 * does not, decrement the number of access read mode bits
8265 	 * and if it goes to zero turn off the access read bit
8266 	 * on the file.  set fflags to FREAD for the call to
8267 	 * vn_open_downgrade().
8268 	 */
8269 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
8270 	    (access & OPEN4_SHARE_ACCESS_READ) == 0) {
8271 		fp->rf_access_read--;
8272 		if (fp->rf_access_read == 0)
8273 			fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8274 		fflags |= FREAD;
8275 	}
8276 
8277 	/*
8278 	 * If the current mode has access write and the new mode
8279 	 * does not, decrement the number of access write mode bits
8280 	 * and if it goes to zero turn off the access write bit
8281 	 * on the file.  set fflags to FWRITE for the call to
8282 	 * vn_open_downgrade().
8283 	 */
8284 	if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
8285 	    (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8286 		fp->rf_access_write--;
8287 		if (fp->rf_access_write == 0)
8288 			fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
8289 		fflags |= FWRITE;
8290 	}
8291 
8292 	/* Check that the file is still accessible */
8293 	ASSERT(fp->rf_share_access);
8294 
8295 	rfs4_dbe_unlock(fp->rf_dbe);
8296 
8297 	/* now set the new open access and deny modes */
8298 	sp->rs_open_access = access;
8299 	sp->rs_open_deny = deny;
8300 
8301 	/*
8302 	 * we successfully downgraded the share lock, now we need to downgrade
8303 	 * the open. it is possible that the downgrade was only for a deny
8304 	 * mode and we have nothing else to do.
8305 	 */
8306 	if ((fflags & (FREAD|FWRITE)) != 0)
8307 		vn_open_downgrade(cs->vp, fflags);
8308 
8309 	/* Update the stateid */
8310 	next_stateid(&sp->rs_stateid);
8311 	resp->open_stateid = sp->rs_stateid.stateid;
8312 
8313 	rfs4_dbe_unlock(sp->rs_dbe);
8314 
8315 	*cs->statusp = resp->status = NFS4_OK;
8316 	/* Update the lease */
8317 	rfs4_update_lease(sp->rs_owner->ro_client);
8318 	/* And the sequence */
8319 	rfs4_update_open_sequence(sp->rs_owner);
8320 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8321 
8322 end:
8323 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8324 	rfs4_state_rele(sp);
8325 out:
8326 	DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
8327 	    OPEN_DOWNGRADE4res *, resp);
8328 }
8329 
/*
 * Locate the first occurrence of the NUL-terminated string s2 inside
 * the first n bytes of the buffer s1.  The buffer itself need not be
 * NUL-terminated.  Returns a pointer to the start of the match, or NULL
 * when s2 does not occur entirely within those n bytes.
 */
static void *
memstr(const void *s1, const char *s2, size_t n)
{
	const size_t patlen = strlen(s2);
	char *cursor = (char *)s1;
	size_t remaining;

	/* Slide a patlen-sized window while it still fits in the buffer. */
	for (remaining = n; remaining >= patlen; remaining--, cursor++) {
		if (bcmp(cursor, s2, patlen) == 0)
			return (cursor);
	}

	return (NULL);
}
8345 
8346 /*
8347  * The logic behind this function is detailed in the NFSv4 RFC in the
8348  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
8349  * that section for explicit guidance to server behavior for
8350  * SETCLIENTID.
8351  */
8352 void
rfs4_op_setclientid(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8353 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
8354     struct svc_req *req, struct compound_state *cs)
8355 {
8356 	SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
8357 	SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
8358 	rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
8359 	rfs4_clntip_t *ci;
8360 	bool_t create;
8361 	char *addr, *netid;
8362 	int len;
8363 
8364 	DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
8365 	    SETCLIENTID4args *, args);
8366 retry:
8367 	newcp = cp_confirmed = cp_unconfirmed = NULL;
8368 
8369 	/*
8370 	 * Save the caller's IP address
8371 	 */
8372 	args->client.cl_addr =
8373 	    (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
8374 
8375 	/*
8376 	 * Record if it is a Solaris client that cannot handle referrals.
8377 	 */
8378 	if (memstr(args->client.id_val, "Solaris", args->client.id_len) &&
8379 	    !memstr(args->client.id_val, "+referrals", args->client.id_len)) {
8380 		/* Add a "yes, it's downrev" record */
8381 		create = TRUE;
8382 		ci = rfs4_find_clntip(args->client.cl_addr, &create);
8383 		ASSERT(ci != NULL);
8384 		rfs4_dbe_rele(ci->ri_dbe);
8385 	} else {
8386 		/* Remove any previous record */
8387 		rfs4_invalidate_clntip(args->client.cl_addr);
8388 	}
8389 
8390 	/*
8391 	 * In search of an EXISTING client matching the incoming
8392 	 * request to establish a new client identifier at the server
8393 	 */
8394 	create = TRUE;
8395 	cp = rfs4_findclient(&args->client, &create, NULL);
8396 
8397 	/* Should never happen */
8398 	ASSERT(cp != NULL);
8399 
8400 	if (cp == NULL) {
8401 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8402 		goto out;
8403 	}
8404 
8405 	/*
8406 	 * Easiest case. Client identifier is newly created and is
8407 	 * unconfirmed.  Also note that for this case, no other
8408 	 * entries exist for the client identifier.  Nothing else to
8409 	 * check.  Just setup the response and respond.
8410 	 */
8411 	if (create) {
8412 		*cs->statusp = res->status = NFS4_OK;
8413 		res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8414 		res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8415 		    cp->rc_confirm_verf;
8416 		/* Setup callback information; CB_NULL confirmation later */
8417 		rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8418 
8419 		rfs4_client_rele(cp);
8420 		goto out;
8421 	}
8422 
8423 	/*
8424 	 * An existing, confirmed client may exist but it may not have
8425 	 * been active for at least one lease period.  If so, then
8426 	 * "close" the client and create a new client identifier
8427 	 */
8428 	if (rfs4_lease_expired(cp)) {
8429 		rfs4_client_close(cp);
8430 		goto retry;
8431 	}
8432 
8433 	if (cp->rc_need_confirm == TRUE)
8434 		cp_unconfirmed = cp;
8435 	else
8436 		cp_confirmed = cp;
8437 
8438 	cp = NULL;
8439 
8440 	/*
8441 	 * We have a confirmed client, now check for an
8442 	 * unconfimred entry
8443 	 */
8444 	if (cp_confirmed) {
8445 		/* If creds don't match then client identifier is inuse */
8446 		if (!creds_ok(&cp_confirmed->rc_cr_set, req, cs)) {
8447 			rfs4_cbinfo_t *cbp;
8448 			/*
8449 			 * Some one else has established this client
8450 			 * id. Try and say * who they are. We will use
8451 			 * the call back address supplied by * the
8452 			 * first client.
8453 			 */
8454 			*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8455 
8456 			addr = netid = NULL;
8457 
8458 			cbp = &cp_confirmed->rc_cbinfo;
8459 			if (cbp->cb_callback.cb_location.r_addr &&
8460 			    cbp->cb_callback.cb_location.r_netid) {
8461 				cb_client4 *cbcp = &cbp->cb_callback;
8462 
8463 				len = strlen(cbcp->cb_location.r_addr)+1;
8464 				addr = kmem_alloc(len, KM_SLEEP);
8465 				bcopy(cbcp->cb_location.r_addr, addr, len);
8466 				len = strlen(cbcp->cb_location.r_netid)+1;
8467 				netid = kmem_alloc(len, KM_SLEEP);
8468 				bcopy(cbcp->cb_location.r_netid, netid, len);
8469 			}
8470 
8471 			res->SETCLIENTID4res_u.client_using.r_addr = addr;
8472 			res->SETCLIENTID4res_u.client_using.r_netid = netid;
8473 
8474 			rfs4_client_rele(cp_confirmed);
8475 		}
8476 
8477 		/*
8478 		 * Confirmed, creds match, and verifier matches; must
8479 		 * be an update of the callback info
8480 		 */
8481 		if (cp_confirmed->rc_nfs_client.verifier ==
8482 		    args->client.verifier) {
8483 			/* Setup callback information */
8484 			rfs4_client_setcb(cp_confirmed, &args->callback,
8485 			    args->callback_ident);
8486 
8487 			/* everything okay -- move ahead */
8488 			*cs->statusp = res->status = NFS4_OK;
8489 			res->SETCLIENTID4res_u.resok4.clientid =
8490 			    cp_confirmed->rc_clientid;
8491 
8492 			/* update the confirm_verifier and return it */
8493 			rfs4_client_scv_next(cp_confirmed);
8494 			res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8495 			    cp_confirmed->rc_confirm_verf;
8496 
8497 			rfs4_client_rele(cp_confirmed);
8498 			goto out;
8499 		}
8500 
8501 		/*
8502 		 * Creds match but the verifier doesn't.  Must search
8503 		 * for an unconfirmed client that would be replaced by
8504 		 * this request.
8505 		 */
8506 		create = FALSE;
8507 		cp_unconfirmed = rfs4_findclient(&args->client, &create,
8508 		    cp_confirmed);
8509 	}
8510 
8511 	/*
8512 	 * At this point, we have taken care of the brand new client
8513 	 * struct, INUSE case, update of an existing, and confirmed
8514 	 * client struct.
8515 	 */
8516 
8517 	/*
8518 	 * check to see if things have changed while we originally
8519 	 * picked up the client struct.  If they have, then return and
8520 	 * retry the processing of this SETCLIENTID request.
8521 	 */
8522 	if (cp_unconfirmed) {
8523 		rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8524 		if (!cp_unconfirmed->rc_need_confirm) {
8525 			rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8526 			rfs4_client_rele(cp_unconfirmed);
8527 			if (cp_confirmed)
8528 				rfs4_client_rele(cp_confirmed);
8529 			goto retry;
8530 		}
8531 		/* do away with the old unconfirmed one */
8532 		rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8533 		rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8534 		rfs4_client_rele(cp_unconfirmed);
8535 		cp_unconfirmed = NULL;
8536 	}
8537 
8538 	/*
8539 	 * This search will temporarily hide the confirmed client
8540 	 * struct while a new client struct is created as the
8541 	 * unconfirmed one.
8542 	 */
8543 	create = TRUE;
8544 	newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8545 
8546 	ASSERT(newcp != NULL);
8547 
8548 	if (newcp == NULL) {
8549 		*cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8550 		rfs4_client_rele(cp_confirmed);
8551 		goto out;
8552 	}
8553 
8554 	/*
8555 	 * If one was not created, then a similar request must be in
8556 	 * process so release and start over with this one
8557 	 */
8558 	if (create != TRUE) {
8559 		rfs4_client_rele(newcp);
8560 		if (cp_confirmed)
8561 			rfs4_client_rele(cp_confirmed);
8562 		goto retry;
8563 	}
8564 
8565 	*cs->statusp = res->status = NFS4_OK;
8566 	res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8567 	res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8568 	    newcp->rc_confirm_verf;
8569 	/* Setup callback information; CB_NULL confirmation later */
8570 	rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8571 
8572 	newcp->rc_cp_confirmed = cp_confirmed;
8573 
8574 	rfs4_client_rele(newcp);
8575 
8576 out:
8577 	DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8578 	    SETCLIENTID4res *, res);
8579 }
8580 
8581 /*ARGSUSED*/
8582 void
rfs4_op_setclientid_confirm(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8583 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8584     struct svc_req *req, struct compound_state *cs)
8585 {
8586 	SETCLIENTID_CONFIRM4args *args =
8587 	    &argop->nfs_argop4_u.opsetclientid_confirm;
8588 	SETCLIENTID_CONFIRM4res *res =
8589 	    &resop->nfs_resop4_u.opsetclientid_confirm;
8590 	rfs4_client_t *cp, *cptoclose = NULL;
8591 	nfs4_srv_t *nsrv4;
8592 
8593 	DTRACE_NFSV4_2(op__setclientid__confirm__start,
8594 	    struct compound_state *, cs,
8595 	    SETCLIENTID_CONFIRM4args *, args);
8596 
8597 	nsrv4 = nfs4_get_srv();
8598 	*cs->statusp = res->status = NFS4_OK;
8599 
8600 	cp = rfs4_findclient_by_id(args->clientid, TRUE);
8601 
8602 	if (cp == NULL) {
8603 		*cs->statusp = res->status =
8604 		    rfs4_check_clientid(&args->clientid, 1);
8605 		goto out;
8606 	}
8607 
8608 	if (!creds_ok(&cp->rc_cr_set, req, cs)) {
8609 		*cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8610 		rfs4_client_rele(cp);
8611 		goto out;
8612 	}
8613 
8614 	/* If the verifier doesn't match, the record doesn't match */
8615 	if (cp->rc_confirm_verf != args->setclientid_confirm) {
8616 		*cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8617 		rfs4_client_rele(cp);
8618 		goto out;
8619 	}
8620 
8621 	rfs4_dbe_lock(cp->rc_dbe);
8622 	cp->rc_need_confirm = FALSE;
8623 	if (cp->rc_cp_confirmed) {
8624 		cptoclose = cp->rc_cp_confirmed;
8625 		cptoclose->rc_ss_remove = 1;
8626 		cp->rc_cp_confirmed = NULL;
8627 	}
8628 
8629 	/*
8630 	 * Update the client's associated server instance, if it's changed
8631 	 * since the client was created.
8632 	 */
8633 	if (rfs4_servinst(cp) != nsrv4->nfs4_cur_servinst)
8634 		rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
8635 
8636 	/*
8637 	 * Record clientid in stable storage.
8638 	 * Must be done after server instance has been assigned.
8639 	 */
8640 	rfs4_ss_clid(nsrv4, cp);
8641 
8642 	rfs4_dbe_unlock(cp->rc_dbe);
8643 
8644 	if (cptoclose)
8645 		/* don't need to rele, client_close does it */
8646 		rfs4_client_close(cptoclose);
8647 
8648 	/* If needed, initiate CB_NULL call for callback path */
8649 	rfs4_deleg_cb_check(cp);
8650 	rfs4_update_lease(cp);
8651 
8652 	/*
8653 	 * Check to see if client can perform reclaims
8654 	 */
8655 	rfs4_ss_chkclid(nsrv4, cp);
8656 
8657 	rfs4_client_rele(cp);
8658 
8659 out:
8660 	DTRACE_NFSV4_2(op__setclientid__confirm__done,
8661 	    struct compound_state *, cs,
8662 	    SETCLIENTID_CONFIRM4 *, res);
8663 }
8664 
8665 extern stateid4 invalid_stateid;
8666 
8667 /*ARGSUSED*/
8668 void
rfs4_op_close(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)8669 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8670     struct svc_req *req, struct compound_state *cs)
8671 {
8672 	CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8673 	CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8674 	rfs4_state_t *sp;
8675 	nfsstat4 status;
8676 
8677 	DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8678 	    CLOSE4args *, args);
8679 
8680 	if (cs->vp == NULL) {
8681 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8682 		goto out;
8683 	}
8684 
8685 	get_stateid4(cs, &args->open_stateid);
8686 
8687 	status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8688 	if (status != NFS4_OK) {
8689 		*cs->statusp = resp->status = status;
8690 		goto out;
8691 	}
8692 
8693 	/* Ensure specified filehandle matches */
8694 	if (cs->vp != sp->rs_finfo->rf_vp) {
8695 		rfs4_state_rele(sp);
8696 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8697 		goto out;
8698 	}
8699 
8700 	/* hold off other access to open_owner while we tinker */
8701 	rfs4_sw_enter(&sp->rs_owner->ro_sw);
8702 
8703 	switch (rfs4_check_stateid_seqid(sp, &args->open_stateid, cs)) {
8704 	case NFS4_CHECK_STATEID_OKAY:
8705 		if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8706 		    resop, cs) != NFS4_CHKSEQ_OKAY) {
8707 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8708 			goto end;
8709 		}
8710 		break;
8711 	case NFS4_CHECK_STATEID_OLD:
8712 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8713 		goto end;
8714 	case NFS4_CHECK_STATEID_BAD:
8715 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8716 		goto end;
8717 	case NFS4_CHECK_STATEID_EXPIRED:
8718 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
8719 		goto end;
8720 	case NFS4_CHECK_STATEID_CLOSED:
8721 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8722 		goto end;
8723 	case NFS4_CHECK_STATEID_UNCONFIRMED:
8724 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8725 		goto end;
8726 	case NFS4_CHECK_STATEID_REPLAY:
8727 		ASSERT(!rfs4_has_session(cs));
8728 
8729 		/* Check the sequence id for the open owner */
8730 		switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8731 		    resop, cs)) {
8732 		case NFS4_CHKSEQ_OKAY:
8733 			/*
8734 			 * This is replayed stateid; if seqid matches
8735 			 * next expected, then client is using wrong seqid.
8736 			 */
8737 			/* FALL THROUGH */
8738 		case NFS4_CHKSEQ_BAD:
8739 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8740 			goto end;
8741 		case NFS4_CHKSEQ_REPLAY:
8742 			/*
8743 			 * Note this case is the duplicate case so
8744 			 * resp->status is already set.
8745 			 */
8746 			*cs->statusp = resp->status;
8747 			rfs4_update_lease(sp->rs_owner->ro_client);
8748 			goto end;
8749 		}
8750 		break;
8751 	default:
8752 		ASSERT(FALSE);
8753 		break;
8754 	}
8755 
8756 	rfs4_dbe_lock(sp->rs_dbe);
8757 
8758 	/* Update the stateid. */
8759 	next_stateid(&sp->rs_stateid);
8760 	rfs4_dbe_unlock(sp->rs_dbe);
8761 
8762 	rfs4_update_lease(sp->rs_owner->ro_client);
8763 	rfs4_update_open_sequence(sp->rs_owner);
8764 	rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8765 
8766 	rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8767 
8768 	/* See RFC8881 section 18.2.4, and RFC7530 section 16.2.5 */
8769 	resp->open_stateid = invalid_stateid;
8770 	*cs->statusp = resp->status = status;
8771 
8772 end:
8773 	rfs4_sw_exit(&sp->rs_owner->ro_sw);
8774 	rfs4_state_rele(sp);
8775 out:
8776 	DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8777 	    CLOSE4res *, resp);
8778 }
8779 
8780 /*
8781  * Manage the counts on the file struct and close all file locks
8782  */
8783 /*ARGSUSED*/
8784 void
rfs4_release_share_lock_state(rfs4_state_t * sp,cred_t * cr,bool_t close_of_client)8785 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8786     bool_t close_of_client)
8787 {
8788 	rfs4_file_t *fp = sp->rs_finfo;
8789 	rfs4_lo_state_t *lsp;
8790 	int fflags = 0;
8791 
8792 	/*
8793 	 * If this call is part of the larger closing down of client
8794 	 * state then it is just easier to release all locks
8795 	 * associated with this client instead of going through each
8796 	 * individual file and cleaning locks there.
8797 	 */
8798 	if (close_of_client) {
8799 		if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8800 		    !list_is_empty(&sp->rs_lostatelist) &&
8801 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8802 			/* Is the PxFS kernel module loaded? */
8803 			if (lm_remove_file_locks != NULL) {
8804 				int new_sysid;
8805 
8806 				/* Encode the cluster nodeid in new sysid */
8807 				new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8808 				lm_set_nlmid_flk(&new_sysid);
8809 
8810 				/*
8811 				 * This PxFS routine removes file locks for a
8812 				 * client over all nodes of a cluster.
8813 				 */
8814 				NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8815 				    "lm_remove_file_locks(sysid=0x%x)\n",
8816 				    new_sysid));
8817 				(*lm_remove_file_locks)(new_sysid);
8818 			} else {
8819 				struct flock64 flk;
8820 
8821 				/* Release all locks for this client */
8822 				flk.l_type = F_UNLKSYS;
8823 				flk.l_whence = 0;
8824 				flk.l_start = 0;
8825 				flk.l_len = 0;
8826 				flk.l_sysid =
8827 				    sp->rs_owner->ro_client->rc_sysidt;
8828 				flk.l_pid = 0;
8829 				(void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8830 				    &flk, F_REMOTELOCK | FREAD | FWRITE,
8831 				    (u_offset_t)0, NULL, CRED(), NULL);
8832 			}
8833 
8834 			sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8835 		}
8836 	}
8837 
8838 	/*
8839 	 * Release all locks on this file by this lock owner or at
8840 	 * least mark the locks as having been released
8841 	 */
8842 	for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8843 	    lsp = list_next(&sp->rs_lostatelist, lsp)) {
8844 		lsp->rls_locks_cleaned = TRUE;
8845 
8846 		/* Was this already taken care of above? */
8847 		if (!close_of_client &&
8848 		    sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8849 			(void) cleanlocks(sp->rs_finfo->rf_vp,
8850 			    lsp->rls_locker->rl_pid,
8851 			    lsp->rls_locker->rl_client->rc_sysidt);
8852 	}
8853 
8854 	/*
8855 	 * Release any shrlocks associated with this open state ID.
8856 	 * This must be done before the rfs4_state gets marked closed.
8857 	 */
8858 	if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8859 		(void) rfs4_unshare(sp);
8860 
8861 	if (sp->rs_open_access) {
8862 		rfs4_dbe_lock(fp->rf_dbe);
8863 
8864 		/*
8865 		 * Decrement the count for each access and deny bit that this
8866 		 * state has contributed to the file.
8867 		 * If the file counts go to zero
8868 		 * clear the appropriate bit in the appropriate mask.
8869 		 */
8870 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8871 			fp->rf_access_read--;
8872 			fflags |= FREAD;
8873 			if (fp->rf_access_read == 0)
8874 				fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8875 		}
8876 		if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8877 			fp->rf_access_write--;
8878 			fflags |= FWRITE;
8879 			if (fp->rf_access_write == 0)
8880 				fp->rf_share_access &=
8881 				    ~OPEN4_SHARE_ACCESS_WRITE;
8882 		}
8883 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8884 			fp->rf_deny_read--;
8885 			if (fp->rf_deny_read == 0)
8886 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8887 		}
8888 		if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8889 			fp->rf_deny_write--;
8890 			if (fp->rf_deny_write == 0)
8891 				fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8892 		}
8893 
8894 		(void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8895 
8896 		rfs4_dbe_unlock(fp->rf_dbe);
8897 
8898 		sp->rs_open_access = 0;
8899 		sp->rs_open_deny = 0;
8900 	}
8901 }
8902 
8903 /*
8904  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8905  */
8906 static nfsstat4
lock_denied(LOCK4denied * dp,struct flock64 * flk)8907 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8908 {
8909 	rfs4_lockowner_t *lo;
8910 	rfs4_client_t *cp;
8911 	uint32_t len;
8912 
8913 	lo = rfs4_findlockowner_by_pid(flk->l_pid);
8914 	if (lo != NULL) {
8915 		cp = lo->rl_client;
8916 		if (rfs4_lease_expired(cp)) {
8917 			rfs4_lockowner_rele(lo);
8918 			rfs4_dbe_hold(cp->rc_dbe);
8919 			rfs4_client_close(cp);
8920 			return (NFS4ERR_EXPIRED);
8921 		}
8922 		dp->owner.clientid = lo->rl_owner.clientid;
8923 		len = lo->rl_owner.owner_len;
8924 		dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8925 		bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8926 		dp->owner.owner_len = len;
8927 		rfs4_lockowner_rele(lo);
8928 		goto finish;
8929 	}
8930 
8931 	/*
8932 	 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8933 	 * of the client id contain the boot time for a NFS4 lock. So we
8934 	 * fabricate and identity by setting clientid to the sysid, and
8935 	 * the lock owner to the pid.
8936 	 */
8937 	dp->owner.clientid = flk->l_sysid;
8938 	len = sizeof (pid_t);
8939 	dp->owner.owner_len = len;
8940 	dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8941 	bcopy(&flk->l_pid, dp->owner.owner_val, len);
8942 finish:
8943 	dp->offset = flk->l_start;
8944 	dp->length = flk->l_len;
8945 
8946 	if (flk->l_type == F_RDLCK)
8947 		dp->locktype = READ_LT;
8948 	else if (flk->l_type == F_WRLCK)
8949 		dp->locktype = WRITE_LT;
8950 	else
8951 		return (NFS4ERR_INVAL);	/* no mapping from POSIX ltype to v4 */
8952 
8953 	return (NFS4_OK);
8954 }
8955 
8956 /*
8957  * The NFSv4.0 LOCK operation does not support the blocking lock (at the
8958  * NFSv4.0 protocol level) so the client needs to resend the LOCK request in a
8959  * case the lock is denied by the NFSv4.0 server.  NFSv4.0 clients are prepared
8960  * for that (obviously); they are sending the LOCK requests with some delays
8961  * between the attempts.  See nfs4frlock() and nfs4_block_and_wait() for the
8962  * locking and delay implementation at the client side.
8963  *
8964  * To make the life of the clients easier, the NFSv4.0 server tries to do some
8965  * fast retries on its own (the for loop below) in a hope the lock will be
8966  * available soon.  And if not, the client won't need to resend the LOCK
8967  * requests so fast to check the lock availability.  This basically saves some
8968  * network traffic and tries to make sure the client gets the lock ASAP.
8969  */
8970 static int
setlock(vnode_t * vp,struct flock64 * flock,int flag,cred_t * cred)8971 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8972 {
8973 	int error;
8974 	struct flock64 flk;
8975 	int i;
8976 	clock_t delaytime;
8977 	int cmd;
8978 	int spin_cnt = 0;
8979 
8980 	cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8981 retry:
8982 	delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8983 
8984 	for (i = 0; i < rfs4_maxlock_tries; i++) {
8985 		LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8986 		error = VOP_FRLOCK(vp, cmd,
8987 		    flock, flag, (u_offset_t)0, NULL, cred, NULL);
8988 
8989 		if (error != EAGAIN && error != EACCES)
8990 			break;
8991 
8992 		if (i < rfs4_maxlock_tries - 1) {
8993 			delay(delaytime);
8994 			delaytime *= 2;
8995 		}
8996 	}
8997 
8998 	if (error == EAGAIN || error == EACCES) {
8999 		/* Get the owner of the lock */
9000 		flk = *flock;
9001 		LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
9002 		if (VOP_FRLOCK(vp, F_GETLK, &flk, flag, 0, NULL, cred,
9003 		    NULL) == 0) {
9004 			/*
9005 			 * There's a race inherent in the current VOP_FRLOCK
9006 			 * design where:
9007 			 * a: "other guy" takes a lock that conflicts with a
9008 			 * lock we want
9009 			 * b: we attempt to take our lock (non-blocking) and
9010 			 * the attempt fails.
9011 			 * c: "other guy" releases the conflicting lock
9012 			 * d: we ask what lock conflicts with the lock we want,
9013 			 * getting F_UNLCK (no lock blocks us)
9014 			 *
9015 			 * If we retry the non-blocking lock attempt in this
9016 			 * case (restart at step 'b') there's some possibility
9017 			 * that many such attempts might fail.  However a test
9018 			 * designed to actually provoke this race shows that
9019 			 * the vast majority of cases require no retry, and
9020 			 * only a few took as many as three retries.  Here's
9021 			 * the test outcome:
9022 			 *
9023 			 *	   number of retries    how many times we needed
9024 			 *				that many retries
9025 			 *	   0			79461
9026 			 *	   1			  862
9027 			 *	   2			   49
9028 			 *	   3			    5
9029 			 *
9030 			 * Given those empirical results, we arbitrarily limit
9031 			 * the retry count to ten.
9032 			 *
9033 			 * If we actually make to ten retries and give up,
9034 			 * nothing catastrophic happens, but we're unable to
9035 			 * return the information about the conflicting lock to
9036 			 * the NFS client.  That's an acceptable trade off vs.
9037 			 * letting this retry loop run forever.
9038 			 */
9039 			if (flk.l_type == F_UNLCK) {
9040 				if (spin_cnt++ < 10) {
9041 					/* No longer locked, retry */
9042 					goto retry;
9043 				}
9044 			} else {
9045 				*flock = flk;
9046 				LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
9047 				    F_GETLK, &flk);
9048 			}
9049 		}
9050 	}
9051 
9052 	return (error);
9053 }
9054 
9055 /*ARGSUSED*/
9056 static nfsstat4
rfs4_do_lock(rfs4_lo_state_t * lsp,nfs_lock_type4 locktype,offset4 offset,length4 length,cred_t * cred,nfs_resop4 * resop)9057 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
9058     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
9059 {
9060 	nfsstat4 status;
9061 	rfs4_lockowner_t *lo = lsp->rls_locker;
9062 	rfs4_state_t *sp = lsp->rls_state;
9063 	struct flock64 flock;
9064 	int16_t ltype;
9065 	int flag;
9066 	int error;
9067 	sysid_t sysid;
9068 	LOCK4res *lres;
9069 	vnode_t *vp;
9070 
9071 	if (rfs4_lease_expired(lo->rl_client)) {
9072 		return (NFS4ERR_EXPIRED);
9073 	}
9074 
9075 	if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9076 		return (status);
9077 
9078 	/* Check for zero length. To lock to end of file use all ones for V4 */
9079 	if (length == 0)
9080 		return (NFS4ERR_INVAL);
9081 	else if (length == (length4)(~0))
9082 		length = 0;		/* Posix to end of file  */
9083 
9084 retry:
9085 	rfs4_dbe_lock(sp->rs_dbe);
9086 	if (sp->rs_closed == TRUE) {
9087 		rfs4_dbe_unlock(sp->rs_dbe);
9088 		return (NFS4ERR_OLD_STATEID);
9089 	}
9090 
9091 	if (resop->resop != OP_LOCKU) {
9092 		switch (locktype) {
9093 		case READ_LT:
9094 		case READW_LT:
9095 			if ((sp->rs_share_access
9096 			    & OPEN4_SHARE_ACCESS_READ) == 0) {
9097 				rfs4_dbe_unlock(sp->rs_dbe);
9098 
9099 				return (NFS4ERR_OPENMODE);
9100 			}
9101 			ltype = F_RDLCK;
9102 			break;
9103 		case WRITE_LT:
9104 		case WRITEW_LT:
9105 			if ((sp->rs_share_access
9106 			    & OPEN4_SHARE_ACCESS_WRITE) == 0) {
9107 				rfs4_dbe_unlock(sp->rs_dbe);
9108 
9109 				return (NFS4ERR_OPENMODE);
9110 			}
9111 			ltype = F_WRLCK;
9112 			break;
9113 		}
9114 	} else
9115 		ltype = F_UNLCK;
9116 
9117 	flock.l_type = ltype;
9118 	flock.l_whence = 0;		/* SEEK_SET */
9119 	flock.l_start = offset;
9120 	flock.l_len = length;
9121 	flock.l_sysid = sysid;
9122 	flock.l_pid = lsp->rls_locker->rl_pid;
9123 
9124 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9125 	if (flock.l_len < 0 || flock.l_start < 0) {
9126 		rfs4_dbe_unlock(sp->rs_dbe);
9127 		return (NFS4ERR_INVAL);
9128 	}
9129 
9130 	/*
9131 	 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
9132 	 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
9133 	 */
9134 	flag = (int)sp->rs_share_access | F_REMOTELOCK;
9135 
9136 	vp = sp->rs_finfo->rf_vp;
9137 	VN_HOLD(vp);
9138 
9139 	/*
9140 	 * We need to unlock sp before we call the underlying filesystem to
9141 	 * acquire the file lock.
9142 	 */
9143 	rfs4_dbe_unlock(sp->rs_dbe);
9144 
9145 	error = setlock(vp, &flock, flag, cred);
9146 
9147 	/*
9148 	 * Make sure the file is still open.  In a case the file was closed in
9149 	 * the meantime, clean the lock we acquired using the setlock() call
9150 	 * above, and return the appropriate error.
9151 	 */
9152 	rfs4_dbe_lock(sp->rs_dbe);
9153 	if (sp->rs_closed == TRUE) {
9154 		cleanlocks(vp, lsp->rls_locker->rl_pid, sysid);
9155 		rfs4_dbe_unlock(sp->rs_dbe);
9156 
9157 		VN_RELE(vp);
9158 
9159 		return (NFS4ERR_OLD_STATEID);
9160 	}
9161 	rfs4_dbe_unlock(sp->rs_dbe);
9162 
9163 	VN_RELE(vp);
9164 
9165 	if (error == 0) {
9166 		rfs4_dbe_lock(lsp->rls_dbe);
9167 		next_stateid(&lsp->rls_lockid);
9168 		rfs4_dbe_unlock(lsp->rls_dbe);
9169 	}
9170 
9171 	/*
9172 	 * N.B. We map error values to nfsv4 errors. This is differrent
9173 	 * than puterrno4 routine.
9174 	 */
9175 	switch (error) {
9176 	case 0:
9177 		status = NFS4_OK;
9178 		break;
9179 	case EAGAIN:
9180 	case EACCES:		/* Old value */
9181 		/* Can only get here if op is OP_LOCK */
9182 		ASSERT(resop->resop == OP_LOCK);
9183 		lres = &resop->nfs_resop4_u.oplock;
9184 		status = NFS4ERR_DENIED;
9185 		if (lock_denied(&lres->LOCK4res_u.denied, &flock)
9186 		    == NFS4ERR_EXPIRED)
9187 			goto retry;
9188 		break;
9189 	case ENOLCK:
9190 		status = NFS4ERR_DELAY;
9191 		break;
9192 	case EOVERFLOW:
9193 		status = NFS4ERR_INVAL;
9194 		break;
9195 	case EINVAL:
9196 		status = NFS4ERR_NOTSUPP;
9197 		break;
9198 	default:
9199 		status = NFS4ERR_SERVERFAULT;
9200 		break;
9201 	}
9202 
9203 	return (status);
9204 }
9205 
9206 /*ARGSUSED*/
9207 void
rfs4_op_lock(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9208 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
9209     struct svc_req *req, struct compound_state *cs)
9210 {
9211 	LOCK4args *args = &argop->nfs_argop4_u.oplock;
9212 	LOCK4res *resp = &resop->nfs_resop4_u.oplock;
9213 	nfsstat4 status;
9214 	stateid4 *stateid;
9215 	rfs4_lockowner_t *lo;
9216 	rfs4_client_t *cp;
9217 	rfs4_state_t *sp = NULL;
9218 	rfs4_lo_state_t *lsp = NULL;
9219 	bool_t ls_sw_held = FALSE;
9220 	bool_t create = TRUE;
9221 	bool_t lcreate = TRUE;
9222 	bool_t dup_lock = FALSE;
9223 	int rc;
9224 
9225 	DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
9226 	    LOCK4args *, args);
9227 
9228 	if (cs->vp == NULL) {
9229 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9230 		DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9231 		    cs, LOCK4res *, resp);
9232 		return;
9233 	}
9234 
9235 	if (args->locker.new_lock_owner) {
9236 		/* Create a new lockowner for this instance */
9237 		open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
9238 
9239 		NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
9240 
9241 		stateid = &olo->open_stateid;
9242 		get_stateid4(cs, stateid);
9243 		status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
9244 		if (status != NFS4_OK) {
9245 			NFS4_DEBUG(rfs4_debug,
9246 			    (CE_NOTE, "Get state failed in lock %d", status));
9247 			*cs->statusp = resp->status = status;
9248 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9249 			    cs, LOCK4res *, resp);
9250 			return;
9251 		}
9252 
9253 		/* Ensure specified filehandle matches */
9254 		if (cs->vp != sp->rs_finfo->rf_vp) {
9255 			rfs4_state_rele(sp);
9256 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9257 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9258 			    cs, LOCK4res *, resp);
9259 			return;
9260 		}
9261 
9262 		/* hold off other access to open_owner while we tinker */
9263 		rfs4_sw_enter(&sp->rs_owner->ro_sw);
9264 
9265 		switch (rc = rfs4_check_stateid_seqid(sp, stateid, cs)) {
9266 		case NFS4_CHECK_STATEID_OLD:
9267 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9268 			goto end;
9269 		case NFS4_CHECK_STATEID_BAD:
9270 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9271 			goto end;
9272 		case NFS4_CHECK_STATEID_EXPIRED:
9273 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9274 			goto end;
9275 		case NFS4_CHECK_STATEID_UNCONFIRMED:
9276 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9277 			goto end;
9278 		case NFS4_CHECK_STATEID_CLOSED:
9279 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9280 			goto end;
9281 		case NFS4_CHECK_STATEID_OKAY:
9282 			if (rfs4_has_session(cs))
9283 				break;
9284 			/* FALLTHROUGH */
9285 		case NFS4_CHECK_STATEID_REPLAY:
9286 			ASSERT(!rfs4_has_session(cs));
9287 
9288 			switch (rfs4_check_olo_seqid(olo->open_seqid,
9289 			    sp->rs_owner, resop)) {
9290 			case NFS4_CHKSEQ_OKAY:
9291 				if (rc == NFS4_CHECK_STATEID_OKAY)
9292 					break;
9293 				/*
9294 				 * This is replayed stateid; if seqid
9295 				 * matches next expected, then client
9296 				 * is using wrong seqid.
9297 				 */
9298 				/* FALLTHROUGH */
9299 			case NFS4_CHKSEQ_BAD:
9300 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9301 				goto end;
9302 			case NFS4_CHKSEQ_REPLAY:
9303 				/* This is a duplicate LOCK request */
9304 				dup_lock = TRUE;
9305 
9306 				/*
9307 				 * For a duplicate we do not want to
9308 				 * create a new lockowner as it should
9309 				 * already exist.
9310 				 * Turn off the lockowner create flag.
9311 				 */
9312 				lcreate = FALSE;
9313 			}
9314 			break;
9315 		}
9316 
9317 		/*
9318 		 * See RFC 8881 18.10.3. MUST be ignored by the server:
9319 		 * The clientid field of the lock_owner field of the
9320 		 * open_owner field (locker.open_owner.lock_owner.clientid).
9321 		 */
9322 		if (rfs4_has_session(cs))
9323 			olo->lock_owner.clientid = cs->client->rc_clientid;
9324 
9325 		lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
9326 		if (lo == NULL) {
9327 			NFS4_DEBUG(rfs4_debug,
9328 			    (CE_NOTE, "rfs4_op_lock: no lock owner"));
9329 			*cs->statusp = resp->status = NFS4ERR_RESOURCE;
9330 			goto end;
9331 		}
9332 
9333 		lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
9334 		if (lsp == NULL) {
9335 			rfs4_update_lease(sp->rs_owner->ro_client);
9336 			/*
9337 			 * Only update theh open_seqid if this is not
9338 			 * a duplicate request
9339 			 */
9340 			if (dup_lock == FALSE) {
9341 				rfs4_update_open_sequence(sp->rs_owner);
9342 			}
9343 
9344 			NFS4_DEBUG(rfs4_debug,
9345 			    (CE_NOTE, "rfs4_op_lock: no state"));
9346 			*cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
9347 			rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9348 			rfs4_lockowner_rele(lo);
9349 			goto end;
9350 		}
9351 
9352 		/*
9353 		 * This is the new_lock_owner branch and the client is
9354 		 * supposed to be associating a new lock_owner with
9355 		 * the open file at this point.  If we find that a
9356 		 * lock_owner/state association already exists and a
9357 		 * successful LOCK request was returned to the client,
9358 		 * an error is returned to the client since this is
9359 		 * not appropriate.  The client should be using the
9360 		 * existing lock_owner branch.
9361 		 */
9362 		if (!rfs4_has_session(cs) && !dup_lock && !create) {
9363 			if (lsp->rls_lock_completed == TRUE) {
9364 				*cs->statusp =
9365 				    resp->status = NFS4ERR_BAD_SEQID;
9366 				rfs4_lockowner_rele(lo);
9367 				goto end;
9368 			}
9369 		}
9370 
9371 		rfs4_update_lease(sp->rs_owner->ro_client);
9372 
9373 		/*
9374 		 * Only update theh open_seqid if this is not
9375 		 * a duplicate request
9376 		 */
9377 		if (dup_lock == FALSE) {
9378 			rfs4_update_open_sequence(sp->rs_owner);
9379 		}
9380 
9381 		/*
9382 		 * If this is a duplicate lock request, just copy the
9383 		 * previously saved reply and return.
9384 		 */
9385 		if (dup_lock == TRUE) {
9386 			/* verify that lock_seqid's match */
9387 			if (lsp->rls_seqid != olo->lock_seqid) {
9388 				NFS4_DEBUG(rfs4_debug,
9389 				    (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
9390 				    "lsp->seqid=%d old->seqid=%d",
9391 				    lsp->rls_seqid, olo->lock_seqid));
9392 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9393 			} else {
9394 				rfs4_copy_reply(resop, &lsp->rls_reply);
9395 				/*
9396 				 * Make sure to copy the just
9397 				 * retrieved reply status into the
9398 				 * overall compound status
9399 				 */
9400 				*cs->statusp = resp->status;
9401 			}
9402 			rfs4_lockowner_rele(lo);
9403 			goto end;
9404 		}
9405 
9406 		rfs4_dbe_lock(lsp->rls_dbe);
9407 
9408 		/* Make sure to update the lock sequence id */
9409 		lsp->rls_seqid = olo->lock_seqid;
9410 
9411 		NFS4_DEBUG(rfs4_debug,
9412 		    (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
9413 
9414 		/*
9415 		 * This is used to signify the newly created lockowner
9416 		 * stateid and its sequence number.  The checks for
9417 		 * sequence number and increment don't occur on the
9418 		 * very first lock request for a lockowner.
9419 		 */
9420 		lsp->rls_skip_seqid_check = TRUE;
9421 
9422 		/* hold off other access to lsp while we tinker */
9423 		rfs4_sw_enter(&lsp->rls_sw);
9424 		ls_sw_held = TRUE;
9425 
9426 		rfs4_dbe_unlock(lsp->rls_dbe);
9427 
9428 		rfs4_lockowner_rele(lo);
9429 	} else {
9430 		stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
9431 		/* get lsp and hold the lock on the underlying file struct */
9432 		if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
9433 		    != NFS4_OK) {
9434 			*cs->statusp = resp->status = status;
9435 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9436 			    cs, LOCK4res *, resp);
9437 			return;
9438 		}
9439 		create = FALSE;	/* We didn't create lsp */
9440 
9441 		/* Ensure specified filehandle matches */
9442 		if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9443 			rfs4_lo_state_rele(lsp, TRUE);
9444 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9445 			DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
9446 			    cs, LOCK4res *, resp);
9447 			return;
9448 		}
9449 
9450 		/* hold off other access to lsp while we tinker */
9451 		rfs4_sw_enter(&lsp->rls_sw);
9452 		ls_sw_held = TRUE;
9453 
9454 		switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9455 		/*
9456 		 * The stateid looks like it was okay (expected to be
9457 		 * the next one)
9458 		 */
9459 		case NFS4_CHECK_STATEID_OKAY:
9460 			if (rfs4_has_session(cs))
9461 				break;
9462 
9463 			/*
9464 			 * The sequence id is now checked.  Determine
9465 			 * if this is a replay or if it is in the
9466 			 * expected (next) sequence.  In the case of a
9467 			 * replay, there are two replay conditions
9468 			 * that may occur.  The first is the normal
9469 			 * condition where a LOCK is done with a
9470 			 * NFS4_OK response and the stateid is
9471 			 * updated.  That case is handled below when
9472 			 * the stateid is identified as a REPLAY.  The
9473 			 * second is the case where an error is
9474 			 * returned, like NFS4ERR_DENIED, and the
9475 			 * sequence number is updated but the stateid
9476 			 * is not updated.  This second case is dealt
9477 			 * with here.  So it may seem odd that the
9478 			 * stateid is okay but the sequence id is a
9479 			 * replay but it is okay.
9480 			 */
9481 			switch (rfs4_check_lock_seqid(
9482 			    args->locker.locker4_u.lock_owner.lock_seqid,
9483 			    lsp, resop)) {
9484 			case NFS4_CHKSEQ_REPLAY:
9485 				if (resp->status != NFS4_OK) {
9486 					/*
9487 					 * Here is our replay and need
9488 					 * to verify that the last
9489 					 * response was an error.
9490 					 */
9491 					*cs->statusp = resp->status;
9492 					goto end;
9493 				}
9494 				/*
9495 				 * This is done since the sequence id
9496 				 * looked like a replay but it didn't
9497 				 * pass our check so a BAD_SEQID is
9498 				 * returned as a result.
9499 				 */
9500 				/*FALLTHROUGH*/
9501 			case NFS4_CHKSEQ_BAD:
9502 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9503 				goto end;
9504 			case NFS4_CHKSEQ_OKAY:
9505 				/* Everything looks okay move ahead */
9506 				break;
9507 			}
9508 			break;
9509 		case NFS4_CHECK_STATEID_OLD:
9510 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9511 			goto end;
9512 		case NFS4_CHECK_STATEID_BAD:
9513 			*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9514 			goto end;
9515 		case NFS4_CHECK_STATEID_EXPIRED:
9516 			*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9517 			goto end;
9518 		case NFS4_CHECK_STATEID_CLOSED:
9519 			*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9520 			goto end;
9521 		case NFS4_CHECK_STATEID_REPLAY:
9522 			ASSERT(!rfs4_has_session(cs));
9523 
9524 			switch (rfs4_check_lock_seqid(
9525 			    args->locker.locker4_u.lock_owner.lock_seqid,
9526 			    lsp, resop)) {
9527 			case NFS4_CHKSEQ_OKAY:
9528 				/*
9529 				 * This is a replayed stateid; if
9530 				 * seqid matches the next expected,
9531 				 * then client is using wrong seqid.
9532 				 */
9533 			case NFS4_CHKSEQ_BAD:
9534 				*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9535 				goto end;
9536 			case NFS4_CHKSEQ_REPLAY:
9537 				rfs4_update_lease(lsp->rls_locker->rl_client);
9538 				*cs->statusp = status = resp->status;
9539 				goto end;
9540 			}
9541 			break;
9542 		default:
9543 			ASSERT(FALSE);
9544 			break;
9545 		}
9546 
9547 		rfs4_update_lock_sequence(lsp);
9548 		rfs4_update_lease(lsp->rls_locker->rl_client);
9549 	}
9550 
9551 	/*
9552 	 * NFS4 only allows locking on regular files, so
9553 	 * verify type of object.
9554 	 */
9555 	if (cs->vp->v_type != VREG) {
9556 		if (cs->vp->v_type == VDIR)
9557 			status = NFS4ERR_ISDIR;
9558 		else
9559 			status = NFS4ERR_INVAL;
9560 		goto out;
9561 	}
9562 
9563 	cp = lsp->rls_state->rs_owner->ro_client;
9564 
9565 	if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9566 		status = NFS4ERR_GRACE;
9567 		goto out;
9568 	}
9569 
9570 	if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9571 		status = NFS4ERR_NO_GRACE;
9572 		goto out;
9573 	}
9574 
9575 	if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9576 		status = NFS4ERR_NO_GRACE;
9577 		goto out;
9578 	}
9579 
9580 	if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9581 		cs->deleg = TRUE;
9582 
9583 	status = rfs4_do_lock(lsp, args->locktype,
9584 	    args->offset, args->length, cs->cr, resop);
9585 
9586 out:
9587 	lsp->rls_skip_seqid_check = FALSE;
9588 
9589 	*cs->statusp = resp->status = status;
9590 
9591 	if (status == NFS4_OK) {
9592 		resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9593 		lsp->rls_lock_completed = TRUE;
9594 
9595 		put_stateid4(cs, &resp->LOCK4res_u.lock_stateid);
9596 	}
9597 	/*
9598 	 * Only update the "OPEN" response here if this was a new
9599 	 * lock_owner
9600 	 */
9601 	if (sp)
9602 		rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9603 
9604 	rfs4_update_lock_resp(lsp, resop);
9605 
9606 end:
9607 	if (lsp) {
9608 		if (ls_sw_held)
9609 			rfs4_sw_exit(&lsp->rls_sw);
9610 		/*
9611 		 * If an sp obtained, then the lsp does not represent
9612 		 * a lock on the file struct.
9613 		 */
9614 		if (sp != NULL)
9615 			rfs4_lo_state_rele(lsp, FALSE);
9616 		else
9617 			rfs4_lo_state_rele(lsp, TRUE);
9618 	}
9619 	if (sp) {
9620 		rfs4_sw_exit(&sp->rs_owner->ro_sw);
9621 		rfs4_state_rele(sp);
9622 	}
9623 
9624 	DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9625 	    LOCK4res *, resp);
9626 }
9627 
9628 /* free function for LOCK/LOCKT */
9629 static void
lock_denied_free(nfs_resop4 * resop)9630 lock_denied_free(nfs_resop4 *resop)
9631 {
9632 	LOCK4denied *dp = NULL;
9633 
9634 	switch (resop->resop) {
9635 	case OP_LOCK:
9636 		if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9637 			dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9638 		break;
9639 	case OP_LOCKT:
9640 		if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9641 			dp = &resop->nfs_resop4_u.oplockt.denied;
9642 		break;
9643 	default:
9644 		break;
9645 	}
9646 
9647 	if (dp)
9648 		kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9649 }
9650 
9651 /*ARGSUSED*/
9652 void
rfs4_op_locku(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9653 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9654     struct svc_req *req, struct compound_state *cs)
9655 {
9656 	LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9657 	LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9658 	nfsstat4 status;
9659 	stateid4 *stateid = &args->lock_stateid;
9660 	rfs4_lo_state_t *lsp;
9661 
9662 	DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9663 	    LOCKU4args *, args);
9664 
9665 	if (cs->vp == NULL) {
9666 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9667 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9668 		    LOCKU4res *, resp);
9669 		return;
9670 	}
9671 
9672 	get_stateid4(cs, stateid);
9673 
9674 	if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9675 		*cs->statusp = resp->status = status;
9676 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9677 		    LOCKU4res *, resp);
9678 		return;
9679 	}
9680 
9681 	/* Ensure specified filehandle matches */
9682 	if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9683 		rfs4_lo_state_rele(lsp, TRUE);
9684 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9685 		DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9686 		    LOCKU4res *, resp);
9687 		return;
9688 	}
9689 
9690 	/* hold off other access to lsp while we tinker */
9691 	rfs4_sw_enter(&lsp->rls_sw);
9692 
9693 	switch (rfs4_check_lo_stateid_seqid(lsp, stateid, cs)) {
9694 	case NFS4_CHECK_STATEID_OKAY:
9695 		if (rfs4_has_session(cs))
9696 			break;
9697 
9698 		if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9699 		    != NFS4_CHKSEQ_OKAY) {
9700 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9701 			goto end;
9702 		}
9703 		break;
9704 	case NFS4_CHECK_STATEID_OLD:
9705 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9706 		goto end;
9707 	case NFS4_CHECK_STATEID_BAD:
9708 		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9709 		goto end;
9710 	case NFS4_CHECK_STATEID_EXPIRED:
9711 		*cs->statusp = resp->status = NFS4ERR_EXPIRED;
9712 		goto end;
9713 	case NFS4_CHECK_STATEID_CLOSED:
9714 		*cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9715 		goto end;
9716 	case NFS4_CHECK_STATEID_REPLAY:
9717 		ASSERT(!rfs4_has_session(cs));
9718 
9719 		switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9720 		case NFS4_CHKSEQ_OKAY:
9721 				/*
9722 				 * This is a replayed stateid; if
9723 				 * seqid matches the next expected,
9724 				 * then client is using wrong seqid.
9725 				 */
9726 		case NFS4_CHKSEQ_BAD:
9727 			*cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9728 			goto end;
9729 		case NFS4_CHKSEQ_REPLAY:
9730 			rfs4_update_lease(lsp->rls_locker->rl_client);
9731 			*cs->statusp = status = resp->status;
9732 			goto end;
9733 		}
9734 		break;
9735 	default:
9736 		ASSERT(FALSE);
9737 		break;
9738 	}
9739 
9740 	rfs4_update_lock_sequence(lsp);
9741 	rfs4_update_lease(lsp->rls_locker->rl_client);
9742 
9743 	/*
9744 	 * NFS4 only allows locking on regular files, so
9745 	 * verify type of object.
9746 	 */
9747 	if (cs->vp->v_type != VREG) {
9748 		if (cs->vp->v_type == VDIR)
9749 			status = NFS4ERR_ISDIR;
9750 		else
9751 			status = NFS4ERR_INVAL;
9752 		goto out;
9753 	}
9754 
9755 	if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9756 		status = NFS4ERR_GRACE;
9757 		goto out;
9758 	}
9759 
9760 	status = rfs4_do_lock(lsp, args->locktype,
9761 	    args->offset, args->length, cs->cr, resop);
9762 
9763 out:
9764 	*cs->statusp = resp->status = status;
9765 
9766 	if (status == NFS4_OK)
9767 		resp->lock_stateid = lsp->rls_lockid.stateid;
9768 
9769 	rfs4_update_lock_resp(lsp, resop);
9770 
9771 end:
9772 	rfs4_sw_exit(&lsp->rls_sw);
9773 	rfs4_lo_state_rele(lsp, TRUE);
9774 
9775 	DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9776 	    LOCKU4res *, resp);
9777 }
9778 
9779 /*
9780  * LOCKT is a best effort routine, the client can not be guaranteed that
9781  * the status return is still in effect by the time the reply is received.
9782  * They are numerous race conditions in this routine, but we are not required
9783  * and can not be accurate.
9784  */
9785 /*ARGSUSED*/
9786 void
rfs4_op_lockt(nfs_argop4 * argop,nfs_resop4 * resop,struct svc_req * req,struct compound_state * cs)9787 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9788     struct svc_req *req, struct compound_state *cs)
9789 {
9790 	LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9791 	LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9792 	rfs4_lockowner_t *lo;
9793 	rfs4_client_t *cp;
9794 	bool_t create = FALSE;
9795 	struct flock64 flk;
9796 	int error;
9797 	int flag = FREAD | FWRITE;
9798 	int ltype;
9799 	length4 posix_length;
9800 	sysid_t sysid;
9801 	pid_t pid;
9802 
9803 	DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9804 	    LOCKT4args *, args);
9805 
9806 	if (cs->vp == NULL) {
9807 		*cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9808 		goto out;
9809 	}
9810 
9811 	/*
9812 	 * NFS4 only allows locking on regular files, so
9813 	 * verify type of object.
9814 	 */
9815 	if (cs->vp->v_type != VREG) {
9816 		if (cs->vp->v_type == VDIR)
9817 			*cs->statusp = resp->status = NFS4ERR_ISDIR;
9818 		else
9819 			*cs->statusp = resp->status =  NFS4ERR_INVAL;
9820 		goto out;
9821 	}
9822 
9823 	/*
9824 	 * Check out the clientid to ensure the server knows about it
9825 	 * so that we correctly inform the client of a server reboot.
9826 	 */
9827 	if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9828 	    == NULL) {
9829 		*cs->statusp = resp->status =
9830 		    rfs4_check_clientid(&args->owner.clientid, 0);
9831 		goto out;
9832 	}
9833 	if (rfs4_lease_expired(cp)) {
9834 		rfs4_client_close(cp);
9835 		/*
9836 		 * Protocol doesn't allow returning NFS4ERR_STALE as
9837 		 * other operations do on this check so STALE_CLIENTID
9838 		 * is returned instead
9839 		 */
9840 		*cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9841 		goto out;
9842 	}
9843 
9844 	if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9845 		*cs->statusp = resp->status = NFS4ERR_GRACE;
9846 		rfs4_client_rele(cp);
9847 		goto out;
9848 	}
9849 	rfs4_client_rele(cp);
9850 
9851 	resp->status = NFS4_OK;
9852 
9853 	switch (args->locktype) {
9854 	case READ_LT:
9855 	case READW_LT:
9856 		ltype = F_RDLCK;
9857 		break;
9858 	case WRITE_LT:
9859 	case WRITEW_LT:
9860 		ltype = F_WRLCK;
9861 		break;
9862 	}
9863 
9864 	posix_length = args->length;
9865 	/* Check for zero length. To lock to end of file use all ones for V4 */
9866 	if (posix_length == 0) {
9867 		*cs->statusp = resp->status = NFS4ERR_INVAL;
9868 		goto out;
9869 	} else if (posix_length == (length4)(~0)) {
9870 		posix_length = 0;	/* Posix to end of file  */
9871 	}
9872 
9873 	/*
9874 	 * See RFC 8881 18.11.3:
9875 	 * The clientid field of the owner MAY be set to any value
9876 	 * by the client and MUST be ignored by the server.
9877 	 */
9878 	if (rfs4_has_session(cs))
9879 		args->owner.clientid = cs->client->rc_clientid;
9880 
9881 	/* Find or create a lockowner */
9882 	lo = rfs4_findlockowner(&args->owner, &create);
9883 
9884 	if (lo) {
9885 		pid = lo->rl_pid;
9886 		if ((resp->status =
9887 		    rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9888 			goto err;
9889 	} else {
9890 		pid = 0;
9891 		sysid = lockt_sysid;
9892 	}
9893 retry:
9894 	flk.l_type = ltype;
9895 	flk.l_whence = 0;		/* SEEK_SET */
9896 	flk.l_start = args->offset;
9897 	flk.l_len = posix_length;
9898 	flk.l_sysid = sysid;
9899 	flk.l_pid = pid;
9900 	flag |= F_REMOTELOCK;
9901 
9902 	LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9903 
9904 	/* Note that length4 is uint64_t but l_len and l_start are off64_t */
9905 	if (flk.l_len < 0 || flk.l_start < 0) {
9906 		resp->status = NFS4ERR_INVAL;
9907 		goto err;
9908 	}
9909 	error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9910 	    NULL, cs->cr, NULL);
9911 
9912 	/*
9913 	 * N.B. We map error values to nfsv4 errors. This is differrent
9914 	 * than puterrno4 routine.
9915 	 */
9916 	switch (error) {
9917 	case 0:
9918 		if (flk.l_type == F_UNLCK)
9919 			resp->status = NFS4_OK;
9920 		else {
9921 			if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9922 				goto retry;
9923 			resp->status = NFS4ERR_DENIED;
9924 		}
9925 		break;
9926 	case EOVERFLOW:
9927 		resp->status = NFS4ERR_INVAL;
9928 		break;
9929 	case EINVAL:
9930 		resp->status = NFS4ERR_NOTSUPP;
9931 		break;
9932 	default:
9933 		cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9934 		    error);
9935 		resp->status = NFS4ERR_SERVERFAULT;
9936 		break;
9937 	}
9938 
9939 err:
9940 	if (lo)
9941 		rfs4_lockowner_rele(lo);
9942 	*cs->statusp = resp->status;
9943 out:
9944 	DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9945 	    LOCKT4res *, resp);
9946 }
9947 
9948 int
rfs4_share(rfs4_state_t * sp,uint32_t access,uint32_t deny)9949 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9950 {
9951 	int err;
9952 	int cmd;
9953 	vnode_t *vp;
9954 	struct shrlock shr;
9955 	struct shr_locowner shr_loco;
9956 	int fflags = 0;
9957 
9958 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9959 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9960 
9961 	if (sp->rs_closed)
9962 		return (NFS4ERR_OLD_STATEID);
9963 
9964 	vp = sp->rs_finfo->rf_vp;
9965 	ASSERT(vp);
9966 
9967 	shr.s_access = shr.s_deny = 0;
9968 
9969 	if (access & OPEN4_SHARE_ACCESS_READ) {
9970 		fflags |= FREAD;
9971 		shr.s_access |= F_RDACC;
9972 	}
9973 	if (access & OPEN4_SHARE_ACCESS_WRITE) {
9974 		fflags |= FWRITE;
9975 		shr.s_access |= F_WRACC;
9976 	}
9977 	ASSERT(shr.s_access);
9978 
9979 	if (deny & OPEN4_SHARE_DENY_READ)
9980 		shr.s_deny |= F_RDDNY;
9981 	if (deny & OPEN4_SHARE_DENY_WRITE)
9982 		shr.s_deny |= F_WRDNY;
9983 
9984 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9985 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9986 	shr_loco.sl_pid = shr.s_pid;
9987 	shr_loco.sl_id = shr.s_sysid;
9988 	shr.s_owner = (caddr_t)&shr_loco;
9989 	shr.s_own_len = sizeof (shr_loco);
9990 
9991 	cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9992 
9993 	err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9994 	if (err != 0) {
9995 		if (err == EAGAIN)
9996 			err = NFS4ERR_SHARE_DENIED;
9997 		else
9998 			err = puterrno4(err);
9999 		return (err);
10000 	}
10001 
10002 	sp->rs_share_access |= access;
10003 	sp->rs_share_deny |= deny;
10004 
10005 	return (0);
10006 }
10007 
10008 int
rfs4_unshare(rfs4_state_t * sp)10009 rfs4_unshare(rfs4_state_t *sp)
10010 {
10011 	int err;
10012 	struct shrlock shr;
10013 	struct shr_locowner shr_loco;
10014 
10015 	ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
10016 
10017 	if (sp->rs_closed || sp->rs_share_access == 0)
10018 		return (0);
10019 
10020 	ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
10021 	ASSERT(sp->rs_finfo->rf_vp);
10022 
10023 	shr.s_access = shr.s_deny = 0;
10024 	shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
10025 	shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
10026 	shr_loco.sl_pid = shr.s_pid;
10027 	shr_loco.sl_id = shr.s_sysid;
10028 	shr.s_owner = (caddr_t)&shr_loco;
10029 	shr.s_own_len = sizeof (shr_loco);
10030 
10031 	err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
10032 	    NULL);
10033 	if (err != 0) {
10034 		err = puterrno4(err);
10035 		return (err);
10036 	}
10037 
10038 	sp->rs_share_access = 0;
10039 	sp->rs_share_deny = 0;
10040 
10041 	return (0);
10042 
10043 }
10044 
10045 static int
rdma_setup_read_data4(READ4args * args,READ4res * rok)10046 rdma_setup_read_data4(READ4args *args, READ4res *rok)
10047 {
10048 	struct clist	*wcl;
10049 	count4		count = rok->data_len;
10050 	int		wlist_len;
10051 
10052 	wcl = args->wlist;
10053 	if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
10054 		return (FALSE);
10055 	}
10056 	wcl = args->wlist;
10057 	rok->wlist_len = wlist_len;
10058 	rok->wlist = wcl;
10059 	return (TRUE);
10060 }
10061 
/*
 * Tunable to disable server referrals.  When set to a non-zero value
 * vn_is_nfs_reparse() reports B_FALSE without inspecting the vnode.
 */
int rfs4_no_referrals = 0;
10064 
10065 /*
10066  * Find an NFS record in reparse point data.
10067  * Returns 0 for success and <0 or an errno value on failure.
10068  */
10069 int
vn_find_nfs_record(vnode_t * vp,nvlist_t ** nvlp,char ** svcp,char ** datap)10070 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
10071 {
10072 	int err;
10073 	char *stype, *val;
10074 	nvlist_t *nvl;
10075 	nvpair_t *curr;
10076 
10077 	if ((nvl = reparse_init()) == NULL)
10078 		return (-1);
10079 
10080 	if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
10081 		reparse_free(nvl);
10082 		return (err);
10083 	}
10084 
10085 	curr = NULL;
10086 	while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
10087 		if ((stype = nvpair_name(curr)) == NULL) {
10088 			reparse_free(nvl);
10089 			return (-2);
10090 		}
10091 		if (strncasecmp(stype, "NFS", 3) == 0)
10092 			break;
10093 	}
10094 
10095 	if ((curr == NULL) ||
10096 	    (nvpair_value_string(curr, &val))) {
10097 		reparse_free(nvl);
10098 		return (-3);
10099 	}
10100 	*nvlp = nvl;
10101 	*svcp = stype;
10102 	*datap = val;
10103 	return (0);
10104 }
10105 
10106 int
vn_is_nfs_reparse(vnode_t * vp,cred_t * cr)10107 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
10108 {
10109 	nvlist_t *nvl;
10110 	char *s, *d;
10111 
10112 	if (rfs4_no_referrals != 0)
10113 		return (B_FALSE);
10114 
10115 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10116 		return (B_FALSE);
10117 
10118 	if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
10119 		return (B_FALSE);
10120 
10121 	reparse_free(nvl);
10122 
10123 	return (B_TRUE);
10124 }
10125 
10126 /*
10127  * There is a user-level copy of this routine in ref_subr.c.
10128  * Changes should be kept in sync.
10129  */
10130 static int
nfs4_create_components(char * path,component4 * comp4)10131 nfs4_create_components(char *path, component4 *comp4)
10132 {
10133 	int slen, plen, ncomp;
10134 	char *ori_path, *nxtc, buf[MAXNAMELEN];
10135 
10136 	if (path == NULL)
10137 		return (0);
10138 
10139 	plen = strlen(path) + 1;	/* include the terminator */
10140 	ori_path = path;
10141 	ncomp = 0;
10142 
10143 	/* count number of components in the path */
10144 	for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
10145 		if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
10146 			if ((slen = nxtc - path) == 0) {
10147 				path = nxtc + 1;
10148 				continue;
10149 			}
10150 
10151 			if (comp4 != NULL) {
10152 				bcopy(path, buf, slen);
10153 				buf[slen] = '\0';
10154 				(void) str_to_utf8(buf, &comp4[ncomp]);
10155 			}
10156 
10157 			ncomp++;	/* 1 valid component */
10158 			path = nxtc + 1;
10159 		}
10160 		if (*nxtc == '\0' || *nxtc == '\n')
10161 			break;
10162 	}
10163 
10164 	return (ncomp);
10165 }
10166 
10167 /*
10168  * There is a user-level copy of this routine in ref_subr.c.
10169  * Changes should be kept in sync.
10170  */
10171 static int
make_pathname4(char * path,pathname4 * pathname)10172 make_pathname4(char *path, pathname4 *pathname)
10173 {
10174 	int ncomp;
10175 	component4 *comp4;
10176 
10177 	if (pathname == NULL)
10178 		return (0);
10179 
10180 	if (path == NULL) {
10181 		pathname->pathname4_val = NULL;
10182 		pathname->pathname4_len = 0;
10183 		return (0);
10184 	}
10185 
10186 	/* count number of components to alloc buffer */
10187 	if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
10188 		pathname->pathname4_val = NULL;
10189 		pathname->pathname4_len = 0;
10190 		return (0);
10191 	}
10192 	comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
10193 
10194 	/* copy components into allocated buffer */
10195 	ncomp = nfs4_create_components(path, comp4);
10196 
10197 	pathname->pathname4_val = comp4;
10198 	pathname->pathname4_len = ncomp;
10199 
10200 	return (ncomp);
10201 }
10202 
/*
 * Alias used by fetch_referral() to decode its fs_locations4 result;
 * it resolves to xdr_fattr4_fs_locations.
 */
#define	xdr_fs_locations4 xdr_fattr4_fs_locations
10204 
10205 fs_locations4 *
fetch_referral(vnode_t * vp,cred_t * cr)10206 fetch_referral(vnode_t *vp, cred_t *cr)
10207 {
10208 	nvlist_t *nvl;
10209 	char *stype, *sdata;
10210 	fs_locations4 *result;
10211 	char buf[1024];
10212 	size_t bufsize;
10213 	XDR xdr;
10214 	int err;
10215 
10216 	/*
10217 	 * Check attrs to ensure it's a reparse point
10218 	 */
10219 	if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
10220 		return (NULL);
10221 
10222 	/*
10223 	 * Look for an NFS record and get the type and data
10224 	 */
10225 	if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
10226 		return (NULL);
10227 
10228 	/*
10229 	 * With the type and data, upcall to get the referral
10230 	 */
10231 	bufsize = sizeof (buf);
10232 	bzero(buf, sizeof (buf));
10233 	err = reparse_kderef((const char *)stype, (const char *)sdata,
10234 	    buf, &bufsize);
10235 	reparse_free(nvl);
10236 
10237 	DTRACE_PROBE4(nfs4serv__func__referral__upcall,
10238 	    char *, stype, char *, sdata, char *, buf, int, err);
10239 	if (err) {
10240 		cmn_err(CE_NOTE,
10241 		    "reparsed daemon not running: unable to get referral (%d)",
10242 		    err);
10243 		return (NULL);
10244 	}
10245 
10246 	/*
10247 	 * We get an XDR'ed record back from the kderef call
10248 	 */
10249 	xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
10250 	result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
10251 	err = xdr_fs_locations4(&xdr, result);
10252 	XDR_DESTROY(&xdr);
10253 	if (err != TRUE) {
10254 		DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
10255 		    int, err);
10256 		return (NULL);
10257 	}
10258 
10259 	/*
10260 	 * Look at path to recover fs_root, ignoring the leading '/'
10261 	 */
10262 	(void) make_pathname4(vp->v_path, &result->fs_root);
10263 
10264 	return (result);
10265 }
10266 
10267 char *
build_symlink(vnode_t * vp,cred_t * cr,size_t * strsz)10268 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
10269 {
10270 	fs_locations4 *fsl;
10271 	fs_location4 *fs;
10272 	char *server, *path, *symbuf;
10273 	static char *prefix = "/net/";
10274 	int i, size, npaths;
10275 	uint_t len;
10276 
10277 	/* Get the referral */
10278 	if ((fsl = fetch_referral(vp, cr)) == NULL)
10279 		return (NULL);
10280 
10281 	/* Deal with only the first location and first server */
10282 	fs = &fsl->locations_val[0];
10283 	server = utf8_to_str(&fs->server_val[0], &len, NULL);
10284 	if (server == NULL) {
10285 		rfs4_free_fs_locations4(fsl);
10286 		kmem_free(fsl, sizeof (fs_locations4));
10287 		return (NULL);
10288 	}
10289 
10290 	/* Figure out size for "/net/" + host + /path/path/path + NULL */
10291 	size = strlen(prefix) + len;
10292 	for (i = 0; i < fs->rootpath.pathname4_len; i++)
10293 		size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
10294 
10295 	/* Allocate the symlink buffer and fill it */
10296 	symbuf = kmem_zalloc(size, KM_SLEEP);
10297 	(void) strcat(symbuf, prefix);
10298 	(void) strcat(symbuf, server);
10299 	kmem_free(server, len);
10300 
10301 	npaths = 0;
10302 	for (i = 0; i < fs->rootpath.pathname4_len; i++) {
10303 		path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
10304 		if (path == NULL)
10305 			continue;
10306 		(void) strcat(symbuf, "/");
10307 		(void) strcat(symbuf, path);
10308 		npaths++;
10309 		kmem_free(path, len);
10310 	}
10311 
10312 	rfs4_free_fs_locations4(fsl);
10313 	kmem_free(fsl, sizeof (fs_locations4));
10314 
10315 	if (strsz != NULL)
10316 		*strsz = size;
10317 	return (symbuf);
10318 }
10319 
10320 /*
10321  * Check to see if we have a downrev Solaris client, so that we
10322  * can send it a symlink instead of a referral.
10323  */
10324 int
client_is_downrev(struct svc_req * req)10325 client_is_downrev(struct svc_req *req)
10326 {
10327 	struct sockaddr *ca;
10328 	rfs4_clntip_t *ci;
10329 	bool_t create = FALSE;
10330 	int is_downrev;
10331 
10332 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
10333 	ASSERT(ca);
10334 	ci = rfs4_find_clntip(ca, &create);
10335 	if (ci == NULL)
10336 		return (0);
10337 	is_downrev = ci->ri_no_referrals;
10338 	rfs4_dbe_rele(ci->ri_dbe);
10339 	return (is_downrev);
10340 }
10341 
10342 /*
10343  * Do the main work of handling HA-NFSv4 Resource Group failover on
10344  * Sun Cluster.
10345  * We need to detect whether any RG admin paths have been added or removed,
10346  * and adjust resources accordingly.
10347  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
10348  * order to scale, the list and array of paths need to be held in more
10349  * suitable data structures.
10350  */
10351 static void
hanfsv4_failover(nfs4_srv_t * nsrv4)10352 hanfsv4_failover(nfs4_srv_t *nsrv4)
10353 {
10354 	int i, start_grace, numadded_paths = 0;
10355 	char **added_paths = NULL;
10356 	rfs4_dss_path_t *dss_path;
10357 
10358 	/*
10359 	 * Note: currently, dss_pathlist cannot be NULL, since
10360 	 * it will always include an entry for NFS4_DSS_VAR_DIR. If we
10361 	 * make the latter dynamically specified too, the following will
10362 	 * need to be adjusted.
10363 	 */
10364 
10365 	/*
10366 	 * First, look for removed paths: RGs that have been failed-over
10367 	 * away from this node.
10368 	 * Walk the "currently-serving" dss_pathlist and, for each
10369 	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
10370 	 * from nfsd. If not, that RG path has been removed.
10371 	 *
10372 	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
10373 	 * any duplicates.
10374 	 */
10375 	dss_path = nsrv4->dss_pathlist;
10376 	do {
10377 		int found = 0;
10378 		char *path = dss_path->path;
10379 
10380 		/* used only for non-HA so may not be removed */
10381 		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10382 			dss_path = dss_path->next;
10383 			continue;
10384 		}
10385 
10386 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10387 			int cmpret;
10388 			char *newpath = rfs4_dss_newpaths[i];
10389 
10390 			/*
10391 			 * Since nfsd has sorted rfs4_dss_newpaths for us,
10392 			 * once the return from strcmp is negative we know
10393 			 * we've passed the point where "path" should be,
10394 			 * and can stop searching: "path" has been removed.
10395 			 */
10396 			cmpret = strcmp(path, newpath);
10397 			if (cmpret < 0)
10398 				break;
10399 			if (cmpret == 0) {
10400 				found = 1;
10401 				break;
10402 			}
10403 		}
10404 
10405 		if (found == 0) {
10406 			unsigned index = dss_path->index;
10407 			rfs4_servinst_t *sip = dss_path->sip;
10408 			rfs4_dss_path_t *path_next = dss_path->next;
10409 
10410 			/*
10411 			 * This path has been removed.
10412 			 * We must clear out the servinst reference to
10413 			 * it, since it's now owned by another
10414 			 * node: we should not attempt to touch it.
10415 			 */
10416 			ASSERT(dss_path == sip->dss_paths[index]);
10417 			sip->dss_paths[index] = NULL;
10418 
10419 			/* remove from "currently-serving" list, and destroy */
10420 			remque(dss_path);
10421 			/* allow for NUL */
10422 			kmem_free(dss_path->path, strlen(dss_path->path) + 1);
10423 			kmem_free(dss_path, sizeof (rfs4_dss_path_t));
10424 
10425 			dss_path = path_next;
10426 		} else {
10427 			/* path was found; not removed */
10428 			dss_path = dss_path->next;
10429 		}
10430 	} while (dss_path != nsrv4->dss_pathlist);
10431 
10432 	/*
10433 	 * Now, look for added paths: RGs that have been failed-over
10434 	 * to this node.
10435 	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
10436 	 * for each path, check if it is on the "currently-serving"
10437 	 * dss_pathlist. If not, that RG path has been added.
10438 	 *
10439 	 * Note: we don't do duplicate detection here; nfsd does that for us.
10440 	 *
10441 	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
10442 	 * an upper bound for the size needed for added_paths[numadded_paths].
10443 	 */
10444 
10445 	/* probably more space than we need, but guaranteed to be enough */
10446 	if (rfs4_dss_numnewpaths > 0) {
10447 		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
10448 		added_paths = kmem_zalloc(sz, KM_SLEEP);
10449 	}
10450 
10451 	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
10452 	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
10453 		int found = 0;
10454 		char *newpath = rfs4_dss_newpaths[i];
10455 
10456 		dss_path = nsrv4->dss_pathlist;
10457 		do {
10458 			char *path = dss_path->path;
10459 
10460 			/* used only for non-HA */
10461 			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
10462 				dss_path = dss_path->next;
10463 				continue;
10464 			}
10465 
10466 			if (strncmp(path, newpath, strlen(path)) == 0) {
10467 				found = 1;
10468 				break;
10469 			}
10470 
10471 			dss_path = dss_path->next;
10472 		} while (dss_path != nsrv4->dss_pathlist);
10473 
10474 		if (found == 0) {
10475 			added_paths[numadded_paths] = newpath;
10476 			numadded_paths++;
10477 		}
10478 	}
10479 
10480 	/* did we find any added paths? */
10481 	if (numadded_paths > 0) {
10482 
10483 		/* create a new server instance, and start its grace period */
10484 		start_grace = 1;
10485 		/* CSTYLED */
10486 		rfs4_servinst_create(nsrv4, start_grace, numadded_paths, added_paths);
10487 
10488 		/* read in the stable storage state from these paths */
10489 		rfs4_dss_readstate(nsrv4, numadded_paths, added_paths);
10490 
10491 		/*
10492 		 * Multiple failovers during a grace period will cause
10493 		 * clients of the same resource group to be partitioned
10494 		 * into different server instances, with different
10495 		 * grace periods.  Since clients of the same resource
10496 		 * group must be subject to the same grace period,
10497 		 * we need to reset all currently active grace periods.
10498 		 */
10499 		rfs4_grace_reset_all(nsrv4);
10500 	}
10501 
10502 	if (rfs4_dss_numnewpaths > 0)
10503 		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
10504 }
10505