xref: /titanic_51/usr/src/uts/common/fs/nfs/nfs_server.c (revision 337c098dc32f2c4eaa1d487e47e93352dc954adf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
28  *	All rights reserved.
29  *	Use is subject to license terms.
30  */
31 
32 #pragma ident	"%Z%%M%	%I%	%E% SMI"
33 
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cred.h>
38 #include <sys/proc.h>
39 #include <sys/user.h>
40 #include <sys/buf.h>
41 #include <sys/vfs.h>
42 #include <sys/vnode.h>
43 #include <sys/pathname.h>
44 #include <sys/uio.h>
45 #include <sys/file.h>
46 #include <sys/stat.h>
47 #include <sys/errno.h>
48 #include <sys/socket.h>
49 #include <sys/sysmacros.h>
50 #include <sys/siginfo.h>
51 #include <sys/tiuser.h>
52 #include <sys/statvfs.h>
53 #include <sys/stream.h>
54 #include <sys/strsubr.h>
55 #include <sys/stropts.h>
56 #include <sys/timod.h>
57 #include <sys/t_kuser.h>
58 #include <sys/kmem.h>
59 #include <sys/kstat.h>
60 #include <sys/dirent.h>
61 #include <sys/cmn_err.h>
62 #include <sys/debug.h>
63 #include <sys/unistd.h>
64 #include <sys/vtrace.h>
65 #include <sys/mode.h>
66 #include <sys/acl.h>
67 #include <sys/sdt.h>
68 
69 #include <rpc/types.h>
70 #include <rpc/auth.h>
71 #include <rpc/auth_unix.h>
72 #include <rpc/auth_des.h>
73 #include <rpc/svc.h>
74 #include <rpc/xdr.h>
75 
76 #include <nfs/nfs.h>
77 #include <nfs/export.h>
78 #include <nfs/nfssys.h>
79 #include <nfs/nfs_clnt.h>
80 #include <nfs/nfs_acl.h>
81 #include <nfs/nfs_log.h>
82 #include <nfs/lm.h>
83 #include <nfs/nfs_dispatch.h>
84 #include <nfs/nfs4_drc.h>
85 
86 #include <sys/modctl.h>
87 #include <sys/cladm.h>
88 #include <sys/clconf.h>
89 
90 #define	MAXHOST 32
91 const char *kinet_ntop6(uchar_t *, char *, size_t);
92 
93 /*
94  * Module linkage information.
95  */
96 
/* Miscellaneous-module descriptor: mod_miscops plus a description string. */
static struct modlmisc modlmisc = {
	&mod_miscops, "NFS server module"
};

/* Linkage handed to mod_install() in _init() and mod_info() in _info(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};

/*
 * NOTE(review): presumably read by the module loader to pull in
 * misc/klmmod before this module -- nothing in this file reads it.
 */
char _depends_on[] = "misc/klmmod";

/*
 * for testing RG failover code path on non-Cluster system;
 * when non-zero, rfs4_server_start() calls hanfsv4_failover() on a
 * warm start even if CLUSTER_BOOTED is not set.
 */
int hanfsv4_force = 0;
109 
110 int
111 _init(void)
112 {
113 	int status;
114 
115 	if ((status = nfs_srvinit()) != 0) {
116 		cmn_err(CE_WARN, "_init: nfs_srvinit failed");
117 		return (status);
118 	}
119 
120 	status = mod_install((struct modlinkage *)&modlinkage);
121 	if (status != 0) {
122 		/*
123 		 * Could not load module, cleanup previous
124 		 * initialization work.
125 		 */
126 		nfs_srvfini();
127 	}
128 
129 	/*
130 	 * Initialise some placeholders for nfssys() calls. These have
131 	 * to be declared by the nfs module, since that handles nfssys()
132 	 * calls - also used by NFS clients - but are provided by this
133 	 * nfssrv module. These also then serve as confirmation to the
134 	 * relevant code in nfs that nfssrv has been loaded, as they're
135 	 * initially NULL.
136 	 */
137 	nfs_srv_quiesce_func = nfs_srv_quiesce_all;
138 	nfs_srv_dss_func = rfs4_dss_setpaths;
139 
140 	/* setup DSS paths here; must be done before initial server startup */
141 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
142 
143 	return (status);
144 }
145 
/*
 * Module unload entry point.
 *
 * Always refuses with EBUSY; this module performs no teardown here.
 * Declared with a (void) prototype to match _init(void).
 */
int
_fini(void)
{
	return (EBUSY);
}
151 
152 int
153 _info(struct modinfo *modinfop)
154 {
155 	return (mod_info(&modlinkage, modinfop));
156 }
157 
/*
 * PUBLICFH_CHECK() checks if the dispatch routine supports
 * RPC_PUBLICFH_OK, if the filesystem is exported public, and if the
 * incoming request is using the public filehandle. The check duplicates
 * the exportmatch() call done in checkexport(), and we should consider
 * modifying those routines to avoid the duplication. For now, we optimize
 * by calling exportmatch() only after checking that the dispatch routine
 * supports RPC_PUBLICFH_OK, and if the filesystem is explicitly exported
 * public (i.e., not the placeholder).
 *
 * Every argument is parenthesized in the expansion so that expression
 * arguments (e.g. "cond ? a : b") expand correctly. Note that "exi" may
 * be evaluated twice, so it must not have side effects.
 */
#define	PUBLICFH_CHECK(disp, exi, fsid, xfid) \
		(((disp)->dis_flags & RPC_PUBLICFH_OK) && \
		(((exi)->exi_export.ex_flags & EX_PUBLIC) || \
		((exi) == exi_public && exportmatch(exi_root, \
		(fsid), (xfid)))))
173 
174 static void	nfs_srv_shutdown_all(int);
175 static void	rfs4_server_start(int);
176 static void	nullfree(void);
177 static void	rfs_dispatch(struct svc_req *, SVCXPRT *);
178 static void	acl_dispatch(struct svc_req *, SVCXPRT *);
179 static void	common_dispatch(struct svc_req *, SVCXPRT *,
180 		rpcvers_t, rpcvers_t, char *,
181 		struct rpc_disptable *);
182 static void	hanfsv4_failover(void);
183 static	int	checkauth(struct exportinfo *, struct svc_req *, cred_t *, int,
184 			bool_t);
185 static char	*client_name(struct svc_req *req);
186 static char	*client_addr(struct svc_req *req, char *buf);
187 extern	int	sec_svc_getcred(struct svc_req *, cred_t *cr, char **, int *);
188 extern	bool_t	sec_svc_inrootlist(int, caddr_t, int, caddr_t *);
189 
/*
 * Copy the remote address netbuf of transport "xprt" into "*nb".
 *
 * A new buffer is allocated with KM_SLEEP and filled from xp_rtaddr.buf.
 * Note that the buffer is sized by xp_rtaddr.len, not by the maxlen that
 * is recorded in (nb)->maxlen, so it must be freed using (nb)->len.
 * The "exi" argument is not used by the expansion.
 *
 * The expansion is a bare { } block rather than do { } while (0), so
 * take care when using it as the body of an unbraced if/else.
 */
#define	NFSLOG_COPY_NETBUF(exi, xprt, nb)	{		\
	(nb)->maxlen = (xprt)->xp_rtaddr.maxlen;		\
	(nb)->len = (xprt)->xp_rtaddr.len;			\
	(nb)->buf = kmem_alloc((nb)->len, KM_SLEEP);		\
	bcopy((xprt)->xp_rtaddr.buf, (nb)->buf, (nb)->len);	\
	}
196 
197 /*
198  * Public Filehandle common nfs routines
199  */
200 static int	MCLpath(char **);
201 static void	URLparse(char *);
202 
203 /*
204  * NFS callout table.
205  * This table is used by svc_getreq() to dispatch a request with
206  * a given prog/vers pair to an appropriate service provider
207  * dispatch routine.
208  *
209  * NOTE: ordering is relied upon below when resetting the version min/max
210  * for NFS_PROGRAM.  Careful, if this is ever changed.
211  */
/*
 * Callouts for connectionless (T_CLTS) transports.
 * Version fields are rewritten by nfs_srv_set_sc_versions(), which
 * indexes entry 0 as NFS_PROGRAM and entry 1 as NFS_ACL_PROGRAM.
 */
static SVC_CALLOUT __nfs_sc_clts[] = {
	{ NFS_PROGRAM,	   NFS_VERSMIN,	    NFS_VERSMAX,	rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,	acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_clts = {
	sizeof (__nfs_sc_clts) / sizeof (__nfs_sc_clts[0]), FALSE,
	__nfs_sc_clts
};

/*
 * Callouts for connection-oriented (T_COTS, T_COTS_ORD) transports.
 * Version fields are rewritten by nfs_srv_set_sc_versions().
 */
static SVC_CALLOUT __nfs_sc_cots[] = {
	{ NFS_PROGRAM,	   NFS_VERSMIN,	    NFS_VERSMAX,	rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,	acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_cots = {
	sizeof (__nfs_sc_cots) / sizeof (__nfs_sc_cots[0]), FALSE, __nfs_sc_cots
};

/*
 * Callouts for RDMA transports.
 * Version fields are rewritten by rdma_start().
 */
static SVC_CALLOUT __nfs_sc_rdma[] = {
	{ NFS_PROGRAM,	   NFS_VERSMIN,	    NFS_VERSMAX,	rfs_dispatch },
	{ NFS_ACL_PROGRAM, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,	acl_dispatch }
};

static SVC_CALLOUT_TABLE nfs_sct_rdma = {
	sizeof (__nfs_sc_rdma) / sizeof (__nfs_sc_rdma[0]), FALSE, __nfs_sc_rdma
};
/*
 * NFS version range most recently requested; overwritten on every call
 * to nfs_svc() and rdma_start().
 */
rpcvers_t nfs_versmin = NFS_VERSMIN_DEFAULT;
rpcvers_t nfs_versmax = NFS_VERSMAX_DEFAULT;
241 
242 /*
243  * Used to track the state of the server so that initialization
244  * can be done properly.
245  */
/*
 * NFS_SERVER_STOPPED is the first enumerator (value 0), so it is also
 * the initial value of the zero-initialized static nfs_server_upordown.
 */
typedef enum {
	NFS_SERVER_STOPPED,	/* server state destroyed */
	NFS_SERVER_STOPPING,	/* server state being destroyed */
	NFS_SERVER_RUNNING,	/* set by rfs4_server_start() once started */
	NFS_SERVER_QUIESCED,	/* server state preserved */
	NFS_SERVER_OFFLINE	/* server pool offline */
} nfs_server_running_t;
253 
254 static nfs_server_running_t nfs_server_upordown;
255 static kmutex_t nfs_server_upordown_lock;
256 static	kcondvar_t nfs_server_upordown_cv;
257 
258 /*
259  * DSS: distributed stable storage
260  * lists of all DSS paths: current, and before last warmstart
261  */
262 nvlist_t *rfs4_dss_paths, *rfs4_dss_oldpaths;
263 
264 int rfs4_dispatch(struct rpcdisp *, struct svc_req *, SVCXPRT *, char *);
265 
266 /*
267  * RDMA wait variables.
268  */
269 static kcondvar_t rdma_wait_cv;
270 static kmutex_t rdma_wait_mutex;
271 
272 /*
273  * Will be called at the point the server pool is being unregistered
274  * from the pool list. From that point onwards, the pool is waiting
275  * to be drained and as such the server state is stale and pertains
276  * to the old instantiation of the NFS server pool.
277  */
278 void
279 nfs_srv_offline(void)
280 {
281 	mutex_enter(&nfs_server_upordown_lock);
282 	if (nfs_server_upordown == NFS_SERVER_RUNNING) {
283 		nfs_server_upordown = NFS_SERVER_OFFLINE;
284 	}
285 	mutex_exit(&nfs_server_upordown_lock);
286 }
287 
/*
 * Will be called at the point the server pool is being destroyed so
 * all transports have been closed and no service threads are in
 * existence.
 *
 * This is the non-quiescing shutdown: the server state is destroyed.
 * (Quiescing instead shuts the server down without destroying the
 * server state, which allows it to warm start subsequently; see
 * nfs_srv_quiesce_all().)
 */
void
nfs_srv_stop_all(void)
{
	/* 0 == do not quiesce */
	nfs_srv_shutdown_all(0);
}
302 
/*
 * This alternative shutdown routine can be requested via nfssys().
 * It asks nfs_srv_shutdown_all() to quiesce rather than stop.
 */
void
nfs_srv_quiesce_all(void)
{
	/* 1 == quiesce */
	nfs_srv_shutdown_all(1);
}
312 
313 static void
314 nfs_srv_shutdown_all(int quiesce) {
315 	mutex_enter(&nfs_server_upordown_lock);
316 	if (quiesce) {
317 		if (nfs_server_upordown == NFS_SERVER_RUNNING ||
318 			nfs_server_upordown == NFS_SERVER_OFFLINE) {
319 			nfs_server_upordown = NFS_SERVER_QUIESCED;
320 			cv_signal(&nfs_server_upordown_cv);
321 
322 			/* reset DSS state, for subsequent warm restart */
323 			rfs4_dss_numnewpaths = 0;
324 			rfs4_dss_newpaths = NULL;
325 
326 			cmn_err(CE_NOTE, "nfs_server: server is now quiesced; "
327 			    "NFSv4 state has been preserved");
328 		}
329 	} else {
330 		if (nfs_server_upordown == NFS_SERVER_OFFLINE) {
331 			nfs_server_upordown = NFS_SERVER_STOPPING;
332 			mutex_exit(&nfs_server_upordown_lock);
333 			rfs4_state_fini();
334 			rfs4_fini_drc(nfs4_drc);
335 			mutex_enter(&nfs_server_upordown_lock);
336 			nfs_server_upordown = NFS_SERVER_STOPPED;
337 			cv_signal(&nfs_server_upordown_cv);
338 		}
339 	}
340 	mutex_exit(&nfs_server_upordown_lock);
341 }
342 
343 static int
344 nfs_srv_set_sc_versions(struct file *fp, SVC_CALLOUT_TABLE **sctpp,
345 			rpcvers_t versmin, rpcvers_t versmax)
346 {
347 	struct strioctl strioc;
348 	struct T_info_ack tinfo;
349 	int		error, retval;
350 
351 	/*
352 	 * Find out what type of transport this is.
353 	 */
354 	strioc.ic_cmd = TI_GETINFO;
355 	strioc.ic_timout = -1;
356 	strioc.ic_len = sizeof (tinfo);
357 	strioc.ic_dp = (char *)&tinfo;
358 	tinfo.PRIM_type = T_INFO_REQ;
359 
360 	error = strioctl(fp->f_vnode, I_STR, (intptr_t)&strioc, 0, K_TO_K,
361 	    CRED(), &retval);
362 	if (error || retval)
363 		return (error);
364 
365 	/*
366 	 * Based on our query of the transport type...
367 	 *
368 	 * Reset the min/max versions based on the caller's request
369 	 * NOTE: This assumes that NFS_PROGRAM is first in the array!!
370 	 * And the second entry is the NFS_ACL_PROGRAM.
371 	 */
372 	switch (tinfo.SERV_type) {
373 	case T_CLTS:
374 		if (versmax == NFS_V4)
375 			return (EINVAL);
376 		__nfs_sc_clts[0].sc_versmin = versmin;
377 		__nfs_sc_clts[0].sc_versmax = versmax;
378 		__nfs_sc_clts[1].sc_versmin = versmin;
379 		__nfs_sc_clts[1].sc_versmax = versmax;
380 		*sctpp = &nfs_sct_clts;
381 		break;
382 	case T_COTS:
383 	case T_COTS_ORD:
384 		__nfs_sc_cots[0].sc_versmin = versmin;
385 		__nfs_sc_cots[0].sc_versmax = versmax;
386 		/* For the NFS_ACL program, check the max version */
387 		if (versmax > NFS_ACL_VERSMAX)
388 			versmax = NFS_ACL_VERSMAX;
389 		__nfs_sc_cots[1].sc_versmin = versmin;
390 		__nfs_sc_cots[1].sc_versmax = versmax;
391 		*sctpp = &nfs_sct_cots;
392 		break;
393 	default:
394 		error = EINVAL;
395 	}
396 
397 	return (error);
398 }
399 
400 /*
401  * NFS Server system call.
402  * Does all of the work of running a NFS server.
403  * uap->fd is the fd of an open transport provider
404  */
405 int
406 nfs_svc(struct nfs_svc_args *arg, model_t model)
407 {
408 	file_t *fp;
409 	SVCMASTERXPRT *xprt;
410 	int error;
411 	int readsize;
412 	char buf[KNC_STRSIZE];
413 	size_t len;
414 	STRUCT_HANDLE(nfs_svc_args, uap);
415 	struct netbuf addrmask;
416 	SVC_CALLOUT_TABLE *sctp = NULL;
417 
418 #ifdef lint
419 	model = model;		/* STRUCT macros don't always refer to it */
420 #endif
421 
422 	STRUCT_SET_HANDLE(uap, model, arg);
423 
424 	/* Check privileges in nfssys() */
425 
426 	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
427 		return (EBADF);
428 
429 	/*
430 	 * Set read buffer size to rsize
431 	 * and add room for RPC headers.
432 	 */
433 	readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
434 	if (readsize < RPC_MAXDATASIZE)
435 		readsize = RPC_MAXDATASIZE;
436 
437 	error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
438 	    KNC_STRSIZE, &len);
439 	if (error) {
440 		releasef(STRUCT_FGET(uap, fd));
441 		return (error);
442 	}
443 
444 	addrmask.len = STRUCT_FGET(uap, addrmask.len);
445 	addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
446 	addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
447 	error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
448 	    addrmask.len);
449 	if (error) {
450 		releasef(STRUCT_FGET(uap, fd));
451 		kmem_free(addrmask.buf, addrmask.maxlen);
452 		return (error);
453 	}
454 
455 	nfs_versmin = STRUCT_FGET(uap, versmin);
456 	nfs_versmax = STRUCT_FGET(uap, versmax);
457 
458 	/* Double check the vers min/max ranges */
459 	if ((nfs_versmin > nfs_versmax) ||
460 		(nfs_versmin < NFS_VERSMIN) ||
461 		(nfs_versmax > NFS_VERSMAX)) {
462 		nfs_versmin = NFS_VERSMIN_DEFAULT;
463 		nfs_versmax = NFS_VERSMAX_DEFAULT;
464 	}
465 
466 	if (error =
467 	    nfs_srv_set_sc_versions(fp, &sctp, nfs_versmin, nfs_versmax)) {
468 		releasef(STRUCT_FGET(uap, fd));
469 		kmem_free(addrmask.buf, addrmask.maxlen);
470 		return (error);
471 	}
472 
473 	/* Initialize nfsv4 server */
474 	if (nfs_versmax == (rpcvers_t)NFS_V4)
475 		rfs4_server_start(STRUCT_FGET(uap, delegation));
476 
477 	/* Create a transport handle. */
478 	error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &xprt,
479 				sctp, NULL, NFS_SVCPOOL_ID, TRUE);
480 
481 	if (error)
482 		kmem_free(addrmask.buf, addrmask.maxlen);
483 
484 	releasef(STRUCT_FGET(uap, fd));
485 
486 	/* HA-NFSv4: save the cluster nodeid */
487 	if (cluster_bootflags & CLUSTER_BOOTED)
488 		lm_global_nlmid = clconf_get_nodeid();
489 
490 	return (error);
491 }
492 
493 static void
494 rfs4_server_start(int nfs4_srv_delegation)
495 {
496 	/*
497 	 * Determine if the server has previously been "started" and
498 	 * if not, do the per instance initialization
499 	 */
500 	mutex_enter(&nfs_server_upordown_lock);
501 
502 	if (nfs_server_upordown != NFS_SERVER_RUNNING) {
503 		/* Do we need to stop and wait on the previous server? */
504 		while (nfs_server_upordown == NFS_SERVER_STOPPING ||
505 			nfs_server_upordown == NFS_SERVER_OFFLINE)
506 			cv_wait(&nfs_server_upordown_cv,
507 			    &nfs_server_upordown_lock);
508 
509 		if (nfs_server_upordown != NFS_SERVER_RUNNING) {
510 			(void) svc_pool_control(NFS_SVCPOOL_ID,
511 			    SVCPSET_UNREGISTER_PROC, (void *)&nfs_srv_offline);
512 			(void) svc_pool_control(NFS_SVCPOOL_ID,
513 			    SVCPSET_SHUTDOWN_PROC, (void *)&nfs_srv_stop_all);
514 
515 			/* is this an nfsd warm start? */
516 			if (nfs_server_upordown == NFS_SERVER_QUIESCED) {
517 				cmn_err(CE_NOTE, "nfs_server: "
518 				    "server was previously quiesced; "
519 				    "existing NFSv4 state will be re-used");
520 
521 				/*
522 				 * HA-NFSv4: this is also the signal
523 				 * that a Resource Group failover has
524 				 * occurred.
525 				 */
526 				if (cluster_bootflags & CLUSTER_BOOTED ||
527 				    hanfsv4_force)
528 					hanfsv4_failover();
529 			} else {
530 				/* cold start */
531 				rfs4_state_init();
532 				nfs4_drc = rfs4_init_drc(nfs4_drc_max,
533 							nfs4_drc_hash,
534 							nfs4_drc_lifetime);
535 			}
536 
537 			/*
538 			 * Check to see if delegation is to be
539 			 * enabled at the server
540 			 */
541 			if (nfs4_srv_delegation != FALSE)
542 				rfs4_set_deleg_policy(SRV_NORMAL_DELEGATE);
543 
544 			nfs_server_upordown = NFS_SERVER_RUNNING;
545 		}
546 		cv_signal(&nfs_server_upordown_cv);
547 	}
548 	mutex_exit(&nfs_server_upordown_lock);
549 }
550 
551 /*
552  * If RDMA device available,
553  * start RDMA listener.
554  */
555 int
556 rdma_start(struct rdma_svc_args *rsa)
557 {
558 	int error;
559 	rdma_xprt_group_t started_rdma_xprts;
560 
561 	/* Double check the vers min/max ranges */
562 	if ((rsa->nfs_versmin > rsa->nfs_versmax) ||
563 		(rsa->nfs_versmin < NFS_VERSMIN) ||
564 		(rsa->nfs_versmax > NFS_VERSMAX)) {
565 		rsa->nfs_versmin = NFS_VERSMIN_DEFAULT;
566 		rsa->nfs_versmax = NFS_VERSMAX_DEFAULT;
567 	}
568 	nfs_versmin = rsa->nfs_versmin;
569 	nfs_versmax = rsa->nfs_versmax;
570 
571 	/* Set the versions in the callout table */
572 	__nfs_sc_rdma[0].sc_versmin = rsa->nfs_versmin;
573 	__nfs_sc_rdma[0].sc_versmax = rsa->nfs_versmax;
574 	/* For the NFS_ACL program, check the max version */
575 	__nfs_sc_rdma[1].sc_versmin = rsa->nfs_versmin;
576 	if (rsa->nfs_versmax > NFS_ACL_VERSMAX)
577 		__nfs_sc_rdma[1].sc_versmax = NFS_ACL_VERSMAX;
578 	else
579 		__nfs_sc_rdma[1].sc_versmax = rsa->nfs_versmax;
580 
581 	/* Initialize nfsv4 server */
582 	if (rsa->nfs_versmax == (rpcvers_t)NFS_V4)
583 		rfs4_server_start(rsa->delegation);
584 
585 	started_rdma_xprts.rtg_count = 0;
586 	started_rdma_xprts.rtg_listhead = NULL;
587 	started_rdma_xprts.rtg_poolid = rsa->poolid;
588 	error = svc_rdma_kcreate(rsa->netid, &nfs_sct_rdma, rsa->poolid,
589 	    &started_rdma_xprts);
590 
591 	if (error == 0) {
592 		mutex_enter(&rdma_wait_mutex);
593 		if (!cv_wait_sig(&rdma_wait_cv, &rdma_wait_mutex)) {
594 			rdma_stop(started_rdma_xprts);
595 		}
596 		mutex_exit(&rdma_wait_mutex);
597 	}
598 
599 	return (error);
600 }
601 
/*
 * Handler for the NULL procedure (procedure 0) in the v2, v3 and v4
 * dispatch tables below. Ignores both arguments and does nothing.
 */
/* ARGSUSED */
void
rpc_null(caddr_t *argp, caddr_t *resp)
{
}
607 
/*
 * Handler installed for the NFS version 2 procedures that are no
 * longer supported (RFS_ROOT and RFS_WRITECACHE in rfsdisptab_v2).
 * Ignores both arguments and does nothing; being void, it reports no
 * error itself.
 */
/* ARGSUSED */
static void
rfs_error(caddr_t *argp, caddr_t *resp)
{
	/* return (EOPNOTSUPP); */
}
614 
/*
 * No-op result "free" routine, used by the dispatch table entries that
 * do not name a procedure-specific free routine.
 */
static void
nullfree(void)
{
}
619 
/*
 * Printable names of the NFS version 2 procedures, in procedure-number
 * order (the same order as the entries of rfsdisptab_v2).
 */
static char *rfscallnames_v2[] = {
	"RFS2_NULL",
	"RFS2_GETATTR",
	"RFS2_SETATTR",
	"RFS2_ROOT",
	"RFS2_LOOKUP",
	"RFS2_READLINK",
	"RFS2_READ",
	"RFS2_WRITECACHE",
	"RFS2_WRITE",
	"RFS2_CREATE",
	"RFS2_REMOVE",
	"RFS2_RENAME",
	"RFS2_LINK",
	"RFS2_SYMLINK",
	"RFS2_MKDIR",
	"RFS2_RMDIR",
	"RFS2_READDIR",
	"RFS2_STATFS"
};
640 
/*
 * Dispatch table for NFS version 2, indexed by procedure number.
 *
 * Each entry lists, in order: the procedure handler; the argument XDR
 * routine, an alternate "fast" argument XDR routine (NULL_xdrproc_t
 * when there is none) and the argument size; the same three items for
 * the result; the routine that frees the result; the RPC_* flags
 * (dis_flags); and a *_getfh routine (0 when there is none).
 * NOTE(review): the field meanings are read off the initializers --
 * see the definition of struct rpcdisp for the authoritative layout.
 */
static struct rpcdisp rfsdisptab_v2[] = {
	/*
	 * NFS VERSION 2
	 */

	/* RFS_NULL = 0 */
	{rpc_null,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_GETATTR = 1 */
	{rfs_getattr,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
	    rfs_getattr_getfh},

	/* RFS_SETATTR = 2 */
	{rfs_setattr,
	    xdr_saargs, NULL_xdrproc_t, sizeof (struct nfssaargs),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_MAPRESP,
	    rfs_setattr_getfh},

	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
	{rfs_error,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_LOOKUP = 4 */
	{rfs_lookup,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_IDEMPOTENT|RPC_MAPRESP|RPC_PUBLICFH_OK,
	    rfs_lookup_getfh},

	/* RFS_READLINK = 5 */
	{rfs_readlink,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_rdlnres, NULL_xdrproc_t, sizeof (struct nfsrdlnres),
	    rfs_rlfree, RPC_IDEMPOTENT,
	    rfs_readlink_getfh},

	/* RFS_READ = 6 */
	{rfs_read,
	    xdr_readargs, NULL_xdrproc_t, sizeof (struct nfsreadargs),
	    xdr_rdresult, NULL_xdrproc_t, sizeof (struct nfsrdresult),
	    rfs_rdfree, RPC_IDEMPOTENT,
	    rfs_read_getfh},

	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
	{rfs_error,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS_WRITE = 8 */
	{rfs_write,
	    xdr_writeargs, NULL_xdrproc_t, sizeof (struct nfswriteargs),
	    xdr_attrstat, xdr_fastattrstat, sizeof (struct nfsattrstat),
	    nullfree, RPC_MAPRESP,
	    rfs_write_getfh},

	/* RFS_CREATE = 9 */
	{rfs_create,
	    xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_MAPRESP,
	    rfs_create_getfh},

	/* RFS_REMOVE = 10 */
	{rfs_remove,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_remove_getfh},

	/* RFS_RENAME = 11 */
	{rfs_rename,
	    xdr_rnmargs, NULL_xdrproc_t, sizeof (struct nfsrnmargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_rename_getfh},

	/* RFS_LINK = 12 */
	{rfs_link,
	    xdr_linkargs, NULL_xdrproc_t, sizeof (struct nfslinkargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_link_getfh},

	/* RFS_SYMLINK = 13 */
	{rfs_symlink,
	    xdr_slargs, NULL_xdrproc_t, sizeof (struct nfsslargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_symlink_getfh},

	/* RFS_MKDIR = 14 */
	{rfs_mkdir,
	    xdr_creatargs, NULL_xdrproc_t, sizeof (struct nfscreatargs),
	    xdr_diropres, xdr_fastdiropres, sizeof (struct nfsdiropres),
	    nullfree, RPC_MAPRESP,
	    rfs_mkdir_getfh},

	/* RFS_RMDIR = 15 */
	{rfs_rmdir,
	    xdr_diropargs, NULL_xdrproc_t, sizeof (struct nfsdiropargs),
#ifdef _LITTLE_ENDIAN
	    xdr_enum, xdr_fastenum, sizeof (enum nfsstat),
#else
	    xdr_enum, NULL_xdrproc_t, sizeof (enum nfsstat),
#endif
	    nullfree, RPC_MAPRESP,
	    rfs_rmdir_getfh},

	/* RFS_READDIR = 16 */
	{rfs_readdir,
	    xdr_rddirargs, NULL_xdrproc_t, sizeof (struct nfsrddirargs),
	    xdr_putrddirres, NULL_xdrproc_t, sizeof (struct nfsrddirres),
	    rfs_rddirfree, RPC_IDEMPOTENT,
	    rfs_readdir_getfh},

	/* RFS_STATFS = 17 */
	{rfs_statfs,
	    xdr_fhandle, xdr_fastfhandle, sizeof (fhandle_t),
	    xdr_statfs, xdr_faststatfs, sizeof (struct nfsstatfs),
	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
	    rfs_statfs_getfh},
};
792 
/*
 * Printable names of the NFS version 3 procedures, in procedure-number
 * order (the same order as the entries of rfsdisptab_v3).
 */
static char *rfscallnames_v3[] = {
	"RFS3_NULL",
	"RFS3_GETATTR",
	"RFS3_SETATTR",
	"RFS3_LOOKUP",
	"RFS3_ACCESS",
	"RFS3_READLINK",
	"RFS3_READ",
	"RFS3_WRITE",
	"RFS3_CREATE",
	"RFS3_MKDIR",
	"RFS3_SYMLINK",
	"RFS3_MKNOD",
	"RFS3_REMOVE",
	"RFS3_RMDIR",
	"RFS3_RENAME",
	"RFS3_LINK",
	"RFS3_READDIR",
	"RFS3_READDIRPLUS",
	"RFS3_FSSTAT",
	"RFS3_FSINFO",
	"RFS3_PATHCONF",
	"RFS3_COMMIT"
};
817 
/*
 * Dispatch table for NFS version 3, indexed by procedure number.
 *
 * Entries use the same positional layout as the version 2 table:
 * handler; argument XDR routine, alternate XDR routine, argument size;
 * result XDR routine, alternate XDR routine, result size; result free
 * routine; RPC_* flags; *_getfh routine. No version 3 entry supplies
 * an alternate ("fast") XDR routine.
 */
static struct rpcdisp rfsdisptab_v3[] = {
	/*
	 * NFS VERSION 3
	 */

	/* RFS3_NULL = 0 */
	{rpc_null,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT,
	    0},

	/* RFS3_GETATTR = 1 */
	{rfs3_getattr,
	    xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (GETATTR3args),
	    xdr_GETATTR3res, NULL_xdrproc_t, sizeof (GETATTR3res),
	    nullfree, (RPC_IDEMPOTENT | RPC_ALLOWANON),
	    rfs3_getattr_getfh},

	/* RFS3_SETATTR = 2 */
	{rfs3_setattr,
	    xdr_SETATTR3args, NULL_xdrproc_t, sizeof (SETATTR3args),
	    xdr_SETATTR3res, NULL_xdrproc_t, sizeof (SETATTR3res),
	    nullfree, 0,
	    rfs3_setattr_getfh},

	/* RFS3_LOOKUP = 3 */
	{rfs3_lookup,
	    xdr_diropargs3, NULL_xdrproc_t, sizeof (LOOKUP3args),
	    xdr_LOOKUP3res, NULL_xdrproc_t, sizeof (LOOKUP3res),
	    nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK),
	    rfs3_lookup_getfh},

	/* RFS3_ACCESS = 4 */
	{rfs3_access,
	    xdr_ACCESS3args, NULL_xdrproc_t, sizeof (ACCESS3args),
	    xdr_ACCESS3res, NULL_xdrproc_t, sizeof (ACCESS3res),
	    nullfree, RPC_IDEMPOTENT,
	    rfs3_access_getfh},

	/* RFS3_READLINK = 5 */
	{rfs3_readlink,
	    xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (READLINK3args),
	    xdr_READLINK3res, NULL_xdrproc_t, sizeof (READLINK3res),
	    rfs3_readlink_free, RPC_IDEMPOTENT,
	    rfs3_readlink_getfh},

	/* RFS3_READ = 6 */
	{rfs3_read,
	    xdr_READ3args, NULL_xdrproc_t, sizeof (READ3args),
	    xdr_READ3res, NULL_xdrproc_t, sizeof (READ3res),
	    rfs3_read_free, RPC_IDEMPOTENT,
	    rfs3_read_getfh},

	/* RFS3_WRITE = 7 */
	{rfs3_write,
	    xdr_WRITE3args, NULL_xdrproc_t, sizeof (WRITE3args),
	    xdr_WRITE3res, NULL_xdrproc_t, sizeof (WRITE3res),
	    nullfree, 0,
	    rfs3_write_getfh},

	/* RFS3_CREATE = 8 */
	{rfs3_create,
	    xdr_CREATE3args, NULL_xdrproc_t, sizeof (CREATE3args),
	    xdr_CREATE3res, NULL_xdrproc_t, sizeof (CREATE3res),
	    nullfree, 0,
	    rfs3_create_getfh},

	/* RFS3_MKDIR = 9 */
	{rfs3_mkdir,
	    xdr_MKDIR3args, NULL_xdrproc_t, sizeof (MKDIR3args),
	    xdr_MKDIR3res, NULL_xdrproc_t, sizeof (MKDIR3res),
	    nullfree, 0,
	    rfs3_mkdir_getfh},

	/* RFS3_SYMLINK = 10 */
	{rfs3_symlink,
	    xdr_SYMLINK3args, NULL_xdrproc_t, sizeof (SYMLINK3args),
	    xdr_SYMLINK3res, NULL_xdrproc_t, sizeof (SYMLINK3res),
	    nullfree, 0,
	    rfs3_symlink_getfh},

	/* RFS3_MKNOD = 11 */
	{rfs3_mknod,
	    xdr_MKNOD3args, NULL_xdrproc_t, sizeof (MKNOD3args),
	    xdr_MKNOD3res, NULL_xdrproc_t, sizeof (MKNOD3res),
	    nullfree, 0,
	    rfs3_mknod_getfh},

	/* RFS3_REMOVE = 12 */
	{rfs3_remove,
	    xdr_diropargs3, NULL_xdrproc_t, sizeof (REMOVE3args),
	    xdr_REMOVE3res, NULL_xdrproc_t, sizeof (REMOVE3res),
	    nullfree, 0,
	    rfs3_remove_getfh},

	/* RFS3_RMDIR = 13 */
	{rfs3_rmdir,
	    xdr_diropargs3, NULL_xdrproc_t, sizeof (RMDIR3args),
	    xdr_RMDIR3res, NULL_xdrproc_t, sizeof (RMDIR3res),
	    nullfree, 0,
	    rfs3_rmdir_getfh},

	/* RFS3_RENAME = 14 */
	{rfs3_rename,
	    xdr_RENAME3args, NULL_xdrproc_t, sizeof (RENAME3args),
	    xdr_RENAME3res, NULL_xdrproc_t, sizeof (RENAME3res),
	    nullfree, 0,
	    rfs3_rename_getfh},

	/* RFS3_LINK = 15 */
	{rfs3_link,
	    xdr_LINK3args, NULL_xdrproc_t, sizeof (LINK3args),
	    xdr_LINK3res, NULL_xdrproc_t, sizeof (LINK3res),
	    nullfree, 0,
	    rfs3_link_getfh},

	/* RFS3_READDIR = 16 */
	{rfs3_readdir,
	    xdr_READDIR3args, NULL_xdrproc_t, sizeof (READDIR3args),
	    xdr_READDIR3res, NULL_xdrproc_t, sizeof (READDIR3res),
	    rfs3_readdir_free, RPC_IDEMPOTENT,
	    rfs3_readdir_getfh},

	/* RFS3_READDIRPLUS = 17 */
	{rfs3_readdirplus,
	    xdr_READDIRPLUS3args, NULL_xdrproc_t, sizeof (READDIRPLUS3args),
	    xdr_READDIRPLUS3res, NULL_xdrproc_t, sizeof (READDIRPLUS3res),
	    rfs3_readdirplus_free, RPC_AVOIDWORK,
	    rfs3_readdirplus_getfh},

	/* RFS3_FSSTAT = 18 */
	{rfs3_fsstat,
	    xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSSTAT3args),
	    xdr_FSSTAT3res, NULL_xdrproc_t, sizeof (FSSTAT3res),
	    nullfree, RPC_IDEMPOTENT,
	    rfs3_fsstat_getfh},

	/* RFS3_FSINFO = 19 */
	{rfs3_fsinfo,
	    xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (FSINFO3args),
	    xdr_FSINFO3res, NULL_xdrproc_t, sizeof (FSINFO3res),
	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON,
	    rfs3_fsinfo_getfh},

	/* RFS3_PATHCONF = 20 */
	{rfs3_pathconf,
	    xdr_nfs_fh3_server, NULL_xdrproc_t, sizeof (PATHCONF3args),
	    xdr_PATHCONF3res, NULL_xdrproc_t, sizeof (PATHCONF3res),
	    nullfree, RPC_IDEMPOTENT,
	    rfs3_pathconf_getfh},

	/* RFS3_COMMIT = 21 */
	{rfs3_commit,
	    xdr_COMMIT3args, NULL_xdrproc_t, sizeof (COMMIT3args),
	    xdr_COMMIT3res, NULL_xdrproc_t, sizeof (COMMIT3res),
	    nullfree, RPC_IDEMPOTENT,
	    rfs3_commit_getfh},
};
977 
/*
 * Printable names for NFS version 4 procedure numbers.
 *
 * Only indices 0 (NULL) and 1 (COMPOUND) have matching entries in
 * rfsdisptab_v4; this array nevertheless carries nine names.
 * NOTE(review): the purpose of the entries past index 1, including the
 * trailing "RFS4_CREATE", is not evident from this file -- confirm
 * against the users of this array before trimming it.
 */
static char *rfscallnames_v4[] = {
	"RFS4_NULL",
	"RFS4_COMPOUND",
	"RFS4_NULL",
	"RFS4_NULL",
	"RFS4_NULL",
	"RFS4_NULL",
	"RFS4_NULL",
	"RFS4_NULL",
	"RFS4_CREATE"
};
989 
/*
 * Dispatch table for NFS version 4, indexed by procedure number.
 * Version 4 has only two procedures: NULL and COMPOUND. Entries use
 * the same positional layout as the version 2 and 3 tables; neither
 * entry supplies a *_getfh routine.
 */
static struct rpcdisp rfsdisptab_v4[] = {
	/*
	 * NFS VERSION 4
	 */

	/* RFS4_NULL = 0 */
	{rpc_null,
	    xdr_void, NULL_xdrproc_t, 0,
	    xdr_void, NULL_xdrproc_t, 0,
	    nullfree, RPC_IDEMPOTENT, 0},

	/* RFS4_COMPOUND = 1 */
	{rfs4_compound,
	    xdr_COMPOUND4args_srv, NULL_xdrproc_t, sizeof (COMPOUND4args),
	    xdr_COMPOUND4res_srv, NULL_xdrproc_t, sizeof (COMPOUND4res),
	    rfs4_compound_free, 0, 0},
};
1007 
/*
 * Union of the argument structures of every NFS version 2, 3 and 4
 * procedure, so an object of this type is large enough to hold the
 * decoded arguments of any one of them. Procedures that take no
 * arguments (the NULL procedures and the unsupported v2 procedures)
 * have no member.
 */
union rfs_args {
	/*
	 * NFS VERSION 2
	 */

	/* RFS_NULL = 0 */

	/* RFS_GETATTR = 1 */
	fhandle_t nfs2_getattr_args;

	/* RFS_SETATTR = 2 */
	struct nfssaargs nfs2_setattr_args;

	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */

	/* RFS_LOOKUP = 4 */
	struct nfsdiropargs nfs2_lookup_args;

	/* RFS_READLINK = 5 */
	fhandle_t nfs2_readlink_args;

	/* RFS_READ = 6 */
	struct nfsreadargs nfs2_read_args;

	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */

	/* RFS_WRITE = 8 */
	struct nfswriteargs nfs2_write_args;

	/* RFS_CREATE = 9 */
	struct nfscreatargs nfs2_create_args;

	/* RFS_REMOVE = 10 */
	struct nfsdiropargs nfs2_remove_args;

	/* RFS_RENAME = 11 */
	struct nfsrnmargs nfs2_rename_args;

	/* RFS_LINK = 12 */
	struct nfslinkargs nfs2_link_args;

	/* RFS_SYMLINK = 13 */
	struct nfsslargs nfs2_symlink_args;

	/* RFS_MKDIR = 14 */
	struct nfscreatargs nfs2_mkdir_args;

	/* RFS_RMDIR = 15 */
	struct nfsdiropargs nfs2_rmdir_args;

	/* RFS_READDIR = 16 */
	struct nfsrddirargs nfs2_readdir_args;

	/* RFS_STATFS = 17 */
	fhandle_t nfs2_statfs_args;

	/*
	 * NFS VERSION 3
	 */

	/* RFS3_NULL = 0 */

	/* RFS3_GETATTR = 1 */
	GETATTR3args nfs3_getattr_args;

	/* RFS3_SETATTR = 2 */
	SETATTR3args nfs3_setattr_args;

	/* RFS3_LOOKUP = 3 */
	LOOKUP3args nfs3_lookup_args;

	/* RFS3_ACCESS = 4 */
	ACCESS3args nfs3_access_args;

	/* RFS3_READLINK = 5 */
	READLINK3args nfs3_readlink_args;

	/* RFS3_READ = 6 */
	READ3args nfs3_read_args;

	/* RFS3_WRITE = 7 */
	WRITE3args nfs3_write_args;

	/* RFS3_CREATE = 8 */
	CREATE3args nfs3_create_args;

	/* RFS3_MKDIR = 9 */
	MKDIR3args nfs3_mkdir_args;

	/* RFS3_SYMLINK = 10 */
	SYMLINK3args nfs3_symlink_args;

	/* RFS3_MKNOD = 11 */
	MKNOD3args nfs3_mknod_args;

	/* RFS3_REMOVE = 12 */
	REMOVE3args nfs3_remove_args;

	/* RFS3_RMDIR = 13 */
	RMDIR3args nfs3_rmdir_args;

	/* RFS3_RENAME = 14 */
	RENAME3args nfs3_rename_args;

	/* RFS3_LINK = 15 */
	LINK3args nfs3_link_args;

	/* RFS3_READDIR = 16 */
	READDIR3args nfs3_readdir_args;

	/* RFS3_READDIRPLUS = 17 */
	READDIRPLUS3args nfs3_readdirplus_args;

	/* RFS3_FSSTAT = 18 */
	FSSTAT3args nfs3_fsstat_args;

	/* RFS3_FSINFO = 19 */
	FSINFO3args nfs3_fsinfo_args;

	/* RFS3_PATHCONF = 20 */
	PATHCONF3args nfs3_pathconf_args;

	/* RFS3_COMMIT = 21 */
	COMMIT3args nfs3_commit_args;

	/*
	 * NFS VERSION 4
	 */

	/* RFS4_NULL = 0 */

	/* RFS4_COMPOUND = 1 */
	COMPOUND4args nfs4_compound_args;
};
1142 
/*
 * Union of the result structures of every NFS version 2, 3 and 4
 * procedure.  common_dispatch() embeds it in its on-stack result buffer,
 * which is used whenever the results are not placed in storage obtained
 * from SVC_GETRES(), and which also receives a copy of the results for
 * logging.  The members are grouped by protocol version and listed in
 * procedure number order; procedures without results have a comment only.
 */
1143 union rfs_res {
1144 	/*
1145 	 * NFS VERSION 2
1146 	 */
1147 
1148 	/* RFS_NULL = 0 */
1149 
1150 	/* RFS_GETATTR = 1 */
1151 	struct nfsattrstat nfs2_getattr_res;
1152 
1153 	/* RFS_SETATTR = 2 */
1154 	struct nfsattrstat nfs2_setattr_res;
1155 
1156 	/* RFS_ROOT = 3 *** NO LONGER SUPPORTED *** */
1157 
1158 	/* RFS_LOOKUP = 4 */
1159 	struct nfsdiropres nfs2_lookup_res;
1160 
1161 	/* RFS_READLINK = 5 */
1162 	struct nfsrdlnres nfs2_readlink_res;
1163 
1164 	/* RFS_READ = 6 */
1165 	struct nfsrdresult nfs2_read_res;
1166 
1167 	/* RFS_WRITECACHE = 7 *** NO LONGER SUPPORTED *** */
1168 
1169 	/* RFS_WRITE = 8 */
1170 	struct nfsattrstat nfs2_write_res;
1171 
1172 	/* RFS_CREATE = 9 */
1173 	struct nfsdiropres nfs2_create_res;
1174 
1175 	/* RFS_REMOVE = 10 */
1176 	enum nfsstat nfs2_remove_res;
1177 
1178 	/* RFS_RENAME = 11 */
1179 	enum nfsstat nfs2_rename_res;
1180 
1181 	/* RFS_LINK = 12 */
1182 	enum nfsstat nfs2_link_res;
1183 
1184 	/* RFS_SYMLINK = 13 */
1185 	enum nfsstat nfs2_symlink_res;
1186 
1187 	/* RFS_MKDIR = 14 */
1188 	struct nfsdiropres nfs2_mkdir_res;
1189 
1190 	/* RFS_RMDIR = 15 */
1191 	enum nfsstat nfs2_rmdir_res;
1192 
1193 	/* RFS_READDIR = 16 */
1194 	struct nfsrddirres nfs2_readdir_res;
1195 
1196 	/* RFS_STATFS = 17 */
1197 	struct nfsstatfs nfs2_statfs_res;
1198 
1199 	/*
1200 	 * NFS VERSION 3
1201 	 */
1202 
1203 	/* RFS_NULL = 0 */
1204 
1205 	/* RFS3_GETATTR = 1 */
1206 	GETATTR3res nfs3_getattr_res;
1207 
1208 	/* RFS3_SETATTR = 2 */
1209 	SETATTR3res nfs3_setattr_res;
1210 
1211 	/* RFS3_LOOKUP = 3 */
1212 	LOOKUP3res nfs3_lookup_res;
1213 
1214 	/* RFS3_ACCESS = 4 */
1215 	ACCESS3res nfs3_access_res;
1216 
1217 	/* RFS3_READLINK = 5 */
1218 	READLINK3res nfs3_readlink_res;
1219 
1220 	/* RFS3_READ = 6 */
1221 	READ3res nfs3_read_res;
1222 
1223 	/* RFS3_WRITE = 7 */
1224 	WRITE3res nfs3_write_res;
1225 
1226 	/* RFS3_CREATE = 8 */
1227 	CREATE3res nfs3_create_res;
1228 
1229 	/* RFS3_MKDIR = 9 */
1230 	MKDIR3res nfs3_mkdir_res;
1231 
1232 	/* RFS3_SYMLINK = 10 */
1233 	SYMLINK3res nfs3_symlink_res;
1234 
1235 	/* RFS3_MKNOD = 11 */
1236 	MKNOD3res nfs3_mknod_res;
1237 
1238 	/* RFS3_REMOVE = 12 */
1239 	REMOVE3res nfs3_remove_res;
1240 
1241 	/* RFS3_RMDIR = 13 */
1242 	RMDIR3res nfs3_rmdir_res;
1243 
1244 	/* RFS3_RENAME = 14 */
1245 	RENAME3res nfs3_rename_res;
1246 
1247 	/* RFS3_LINK = 15 */
1248 	LINK3res nfs3_link_res;
1249 
1250 	/* RFS3_READDIR = 16 */
1251 	READDIR3res nfs3_readdir_res;
1252 
1253 	/* RFS3_READDIRPLUS = 17 */
1254 	READDIRPLUS3res nfs3_readdirplus_res;
1255 
1256 	/* RFS3_FSSTAT = 18 */
1257 	FSSTAT3res nfs3_fsstat_res;
1258 
1259 	/* RFS3_FSINFO = 19 */
1260 	FSINFO3res nfs3_fsinfo_res;
1261 
1262 	/* RFS3_PATHCONF = 20 */
1263 	PATHCONF3res nfs3_pathconf_res;
1264 
1265 	/* RFS3_COMMIT = 21 */
1266 	COMMIT3res nfs3_commit_res;
1267 
1268 	/*
1269 	 * NFS VERSION 4
1270 	 */
1271 
1272 	/* RFS_NULL = 0 */
1273 
1274 	/* RFS4_COMPOUND = 1 */
1275 	COMPOUND4res nfs4_compound_res;
1276 
1277 };
1278 
/*
 * Per-version dispatch information for the NFS program, handed to
 * common_dispatch() by rfs_dispatch().  common_dispatch() indexes it with
 * (request version - NFS_VERSMIN), so the entries must stay in ascending
 * version order.  Each entry holds the number of procedures in the
 * version's dispatch table, the procedure-name strings, the address of the
 * per-procedure counter pointer, and the dispatch table itself.
 */
1279 static struct rpc_disptable rfs_disptable[] = {
1280 	{sizeof (rfsdisptab_v2) / sizeof (rfsdisptab_v2[0]),
1281 	    rfscallnames_v2,
1282 	    &rfsproccnt_v2_ptr, rfsdisptab_v2},
1283 	{sizeof (rfsdisptab_v3) / sizeof (rfsdisptab_v3[0]),
1284 	    rfscallnames_v3,
1285 	    &rfsproccnt_v3_ptr, rfsdisptab_v3},
1286 	{sizeof (rfsdisptab_v4) / sizeof (rfsdisptab_v4[0]),
1287 	    rfscallnames_v4,
1288 	    &rfsproccnt_v4_ptr, rfsdisptab_v4},
1289 };
1290 
1291 /*
1292  * If nfs_portmon is set, then clients are required to use privileged
1293  * ports (ports < IPPORT_RESERVED) in order to get NFS services.
1294  *
1295  * N.B.: this attempt to carry forward the already ill-conceived notion
1296  * of privileged ports for TCP/UDP is really quite ineffectual.  Not only
1297  * is it transport-dependent, it's laughably easy to spoof.  If you're
1298  * really interested in security, you must start with secure RPC instead.
 *
 * The flag is consulted by checkauth(), which examines only AF_INET and
 * AF_INET6 caller addresses and refuses the request (returns 0) when the
 * source port is not below IPPORT_RESERVED.  It is off by default.
1299  */
1300 static int nfs_portmon = 0;
1301 
/*
 * DEBUG-only counters maintained by common_dispatch().  cred_hits counts
 * requests for which crgetref() on the transport's xp_cred returned 1 and
 * the credential was used as is; cred_misses counts requests for which it
 * was freed and replaced by a new one from crget().
 */
1302 #ifdef DEBUG
1303 static int cred_hits = 0;
1304 static int cred_misses = 0;
1305 #endif
1306 
1307 
1308 #ifdef DEBUG
1309 /*
1310  * Debug code to allow disabling of common_dispatch()'s use of the
1311  * dis_fastxdrargs and dis_fastxdrres routines for testing purposes.
 * A non-zero rfs_no_fast_xdrargs makes argument decoding always use
 * dis_xdrargs; a non-zero rfs_no_fast_xdrres makes reply encoding always
 * use dis_xdrres.
1312  */
1313 static int rfs_no_fast_xdrargs = 0;
1314 static int rfs_no_fast_xdrres = 0;
1315 #endif
1316 
/*
 * Union of the argument structures of every NFS_ACL version 2 and 3
 * procedure.  common_dispatch() embeds it, next to union rfs_args, in its
 * on-stack argument buffer.  The members are listed in procedure number
 * order; the NULL procedures take no arguments and have a comment only.
 */
1317 union acl_args {
1318 	/*
1319 	 * ACL VERSION 2
1320 	 */
1321 
1322 	/* ACL2_NULL = 0 */
1323 
1324 	/* ACL2_GETACL = 1 */
1325 	GETACL2args acl2_getacl_args;
1326 
1327 	/* ACL2_SETACL = 2 */
1328 	SETACL2args acl2_setacl_args;
1329 
1330 	/* ACL2_GETATTR = 3 */
1331 	GETATTR2args acl2_getattr_args;
1332 
1333 	/* ACL2_ACCESS = 4 */
1334 	ACCESS2args acl2_access_args;
1335 
1336 	/* ACL2_GETXATTRDIR = 5 */
1337 	GETXATTRDIR2args acl2_getxattrdir_args;
1338 
1339 	/*
1340 	 * ACL VERSION 3
1341 	 */
1342 
1343 	/* ACL3_NULL = 0 */
1344 
1345 	/* ACL3_GETACL = 1 */
1346 	GETACL3args acl3_getacl_args;
1347 
1348 	/* ACL3_SETACL = 2 */
1349 	SETACL3args acl3_setacl;
1350 
1351 	/* ACL3_GETXATTRDIR = 3 */
1352 	GETXATTRDIR3args acl3_getxattrdir_args;
1353 
1354 };
1355 
/*
 * Union of the result structures of every NFS_ACL version 2 and 3
 * procedure.  common_dispatch() embeds it, next to union rfs_res, in its
 * on-stack result buffer, so every member must have the type that the
 * matching dispatch table entry reports as its result size.  The members
 * are listed in procedure number order; the NULL procedures return no
 * results and have a comment only.
 */
1356 union acl_res {
1357 	/*
1358 	 * ACL VERSION 2
1359 	 */
1360 
1361 	/* ACL2_NULL = 0 */
1362 
1363 	/* ACL2_GETACL = 1 */
1364 	GETACL2res acl2_getacl_res;
1365 
1366 	/* ACL2_SETACL = 2 */
1367 	SETACL2res acl2_setacl_res;
1368 
1369 	/* ACL2_GETATTR = 3 */
1370 	GETATTR2res acl2_getattr_res;
1371 
1372 	/* ACL2_ACCESS = 4 */
1373 	ACCESS2res acl2_access_res;
1374 
1375 	/* ACL2_GETXATTRDIR = 5 (result type, matching acldisptab_v2[]) */
1376 	GETXATTRDIR2res acl2_getxattrdir_res;
1377 
1378 	/*
1379 	 * ACL VERSION 3
1380 	 */
1381 
1382 	/* ACL3_NULL = 0 */
1383 
1384 	/* ACL3_GETACL = 1 */
1385 	GETACL3res acl3_getacl_res;
1386 
1387 	/* ACL3_SETACL = 2 */
1388 	SETACL3res acl3_setacl_res;
1389 
1390 	/* ACL3_GETXATTRDIR = 3 */
1391 	GETXATTRDIR3res acl3_getxattrdir_res;
1392 
1393 };
1394 
1395 static bool_t
1396 auth_tooweak(struct svc_req *req, char *res)
1397 {
1398 
1399 	if (req->rq_vers == NFS_VERSION && req->rq_proc == RFS_LOOKUP) {
1400 		struct nfsdiropres *dr = (struct nfsdiropres *)res;
1401 		if (dr->dr_status == WNFSERR_CLNT_FLAVOR)
1402 			return (TRUE);
1403 	} else if (req->rq_vers == NFS_V3 && req->rq_proc == NFSPROC3_LOOKUP) {
1404 		LOOKUP3res *resp = (LOOKUP3res *)res;
1405 		if (resp->status == WNFSERR_CLNT_FLAVOR)
1406 			return (TRUE);
1407 	}
1408 	return (FALSE);
1409 }
1410 
1411 
/*
 * Common RPC dispatch routine shared by the NFS program (rfs_dispatch())
 * and the NFS_ACL program (acl_dispatch()).
 *
 *	req		the incoming RPC request
 *	xprt		the transport handle the request arrived on
 *	min_vers	lowest program version served; disptable[0] describes it
 *	max_vers	highest program version served
 *	pgmname		program name used in console messages
 *	disptable	per-version dispatch information
 *
 * The routine validates the version and procedure numbers, decodes the
 * arguments and, for version 4, hands the request to rfs4_dispatch().
 * For the other versions it looks up the export named by the request's
 * filehandle, checks authentication with checkauth(), runs the procedure
 * (going through SVC_DUP_EXT() first for procedures that are not flagged
 * RPC_IDEMPOTENT), sends the reply, writes an nfslog record when logging
 * applies and frees the results.  Every path ends at "done", which frees
 * the arguments, releases the export reference and updates the
 * per-version NFS_CALLS and NFS_BADCALLS statistics.
 */
1412 static void
1413 common_dispatch(struct svc_req *req, SVCXPRT *xprt, rpcvers_t min_vers,
1414 		rpcvers_t max_vers, char *pgmname,
1415 		struct rpc_disptable *disptable)
1416 {
1417 	int which;
1418 	rpcvers_t vers;
1419 	char *args;
1420 	union {
1421 			union rfs_args ra;
1422 			union acl_args aa;
1423 		} args_buf;
1424 	char *res;
1425 	union {
1426 			union rfs_res rr;
1427 			union acl_res ar;
1428 		} res_buf;
1429 	struct rpcdisp *disp = NULL;
1430 	int dis_flags = 0;
1431 	cred_t *cr;
1432 	int error = 0;
1433 	int anon_ok;
1434 	struct exportinfo *exi = NULL;
1435 	unsigned int nfslog_rec_id;
1436 	int dupstat;
1437 	struct dupreq *dr;
1438 	int authres;
1439 	bool_t publicfh_ok = FALSE;
1440 	enum_t auth_flavor;
1441 	bool_t dupcached = FALSE;
1442 	struct netbuf	nb;
1443 	bool_t logging_enabled = FALSE;
1444 	struct exportinfo *nfslog_exi = NULL;
1445 	char **procnames;
1446 	char cbuf[INET6_ADDRSTRLEN];	/* to hold both IPv4 and IPv6 addr */
1447 
1448 	vers = req->rq_vers;
1449 
1450 	if (vers < min_vers || vers > max_vers) {
1451 		svcerr_progvers(req->rq_xprt, min_vers, max_vers);
1452 		error++;
1453 		cmn_err(CE_NOTE, "%s: bad version number %u", pgmname, vers);
1454 		goto done;
1455 	}
	/* From here on vers is a zero-based index into disptable[]. */
1456 	vers -= min_vers;
1457 
1458 	which = req->rq_proc;
1459 	if (which < 0 || which >= disptable[(int)vers].dis_nprocs) {
1460 		svcerr_noproc(req->rq_xprt);
1461 		error++;
1462 		goto done;
1463 	}
1464 
	/* Count the call against its procedure. */
1465 	(*(disptable[(int)vers].dis_proccntp))[which].value.ui64++;
1466 
1467 	disp = &disptable[(int)vers].dis_table[which];
1468 	procnames = disptable[(int)vers].dis_procnames;
1469 
1470 	auth_flavor = req->rq_cred.oa_flavor;
1471 
1472 	/*
1473 	 * Deserialize into the args struct.
	 *
	 * The fast XDR routine, when the procedure has one and the request
	 * is not RPCSEC_GSS, is tried first.  It is handed the address of
	 * the args pointer itself rather than the buffer args points to
	 * (NOTE(review): presumably so that it can repoint args at the
	 * decoded data -- confirm in the fast XDR routines).  If the fast
	 * path is unavailable or fails, the buffer is zeroed and the
	 * regular XDR routine decodes into it.
1474 	 */
1475 	args = (char *)&args_buf;
1476 
1477 #ifdef DEBUG
1478 	if (rfs_no_fast_xdrargs || (auth_flavor == RPCSEC_GSS) ||
1479 	    disp->dis_fastxdrargs == NULL_xdrproc_t ||
1480 	    !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1481 #else
1482 	if ((auth_flavor == RPCSEC_GSS) ||
1483 	    disp->dis_fastxdrargs == NULL_xdrproc_t ||
1484 	    !SVC_GETARGS(xprt, disp->dis_fastxdrargs, (char *)&args))
1485 #endif
1486 	{
1487 		bzero(args, disp->dis_argsz);
1488 		if (!SVC_GETARGS(xprt, disp->dis_xdrargs, args)) {
1489 			svcerr_decode(xprt);
1490 			error++;
1491 			cmn_err(CE_NOTE,
1492 			    "Failed to decode arguments for %s version %u "
1493 			    "procedure %s client %s%s",
1494 			    pgmname, vers + min_vers, procnames[which],
1495 			    client_name(req), client_addr(req, cbuf));
1496 			goto done;
1497 		}
1498 	}
1499 
1500 	/*
1501 	 * If Version 4 use that specific dispatch function.
1502 	 */
1503 	if (req->rq_vers == 4) {
1504 		error += rfs4_dispatch(disp, req, xprt, args);
1505 		goto done;
1506 	}
1507 
1508 	dis_flags = disp->dis_flags;
1509 
1510 	/*
1511 	 * Find export information and check authentication,
1512 	 * setting the credential if everything is ok.
1513 	 */
1514 	if (disp->dis_getfh != NULL) {
1515 		void *fh;
1516 		fsid_t *fsid;
1517 		fid_t *fid, *xfid;
1518 		fhandle_t *fh2;
1519 		nfs_fh3 *fh3;
1520 
1521 		fh = (*disp->dis_getfh)(args);
		/*
		 * NOTE(review): there is no default case; fsid, fid and
		 * xfid are assigned only for NFS_VERSION and NFS_V3.
		 */
1522 		switch (req->rq_vers) {
1523 		case NFS_VERSION:
1524 			fh2 = (fhandle_t *)fh;
1525 			fsid = &fh2->fh_fsid;
1526 			fid = (fid_t *)&fh2->fh_len;
1527 			xfid = (fid_t *)&fh2->fh_xlen;
1528 			break;
1529 		case NFS_V3:
1530 			fh3 = (nfs_fh3 *)fh;
1531 			fsid = &fh3->fh3_fsid;
1532 			fid = FH3TOFIDP(fh3);
1533 			xfid = FH3TOXFIDP(fh3);
1534 			break;
1535 		}
1536 
1537 		/*
1538 		 * Fix for bug 1038302 - corbin
1539 		 * There is a problem here if anonymous access is
1540 		 * disallowed.  If the current request is part of the
1541 		 * client's mount process for the requested filesystem,
1542 		 * then it will carry root (uid 0) credentials on it, and
1543 		 * will be denied by checkauth if that client does not
1544 		 * have explicit root=0 permission.  This will cause the
1545 		 * client's mount operation to fail.  As a work-around,
1546 		 * we check here to see if the request is a getattr or
1547 		 * statfs operation on the exported vnode itself, and
1548 		 * pass a flag to checkauth with the result of this test.
1549 		 *
1550 		 * The filehandle refers to the mountpoint itself if
1551 		 * the fh_data and fh_xdata portions of the filehandle
1552 		 * are equal.
1553 		 *
1554 		 * Added anon_ok argument to checkauth().
1555 		 */
1556 
1557 		if ((dis_flags & RPC_ALLOWANON) && EQFID(fid, xfid))
1558 			anon_ok = 1;
1559 		else
1560 			anon_ok = 0;
1561 
		/*
		 * Use the credential attached to the transport when
		 * crgetref() reports 1 for it; otherwise free it and
		 * attach a new one obtained from crget().
		 */
1562 		cr = xprt->xp_cred;
1563 		ASSERT(cr != NULL);
1564 #ifdef DEBUG
1565 		if (crgetref(cr) != 1) {
1566 			crfree(cr);
1567 			cr = crget();
1568 			xprt->xp_cred = cr;
1569 			cred_misses++;
1570 		} else
1571 			cred_hits++;
1572 #else
1573 		if (crgetref(cr) != 1) {
1574 			crfree(cr);
1575 			cr = crget();
1576 			xprt->xp_cred = cr;
1577 		}
1578 #endif
1579 
		/* A non-NULL exi is released with exi_rele() at "done". */
1580 		exi = checkexport(fsid, xfid);
1581 
1582 		if (exi != NULL) {
1583 			publicfh_ok = PUBLICFH_CHECK(disp, exi, fsid, xfid);
1584 
1585 			/*
1586 			 * Don't allow non-V4 clients access
1587 			 * to pseudo exports
1588 			 */
1589 			if (PSEUDO(exi)) {
1590 				svcerr_weakauth(xprt);
1591 				error++;
1592 				goto done;
1593 			}
1594 
1595 			authres = checkauth(exi, req, cr, anon_ok, publicfh_ok);
1596 			/*
1597 			 * authres >  0: authentication OK - proceed
1598 			 * authres == 0: authentication weak - return error
1599 			 * authres <  0: authentication timeout - drop
1600 			 */
1601 			if (authres <= 0) {
1602 				if (authres == 0) {
1603 					svcerr_weakauth(xprt);
1604 					error++;
1605 				}
1606 				goto done;
1607 			}
1608 		}
1609 	} else
1610 		cr = NULL;
1611 
	/*
	 * Pick the result buffer: storage from SVC_GETRES() for
	 * RPC_MAPRESP procedures on non-RPCSEC_GSS requests, falling back
	 * to the on-stack res_buf when that returns NULL or does not apply.
	 */
1612 	if ((dis_flags & RPC_MAPRESP) && (auth_flavor != RPCSEC_GSS)) {
1613 		res = (char *)SVC_GETRES(xprt, disp->dis_ressz);
1614 		if (res == NULL)
1615 			res = (char *)&res_buf;
1616 	} else
1617 		res = (char *)&res_buf;
1618 
1619 	if (!(dis_flags & RPC_IDEMPOTENT)) {
1620 		dupstat = SVC_DUP_EXT(xprt, req, res, disp->dis_ressz, &dr,
1621 				&dupcached);
1622 
1623 		switch (dupstat) {
1624 		case DUP_ERROR:
1625 			svcerr_systemerr(xprt);
1626 			error++;
1627 			goto done;
1628 			/* NOTREACHED */
1629 		case DUP_INPROGRESS:
1630 			if (res != (char *)&res_buf)
1631 				SVC_FREERES(xprt);
1632 			error++;
1633 			goto done;
1634 			/* NOTREACHED */
1635 		case DUP_NEW:
1636 		case DUP_DROP:
1637 			curthread->t_flag |= T_DONTPEND;
1638 
1639 			(*disp->dis_proc)(args, res, exi, req, cr);
1640 
1641 			curthread->t_flag &= ~T_DONTPEND;
			/*
			 * T_WOULDBLOCK set after the call: record the
			 * request as DUP_DROP and send no reply.
			 */
1642 			if (curthread->t_flag & T_WOULDBLOCK) {
1643 				curthread->t_flag &= ~T_WOULDBLOCK;
1644 				SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1645 					disp->dis_ressz, DUP_DROP);
1646 				if (res != (char *)&res_buf)
1647 					SVC_FREERES(xprt);
1648 				error++;
1649 				goto done;
1650 			}
1651 			if (dis_flags & RPC_AVOIDWORK) {
1652 				SVC_DUPDONE_EXT(xprt, dr, res, NULL,
1653 					disp->dis_ressz, DUP_DROP);
1654 			} else {
1655 				SVC_DUPDONE_EXT(xprt, dr, res,
1656 					disp->dis_resfree == nullfree ? NULL :
1657 					disp->dis_resfree,
1658 					disp->dis_ressz, DUP_DONE);
				/*
				 * With dupcached set, the dis_resfree
				 * call near the end is skipped.
				 */
1659 				dupcached = TRUE;
1660 			}
1661 			break;
		/* The procedure is not run again; res is sent as is. */
1662 		case DUP_DONE:
1663 			break;
1664 		}
1665 
1666 	} else {
1667 		curthread->t_flag |= T_DONTPEND;
1668 
1669 		(*disp->dis_proc)(args, res, exi, req, cr);
1670 
1671 		curthread->t_flag &= ~T_DONTPEND;
1672 		if (curthread->t_flag & T_WOULDBLOCK) {
1673 			curthread->t_flag &= ~T_WOULDBLOCK;
1674 			if (res != (char *)&res_buf)
1675 				SVC_FREERES(xprt);
1676 			error++;
1677 			goto done;
1678 		}
1679 	}
1680 
	/*
	 * A V2/V3 LOOKUP that ended with WNFSERR_CLNT_FLAVOR is answered
	 * with a weak-authentication RPC error instead of its result.
	 */
1681 	if (auth_tooweak(req, res)) {
1682 		svcerr_weakauth(xprt);
1683 		error++;
1684 		goto done;
1685 	}
1686 
1687 	/*
1688 	 * Check to see if logging has been enabled on the server.
1689 	 * If so, then obtain the export info struct to be used for
1690 	 * the later writing of the log record.  This is done for
1691 	 * the case that a lookup is done across a non-logged public
1692 	 * file system.
1693 	 */
1694 	if (nfslog_buffer_list != NULL) {
1695 		nfslog_exi = nfslog_get_exi(exi, req, res, &nfslog_rec_id);
1696 		/*
1697 		 * Is logging enabled?
1698 		 */
1699 		logging_enabled = (nfslog_exi != NULL);
1700 
1701 		/*
1702 		 * Copy the netbuf for logging purposes, before it is
1703 		 * freed by svc_sendreply().
1704 		 */
1705 		if (logging_enabled) {
1706 			NFSLOG_COPY_NETBUF(nfslog_exi, xprt, &nb);
1707 			/*
1708 			 * If RPC_MAPRESP flag set (i.e. in V2 ops) the
1709 			 * res gets copied directly into the mbuf and
1710 			 * may be freed soon after the sendreply. So we
1711 			 * must copy it here to a safe place...
1712 			 */
1713 			if (res != (char *)&res_buf) {
1714 				bcopy(res, (char *)&res_buf, disp->dis_ressz);
1715 			}
1716 		}
1717 	}
1718 
1719 	/*
1720 	 * Serialize and send results struct
	 *
	 * Results held outside res_buf are encoded with the fast XDR
	 * routine; results in res_buf use the regular one.
1721 	 */
1722 #ifdef DEBUG
1723 	if (rfs_no_fast_xdrres == 0 && res != (char *)&res_buf)
1724 #else
1725 	if (res != (char *)&res_buf)
1726 #endif
1727 	{
1728 		if (!svc_sendreply(xprt, disp->dis_fastxdrres, res)) {
1729 			cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1730 			error++;
1731 		}
1732 	} else {
1733 		if (!svc_sendreply(xprt, disp->dis_xdrres, res)) {
1734 			cmn_err(CE_NOTE, "%s: bad sendreply", pgmname);
1735 			error++;
1736 		}
1737 	}
1738 
1739 	/*
1740 	 * Log if needed
	 *
	 * The record is written from res_buf, which either is the result
	 * buffer or received a copy of the results above.
1741 	 */
1742 	if (logging_enabled) {
1743 		nfslog_write_record(nfslog_exi, req, args, (char *)&res_buf,
1744 			cr, &nb, nfslog_rec_id, NFSLOG_ONE_BUFFER);
1745 		exi_rele(nfslog_exi);
1746 		kmem_free((&nb)->buf, (&nb)->len);
1747 	}
1748 
1749 	/*
1750 	 * Free results struct. With the addition of NFS V4 we can
1751 	 * have non-idempotent procedures with functions.
1752 	 */
1753 	if (disp->dis_resfree != nullfree && dupcached == FALSE) {
1754 		(*disp->dis_resfree)(res);
1755 	}
1756 
1757 done:
1758 	/*
1759 	 * Free arguments struct
	 *
	 * disp is still NULL when the version or procedure number was
	 * rejected; SVC_FREEARGS() is then called with null arguments.
1760 	 */
1761 	if (disp) {
1762 		if (!SVC_FREEARGS(xprt, disp->dis_xdrargs, args)) {
1763 			cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1764 			error++;
1765 		}
1766 	} else {
1767 		if (!SVC_FREEARGS(xprt, (xdrproc_t)0, (caddr_t)0)) {
1768 			cmn_err(CE_NOTE, "%s: bad freeargs", pgmname);
1769 			error++;
1770 		}
1771 	}
1772 
1773 	if (exi != NULL)
1774 		exi_rele(exi);
1775 
	/*
	 * NOTE(review): the bad-version path above reaches this point with
	 * req->rq_vers outside [min_vers, max_vers], yet the value is
	 * still used as an index here -- confirm that global_svstat_ptr
	 * covers every version number that can get this far.
	 */
1776 	global_svstat_ptr[req->rq_vers][NFS_BADCALLS].value.ui64 += error;
1777 
1778 	global_svstat_ptr[req->rq_vers][NFS_CALLS].value.ui64++;
1779 }
1780 
/*
 * Dispatch entry point for the NFS program: runs common_dispatch() over
 * versions NFS_VERSMIN through NFS_VERSMAX using rfs_disptable[], with
 * "NFS" as the program name for console messages.
 */
1781 static void
1782 rfs_dispatch(struct svc_req *req, SVCXPRT *xprt)
1783 {
1784 	common_dispatch(req, xprt, NFS_VERSMIN, NFS_VERSMAX,
1785 		"NFS", rfs_disptable);
1786 }
1787 
/*
 * Procedure-name strings for NFS_ACL version 2, in procedure number order
 * (one per acldisptab_v2[] entry).  Referenced from acl_disptable[].
 */
1788 static char *aclcallnames_v2[] = {
1789 	"ACL2_NULL",
1790 	"ACL2_GETACL",
1791 	"ACL2_SETACL",
1792 	"ACL2_GETATTR",
1793 	"ACL2_ACCESS",
1794 	"ACL2_GETXATTRDIR"
1795 };
1796 
/*
 * Dispatch table for NFS_ACL version 2, indexed by RPC procedure number.
 *
 * NOTE(review): struct rpcdisp is defined elsewhere; judging by how
 * common_dispatch() uses it, each initializer appears to list, in order,
 * the service routine, the argument XDR routine and its optional "fast"
 * variant, the argument size, the result XDR routine and its optional
 * "fast" variant, the result size, the result free routine, the RPC_*
 * flags and the filehandle accessor.
 *
 * The fast result encoders for SETACL, GETATTR and ACCESS are only wired
 * in when _LITTLE_ENDIAN is defined; otherwise those slots hold
 * NULL_xdrproc_t.
 */
1797 static struct rpcdisp acldisptab_v2[] = {
1798 	/*
1799 	 * ACL VERSION 2
1800 	 */
1801 
1802 	/* ACL2_NULL = 0 */
1803 	{rpc_null,
1804 	    xdr_void, NULL_xdrproc_t, 0,
1805 	    xdr_void, NULL_xdrproc_t, 0,
1806 	    nullfree, RPC_IDEMPOTENT,
1807 	    0},
1808 
1809 	/* ACL2_GETACL = 1 */
1810 	{acl2_getacl,
1811 	    xdr_GETACL2args, xdr_fastGETACL2args, sizeof (GETACL2args),
1812 	    xdr_GETACL2res, NULL_xdrproc_t, sizeof (GETACL2res),
1813 	    acl2_getacl_free, RPC_IDEMPOTENT,
1814 	    acl2_getacl_getfh},
1815 
1816 	/* ACL2_SETACL = 2 */
1817 	{acl2_setacl,
1818 	    xdr_SETACL2args, NULL_xdrproc_t, sizeof (SETACL2args),
1819 #ifdef _LITTLE_ENDIAN
1820 	    xdr_SETACL2res, xdr_fastSETACL2res, sizeof (SETACL2res),
1821 #else
1822 	    xdr_SETACL2res, NULL_xdrproc_t, sizeof (SETACL2res),
1823 #endif
1824 	    nullfree, RPC_MAPRESP,
1825 	    acl2_setacl_getfh},
1826 
1827 	/* ACL2_GETATTR = 3 */
1828 	{acl2_getattr,
1829 	    xdr_GETATTR2args, xdr_fastGETATTR2args, sizeof (GETATTR2args),
1830 #ifdef _LITTLE_ENDIAN
1831 	    xdr_GETATTR2res, xdr_fastGETATTR2res, sizeof (GETATTR2res),
1832 #else
1833 	    xdr_GETATTR2res, NULL_xdrproc_t, sizeof (GETATTR2res),
1834 #endif
1835 	    nullfree, RPC_IDEMPOTENT|RPC_ALLOWANON|RPC_MAPRESP,
1836 	    acl2_getattr_getfh},
1837 
1838 	/* ACL2_ACCESS = 4 */
1839 	{acl2_access,
1840 	    xdr_ACCESS2args, xdr_fastACCESS2args, sizeof (ACCESS2args),
1841 #ifdef _LITTLE_ENDIAN
1842 	    xdr_ACCESS2res, xdr_fastACCESS2res, sizeof (ACCESS2res),
1843 #else
1844 	    xdr_ACCESS2res, NULL_xdrproc_t, sizeof (ACCESS2res),
1845 #endif
1846 	    nullfree, RPC_IDEMPOTENT|RPC_MAPRESP,
1847 	    acl2_access_getfh},
1848 
1849 	/* ACL2_GETXATTRDIR = 5 */
1850 	{acl2_getxattrdir,
1851 	    xdr_GETXATTRDIR2args, NULL_xdrproc_t, sizeof (GETXATTRDIR2args),
1852 	    xdr_GETXATTRDIR2res, NULL_xdrproc_t, sizeof (GETXATTRDIR2res),
1853 	    nullfree, RPC_IDEMPOTENT,
1854 	    acl2_getxattrdir_getfh},
1855 };
1856 
/*
 * Procedure-name strings for NFS_ACL version 3, in procedure number order
 * (one per acldisptab_v3[] entry).  Referenced from acl_disptable[].
 */
1857 static char *aclcallnames_v3[] = {
1858 	"ACL3_NULL",
1859 	"ACL3_GETACL",
1860 	"ACL3_SETACL",
1861 	"ACL3_GETXATTRDIR"
1862 };
1863 
/*
 * Dispatch table for NFS_ACL version 3, indexed by RPC procedure number.
 * No entry supplies a fast XDR routine (all such slots are
 * NULL_xdrproc_t).  SETACL is the only entry without RPC_IDEMPOTENT, so
 * it is the only one common_dispatch() passes through SVC_DUP_EXT().
 *
 * NOTE(review): struct rpcdisp is defined elsewhere; the initializer
 * layout is inferred from how common_dispatch() uses the structure.
 */
1864 static struct rpcdisp acldisptab_v3[] = {
1865 	/*
1866 	 * ACL VERSION 3
1867 	 */
1868 
1869 	/* ACL3_NULL = 0 */
1870 	{rpc_null,
1871 	    xdr_void, NULL_xdrproc_t, 0,
1872 	    xdr_void, NULL_xdrproc_t, 0,
1873 	    nullfree, RPC_IDEMPOTENT,
1874 	    0},
1875 
1876 	/* ACL3_GETACL = 1 */
1877 	{acl3_getacl,
1878 	    xdr_GETACL3args, NULL_xdrproc_t, sizeof (GETACL3args),
1879 	    xdr_GETACL3res, NULL_xdrproc_t, sizeof (GETACL3res),
1880 	    acl3_getacl_free, RPC_IDEMPOTENT,
1881 	    acl3_getacl_getfh},
1882 
1883 	/* ACL3_SETACL = 2 */
1884 	{acl3_setacl,
1885 	    xdr_SETACL3args, NULL_xdrproc_t, sizeof (SETACL3args),
1886 	    xdr_SETACL3res, NULL_xdrproc_t, sizeof (SETACL3res),
1887 	    nullfree, 0,
1888 	    acl3_setacl_getfh},
1889 
1890 	/* ACL3_GETXATTRDIR = 3 */
1891 	{acl3_getxattrdir,
1892 	    xdr_GETXATTRDIR3args, NULL_xdrproc_t, sizeof (GETXATTRDIR3args),
1893 	    xdr_GETXATTRDIR3res, NULL_xdrproc_t, sizeof (GETXATTRDIR3res),
1894 	    nullfree, RPC_IDEMPOTENT,
1895 	    acl3_getxattrdir_getfh},
1896 };
1897 
/*
 * Per-version dispatch information for the NFS_ACL program, handed to
 * common_dispatch() by acl_dispatch().  common_dispatch() indexes it with
 * (request version - NFS_ACL_VERSMIN), so the entries must stay in
 * ascending version order.  Each entry holds the number of procedures in
 * the version's dispatch table, the procedure-name strings, the address
 * of the per-procedure counter pointer, and the dispatch table itself.
 */
1898 static struct rpc_disptable acl_disptable[] = {
1899 	{sizeof (acldisptab_v2) / sizeof (acldisptab_v2[0]),
1900 		aclcallnames_v2,
1901 		&aclproccnt_v2_ptr, acldisptab_v2},
1902 	{sizeof (acldisptab_v3) / sizeof (acldisptab_v3[0]),
1903 		aclcallnames_v3,
1904 		&aclproccnt_v3_ptr, acldisptab_v3},
1905 };
1906 
/*
 * Dispatch entry point for the NFS_ACL program: runs common_dispatch()
 * over versions NFS_ACL_VERSMIN through NFS_ACL_VERSMAX using
 * acl_disptable[], with "ACL" as the program name for console messages.
 */
1907 static void
1908 acl_dispatch(struct svc_req *req, SVCXPRT *xprt)
1909 {
1910 	common_dispatch(req, xprt, NFS_ACL_VERSMIN, NFS_ACL_VERSMAX,
1911 		"ACL", acl_disptable);
1912 }
1913 
1914 int
1915 checkwin(int flavor, int window, struct svc_req *req)
1916 {
1917 	struct authdes_cred *adc;
1918 
1919 	switch (flavor) {
1920 	case AUTH_DES:
1921 		adc = (struct authdes_cred *)req->rq_clntcred;
1922 		if (adc->adc_fullname.window > window)
1923 			return (0);
1924 		break;
1925 
1926 	default:
1927 		break;
1928 	}
1929 	return (1);
1930 }
1931 
1932 
1933 /*
1934  * checkauth() will check the access permission against the export
1935  * information.  Then map root uid/gid to appropriate uid/gid.
1936  *
1937  * This routine is used by NFS V3 and V2 code.
 *
 *	exi		export the request's filehandle belongs to
 *	req		the RPC request being authenticated
 *	cr		credential that is filled in and, where required,
 *			remapped in place
 *	anon_ok		when 1, a request that would otherwise be denied
 *			is let through with anonymous or "nobody" ids
 *	publicfh_ok	when set, access is granted as soon as the
 *			credential has been obtained
 *
 * Returns 1 when access is granted, 0 when it is denied, and -1 when
 * nfsauth_access() reports NFSAUTH_DROP and the request is to be dropped.
1938  */
1939 static int
1940 checkauth(struct exportinfo *exi, struct svc_req *req, cred_t *cr, int anon_ok,
1941     bool_t publicfh_ok)
1942 {
1943 	int i, nfsflavor, rpcflavor, stat, access;
1944 	struct secinfo *secp;
1945 	caddr_t principal;
1946 	char buf[INET6_ADDRSTRLEN]; /* to hold both IPv4 and IPv6 addr */
1947 	int anon_res = 0;
1948 
1949 	/*
1950 	 *	Check for privileged port number
1951 	 *	N.B.:  this assumes that we know the format of a netbuf.
1952 	 */
1953 	if (nfs_portmon) {
1954 		struct sockaddr *ca;
1955 		ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1956 
1957 		if (ca == NULL)
1958 			return (0);
1959 
1960 		if ((ca->sa_family == AF_INET &&
1961 		    ntohs(((struct sockaddr_in *)ca)->sin_port) >=
1962 		    IPPORT_RESERVED) ||
1963 		    (ca->sa_family == AF_INET6 &&
1964 		    ntohs(((struct sockaddr_in6 *)ca)->sin6_port) >=
1965 		    IPPORT_RESERVED)) {
1966 			cmn_err(CE_NOTE,
1967 			    "nfs_server: client %s%ssent NFS request from "
1968 			    "unprivileged port",
1969 			    client_name(req), client_addr(req, buf));
1970 			return (0);
1971 		}
1972 	}
1973 
1974 	/*
1975 	 *  return 1 on success or 0 on failure
1976 	 */
1977 	stat = sec_svc_getcred(req, cr, &principal, &nfsflavor);
1978 
1979 	/*
1980 	 * A failed AUTH_UNIX svc_get_cred() implies we couldn't set
1981 	 * the credentials; below we map that to anonymous.
	 * A failure for any other flavor denies the request here.
1982 	 */
1983 	if (!stat && nfsflavor != AUTH_UNIX) {
1984 		cmn_err(CE_NOTE,
1985 		    "nfs_server: couldn't get unix cred for %s",
1986 		    client_name(req));
1987 		return (0);
1988 	}
1989 
1990 	/*
1991 	 * Short circuit checkauth() on operations that support the
1992 	 * public filehandle, and if the request for that operation
1993 	 * is using the public filehandle. Note that we must call
1994 	 * sec_svc_getcred() first so that xp_cookie is set to the
1995 	 * right value. Normally xp_cookie is just the RPC flavor
1996 	 * of the request, but in the case of RPCSEC_GSS it
1997 	 * could be a pseudo flavor.
1998 	 */
1999 	if (publicfh_ok)
2000 		return (1);
2001 
2002 	rpcflavor = req->rq_cred.oa_flavor;
2003 	/*
2004 	 * Check if the auth flavor is valid for this export
2005 	 */
2006 	access = nfsauth_access(exi, req);
2007 	if (access & NFSAUTH_DROP)
2008 		return (-1);	/* drop the request */
2009 
2010 	if (access & NFSAUTH_DENIED) {
2011 		/*
2012 		 * If anon_ok == 1 and we got NFSAUTH_DENIED, it was
2013 		 * probably due to the flavor not matching during
2014 		 * the mount attempt. So map the flavor to AUTH_NONE
2015 		 * so that the credentials get mapped to the anonymous
2016 		 * user.
2017 		 */
2018 		if (anon_ok == 1)
2019 			rpcflavor = AUTH_NONE;
2020 		else
2021 			return (0);	/* deny access */
2022 
2023 	} else if (access & NFSAUTH_MAPNONE) {
2024 		/*
2025 		 * Access was granted even though the flavor mismatched
2026 		 * because AUTH_NONE was one of the exported flavors.
2027 		 */
2028 		rpcflavor = AUTH_NONE;
2029 
2030 	} else if (access & NFSAUTH_WRONGSEC) {
2031 		/*
2032 		 * NFSAUTH_WRONGSEC is used for NFSv4. Since V2/V3 already
2033 		 * negotiates the security flavor thru MOUNT protocol, the
2034 		 * only way it can get NFSAUTH_WRONGSEC here is from
2035 		 * NFS_ACL for V4. This could be for a limited view, so
2036 		 * map it to RO access. V4 lookup/readdir will take care
2037 		 * of the limited view portion.
2038 		 */
2039 		access |= NFSAUTH_RO;
2040 		access &= ~NFSAUTH_WRONGSEC;
2041 	}
2042 
	/*
	 * Map the credential according to the (possibly overridden) RPC
	 * flavor.  anon_res records the result of any attempt to switch
	 * to the export's anonymous id; it is examined after the switch.
	 */
2043 	switch (rpcflavor) {
2044 	case AUTH_NONE:
2045 		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2046 				exi->exi_export.ex_anon);
2047 		(void) crsetgroups(cr, 0, NULL);
2048 		break;
2049 
2050 	case AUTH_UNIX:
		/*
		 * && binds tighter than ||: map to the anonymous id when
		 * the credential could not be obtained, or when the
		 * caller is uid 0 and the export did not grant
		 * NFSAUTH_ROOT.
		 */
2051 		if (!stat || crgetuid(cr) == 0 && !(access & NFSAUTH_ROOT)) {
2052 			anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2053 					exi->exi_export.ex_anon);
2054 			(void) crsetgroups(cr, 0, NULL);
2055 		}
2056 		break;
2057 
2058 	case AUTH_DES:
2059 	case RPCSEC_GSS:
2060 		/*
2061 		 *  Find the secinfo structure.  We should be able
2062 		 *  to find it by the time we reach here.
2063 		 *  nfsauth_access() has done the checking.
2064 		 */
2065 		secp = NULL;
2066 		for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2067 			if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2068 			    nfsflavor) {
2069 				secp = &exi->exi_export.ex_secinfo[i];
2070 				break;
2071 			}
2072 		}
2073 
2074 		if (!secp) {
2075 			cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2076 			    "no secinfo data for flavor %d",
2077 			    client_name(req), client_addr(req, buf),
2078 			    nfsflavor);
2079 			return (0);
2080 		}
2081 
2082 		if (!checkwin(rpcflavor, secp->s_window, req)) {
2083 			cmn_err(CE_NOTE,
2084 			    "nfs_server: client %s%sused invalid "
2085 			    "auth window value",
2086 			    client_name(req), client_addr(req, buf));
2087 			return (0);
2088 		}
2089 
2090 		/*
2091 		 * Map root principals listed in the share's root= list to root,
2092 		 * and map any other principals that were mapped to root by RPC
2093 		 * to anon.
2094 		 */
2095 		if (principal && sec_svc_inrootlist(rpcflavor, principal,
2096 			secp->s_rootcnt, secp->s_rootnames)) {
2097 			if (crgetuid(cr) == 0)
2098 				return (1);
2099 
2100 			(void) crsetugid(cr, 0, 0);
2101 
2102 			/*
2103 			 * NOTE: If and when kernel-land privilege tracing is
2104 			 * added this may have to be replaced with code that
2105 			 * retrieves root's supplementary groups (e.g., using
2106 			 * kgss_get_group_info().  In the meantime principals
2107 			 * mapped to uid 0 get all privileges, so setting cr's
2108 			 * supplementary groups for them does nothing.
2109 			 */
2110 			(void) crsetgroups(cr, 0, NULL);
2111 
2112 			return (1);
2113 		}
2114 
2115 		/*
2116 		 * Not a root princ, or not in root list, map UID 0/nobody to
2117 		 * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2118 		 * UID_NOBODY and GID_NOBODY, respectively.)
2119 		 */
2120 		if (crgetuid(cr) != 0 &&
2121 		    (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2122 			return (1);
2123 
2124 		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2125 			exi->exi_export.ex_anon);
2126 		(void) crsetgroups(cr, 0, NULL);
2127 		break;
2128 	default:
2129 		return (0);
2130 	} /* switch on rpcflavor */
2131 
2132 	/*
2133 	 * Even if anon access is disallowed via ex_anon == -1, we allow
2134 	 * this access if anon_ok is set.  So set creds to the default
2135 	 * "nobody" id.
2136 	 */
2137 	if (anon_res != 0) {
2138 		if (anon_ok == 0) {
2139 			cmn_err(CE_NOTE,
2140 			    "nfs_server: client %s%ssent wrong "
2141 			    "authentication for %s",
2142 			    client_name(req), client_addr(req, buf),
2143 			    exi->exi_export.ex_path ?
2144 			    exi->exi_export.ex_path : "?");
2145 			return (0);
2146 		}
2147 
2148 		if (crsetugid(cr, UID_NOBODY, GID_NOBODY) != 0)
2149 			return (0);
2150 	}
2151 
2152 	return (1);
2153 }
2154 
2155 /*
2156  * returns 0 on failure, -1 on a drop, -2 on wrong security flavor,
2157  * and 1 on success
2158  */
2159 int
2160 checkauth4(struct compound_state *cs, struct svc_req *req)
2161 {
2162 	int i, rpcflavor, access;
2163 	struct secinfo *secp;
2164 	char buf[MAXHOST + 1];
2165 	int anon_res = 0, nfsflavor;
2166 	struct exportinfo *exi;
2167 	cred_t	*cr;
2168 	caddr_t	principal;
2169 
2170 	exi = cs->exi;
2171 	cr = cs->cr;
2172 	principal = cs->principal;
2173 	nfsflavor = cs->nfsflavor;
2174 
2175 	ASSERT(cr != NULL);
2176 
2177 	rpcflavor = req->rq_cred.oa_flavor;
2178 	cs->access &= ~CS_ACCESS_LIMITED;
2179 
2180 	/*
2181 	 * Check the access right per auth flavor on the vnode of
2182 	 * this export for the given request.
2183 	 */
2184 	access = nfsauth4_access(cs->exi, cs->vp, req);
2185 
2186 	if (access & NFSAUTH_WRONGSEC)
2187 		return (-2);	/* no access for this security flavor */
2188 
2189 	if (access & NFSAUTH_DROP)
2190 		return (-1);	/* drop the request */
2191 
2192 	if (access & NFSAUTH_DENIED) {
2193 
2194 		if (exi->exi_export.ex_seccnt > 0)
2195 			return (0);	/* deny access */
2196 
2197 	} else if (access & NFSAUTH_LIMITED) {
2198 
2199 		cs->access |= CS_ACCESS_LIMITED;
2200 
2201 	} else if (access & NFSAUTH_MAPNONE) {
2202 		/*
2203 		 * Access was granted even though the flavor mismatched
2204 		 * because AUTH_NONE was one of the exported flavors.
2205 		 */
2206 		rpcflavor = AUTH_NONE;
2207 	}
2208 
2209 	/*
2210 	 * XXX probably need to redo some of it for nfsv4?
2211 	 * return 1 on success or 0 on failure
2212 	 */
2213 
2214 	switch (rpcflavor) {
2215 	case AUTH_NONE:
2216 		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2217 				exi->exi_export.ex_anon);
2218 		(void) crsetgroups(cr, 0, NULL);
2219 		break;
2220 
2221 	case AUTH_UNIX:
2222 		if (crgetuid(cr) == 0 && !(access & NFSAUTH_ROOT)) {
2223 			anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2224 					exi->exi_export.ex_anon);
2225 			(void) crsetgroups(cr, 0, NULL);
2226 		}
2227 		break;
2228 
2229 	default:
2230 		/*
2231 		 *  Find the secinfo structure.  We should be able
2232 		 *  to find it by the time we reach here.
2233 		 *  nfsauth_access() has done the checking.
2234 		 */
2235 		secp = NULL;
2236 		for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
2237 			if (exi->exi_export.ex_secinfo[i].s_secinfo.sc_nfsnum ==
2238 			    nfsflavor) {
2239 				secp = &exi->exi_export.ex_secinfo[i];
2240 				break;
2241 			}
2242 		}
2243 
2244 		if (!secp) {
2245 			cmn_err(CE_NOTE, "nfs_server: client %s%shad "
2246 			    "no secinfo data for flavor %d",
2247 			    client_name(req), client_addr(req, buf),
2248 			    nfsflavor);
2249 			return (0);
2250 		}
2251 
2252 		if (!checkwin(rpcflavor, secp->s_window, req)) {
2253 			cmn_err(CE_NOTE,
2254 			    "nfs_server: client %s%sused invalid "
2255 			    "auth window value",
2256 			    client_name(req), client_addr(req, buf));
2257 			return (0);
2258 		}
2259 
2260 		/*
2261 		 * Map root principals listed in the share's root= list to root,
2262 		 * and map any others principals that were mapped to root by RPC
2263 		 * to anon.
2264 		 */
2265 		if (principal && sec_svc_inrootlist(rpcflavor, principal,
2266 			secp->s_rootcnt, secp->s_rootnames)) {
2267 			if (crgetuid(cr) == 0)
2268 				return (1);
2269 
2270 			(void) crsetugid(cr, 0, 0);
2271 
2272 			/*
2273 			 * NOTE: If and when kernel-land privilege tracing is
2274 			 * added this may have to be replaced with code that
2275 			 * retrieves root's supplementary groups (e.g., using
2276 			 * kgss_get_group_info().  In the meantime principals
2277 			 * mapped to uid 0 get all privileges, so setting cr's
2278 			 * supplementary groups for them does nothing.
2279 			 */
2280 			(void) crsetgroups(cr, 0, NULL);
2281 
2282 			return (1);
2283 		}
2284 
2285 		/*
2286 		 * Not a root princ, or not in root list, map UID 0/nobody to
2287 		 * the anon ID for the share.  (RPC sets cr's UIDs and GIDs to
2288 		 * UID_NOBODY and GID_NOBODY, respectively.)
2289 		 */
2290 		if (crgetuid(cr) != 0 &&
2291 		    (crgetuid(cr) != UID_NOBODY || crgetgid(cr) != GID_NOBODY))
2292 			return (1);
2293 
2294 		anon_res = crsetugid(cr, exi->exi_export.ex_anon,
2295 			exi->exi_export.ex_anon);
2296 		(void) crsetgroups(cr, 0, NULL);
2297 		break;
2298 	} /* switch on rpcflavor */
2299 
2300 	/*
2301 	 * Even if anon access is disallowed via ex_anon == -1, we allow
2302 	 * this access if anon_ok is set.  So set creds to the default
2303 	 * "nobody" id.
2304 	 */
2305 
2306 	if (anon_res != 0) {
2307 		cmn_err(CE_NOTE,
2308 			"nfs_server: client %s%ssent wrong "
2309 			"authentication for %s",
2310 			client_name(req), client_addr(req, buf),
2311 			exi->exi_export.ex_path ?
2312 			exi->exi_export.ex_path : "?");
2313 		return (0);
2314 	}
2315 
2316 	return (1);
2317 }
2318 
2319 
2320 static char *
2321 client_name(struct svc_req *req)
2322 {
2323 	char *hostname = NULL;
2324 
2325 	/*
2326 	 * If it's a Unix cred then use the
2327 	 * hostname from the credential.
2328 	 */
2329 	if (req->rq_cred.oa_flavor == AUTH_UNIX) {
2330 		hostname = ((struct authunix_parms *)
2331 		    req->rq_clntcred)->aup_machname;
2332 	}
2333 	if (hostname == NULL)
2334 		hostname = "";
2335 
2336 	return (hostname);
2337 }
2338 
2339 static char *
2340 client_addr(struct svc_req *req, char *buf)
2341 {
2342 	struct sockaddr *ca;
2343 	uchar_t *b;
2344 	char *frontspace = "";
2345 
2346 	/*
2347 	 * We assume we are called in tandem with client_name and the
2348 	 * format string looks like "...client %s%sblah blah..."
2349 	 *
2350 	 * If it's a Unix cred then client_name returned
2351 	 * a host name, so we need insert a space between host name
2352 	 * and IP address.
2353 	 */
2354 	if (req->rq_cred.oa_flavor == AUTH_UNIX)
2355 		frontspace = " ";
2356 
2357 	/*
2358 	 * Convert the caller's IP address to a dotted string
2359 	 */
2360 	ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2361 
2362 	if (ca->sa_family == AF_INET) {
2363 	    b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
2364 	    (void) sprintf(buf, "%s(%d.%d.%d.%d) ", frontspace,
2365 		b[0] & 0xFF, b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
2366 	} else if (ca->sa_family == AF_INET6) {
2367 		struct sockaddr_in6 *sin6;
2368 		sin6 = (struct sockaddr_in6 *)ca;
2369 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
2370 				buf, INET6_ADDRSTRLEN);
2371 
2372 	} else {
2373 
2374 		/*
2375 		 * No IP address to print. If there was a host name
2376 		 * printed, then we print a space.
2377 		 */
2378 		(void) sprintf(buf, frontspace);
2379 	}
2380 
2381 	return (buf);
2382 }
2383 
2384 /*
2385  * NFS Server initialization routine.  This routine should only be called
2386  * once.  It performs the following tasks:
2387  *	- Call sub-initialization routines (localize access to variables)
2388  *	- Initialize all locks
2389  *	- initialize the version 3 write verifier
2390  */
2391 int
2392 nfs_srvinit(void)
2393 {
2394 	int error;
2395 
2396 	error = nfs_exportinit();
2397 	if (error != 0)
2398 		return (error);
2399 	error = rfs4_srvrinit();
2400 	if (error != 0) {
2401 		nfs_exportfini();
2402 		return (error);
2403 	}
2404 	rfs_srvrinit();
2405 	rfs3_srvrinit();
2406 	nfsauth_init();
2407 
2408 	/* Init the stuff to control start/stop */
2409 	nfs_server_upordown = NFS_SERVER_STOPPED;
2410 	mutex_init(&nfs_server_upordown_lock, NULL, MUTEX_DEFAULT, NULL);
2411 	cv_init(&nfs_server_upordown_cv, NULL, CV_DEFAULT, NULL);
2412 	mutex_init(&rdma_wait_mutex, NULL, MUTEX_DEFAULT, NULL);
2413 	cv_init(&rdma_wait_cv, NULL, CV_DEFAULT, NULL);
2414 
2415 	return (0);
2416 }
2417 
2418 /*
2419  * NFS Server finalization routine. This routine is called to cleanup the
2420  * initialization work previously performed if the NFS server module could
2421  * not be loaded correctly.
2422  */
2423 void
2424 nfs_srvfini(void)
2425 {
2426 	nfsauth_fini();
2427 	rfs3_srvrfini();
2428 	rfs_srvrfini();
2429 	nfs_exportfini();
2430 
2431 	mutex_destroy(&nfs_server_upordown_lock);
2432 	cv_destroy(&nfs_server_upordown_cv);
2433 	mutex_destroy(&rdma_wait_mutex);
2434 	cv_destroy(&rdma_wait_cv);
2435 }
2436 
2437 /*
2438  * Set up an iovec array of up to cnt pointers.
2439  */
2440 
2441 void
2442 mblk_to_iov(mblk_t *m, int cnt, struct iovec *iovp)
2443 {
2444 	while (m != NULL && cnt-- > 0) {
2445 		iovp->iov_base = (caddr_t)m->b_rptr;
2446 		iovp->iov_len = (m->b_wptr - m->b_rptr);
2447 		iovp++;
2448 		m = m->b_cont;
2449 	}
2450 }
2451 
2452 /*
2453  * Common code between NFS Version 2 and NFS Version 3 for the public
2454  * filehandle multicomponent lookups.
2455  */
2456 
2457 /*
2458  * Public filehandle evaluation of a multi-component lookup, following
2459  * symbolic links, if necessary. This may result in a vnode in another
2460  * filesystem, which is OK as long as the other filesystem is exported.
2461  *
2462  * Note that the exi will be set either to NULL or a new reference to the
2463  * exportinfo struct that corresponds to the vnode of the multi-component path.
2464  * It is the callers responsibility to release this reference.
2465  */
2466 int
2467 rfs_publicfh_mclookup(char *p, vnode_t *dvp, cred_t *cr, vnode_t **vpp,
2468     struct exportinfo **exi, struct sec_ol *sec)
2469 {
2470 	int pathflag;
2471 	vnode_t *mc_dvp = NULL;
2472 	vnode_t *realvp;
2473 	int error;
2474 
2475 	*exi = NULL;
2476 
2477 	/*
2478 	 * check if the given path is a url or native path. Since p is
2479 	 * modified by MCLpath(), it may be empty after returning from
2480 	 * there, and should be checked.
2481 	 */
2482 	if ((pathflag = MCLpath(&p)) == -1)
2483 		return (EIO);
2484 
2485 	/*
2486 	 * If pathflag is SECURITY_QUERY, turn the SEC_QUERY bit
2487 	 * on in sec->sec_flags. This bit will later serve as an
2488 	 * indication in makefh_ol() or makefh3_ol() to overload the
2489 	 * filehandle to contain the sec modes used by the server for
2490 	 * the path.
2491 	 */
2492 	if (pathflag == SECURITY_QUERY) {
2493 		if ((sec->sec_index = (uint_t)(*p)) > 0) {
2494 			sec->sec_flags |= SEC_QUERY;
2495 			p++;
2496 			if ((pathflag = MCLpath(&p)) == -1)
2497 				return (EIO);
2498 		} else {
2499 			cmn_err(CE_NOTE,
2500 			    "nfs_server: invalid security index %d, "
2501 			    "violating WebNFS SNEGO protocol.", sec->sec_index);
2502 			return (EIO);
2503 		}
2504 	}
2505 
2506 	if (p[0] == '\0') {
2507 		error = ENOENT;
2508 		goto publicfh_done;
2509 	}
2510 
2511 	error = rfs_pathname(p, &mc_dvp, vpp, dvp, cr, pathflag);
2512 
2513 	/*
2514 	 * If name resolves to "/" we get EINVAL since we asked for
2515 	 * the vnode of the directory that the file is in. Try again
2516 	 * with NULL directory vnode.
2517 	 */
2518 	if (error == EINVAL) {
2519 		error = rfs_pathname(p, NULL, vpp, dvp, cr, pathflag);
2520 		if (!error) {
2521 			ASSERT(*vpp != NULL);
2522 			if ((*vpp)->v_type == VDIR) {
2523 				VN_HOLD(*vpp);
2524 				mc_dvp = *vpp;
2525 			} else {
2526 				/*
2527 				 * This should not happen, the filesystem is
2528 				 * in an inconsistent state. Fail the lookup
2529 				 * at this point.
2530 				 */
2531 				VN_RELE(*vpp);
2532 				error = EINVAL;
2533 			}
2534 		}
2535 	}
2536 
2537 	if (error)
2538 		goto publicfh_done;
2539 
2540 	if (*vpp == NULL) {
2541 		error = ENOENT;
2542 		goto publicfh_done;
2543 	}
2544 
2545 	ASSERT(mc_dvp != NULL);
2546 	ASSERT(*vpp != NULL);
2547 
2548 	if ((*vpp)->v_type == VDIR) {
2549 		do {
2550 			/*
2551 			 * *vpp may be an AutoFS node, so we perform
2552 			 * a VOP_ACCESS() to trigger the mount of the intended
2553 			 * filesystem, so we can perform the lookup in the
2554 			 * intended filesystem.
2555 			 */
2556 			(void) VOP_ACCESS(*vpp, 0, 0, cr);
2557 
2558 			/*
2559 			 * If vnode is covered, get the
2560 			 * the topmost vnode.
2561 			 */
2562 			if (vn_mountedvfs(*vpp) != NULL) {
2563 				error = traverse(vpp);
2564 				if (error) {
2565 					VN_RELE(*vpp);
2566 					goto publicfh_done;
2567 				}
2568 			}
2569 
2570 			if (VOP_REALVP(*vpp, &realvp) == 0 && realvp != *vpp) {
2571 				/*
2572 				 * If realvp is different from *vpp
2573 				 * then release our reference on *vpp, so that
2574 				 * the export access check be performed on the
2575 				 * real filesystem instead.
2576 				 */
2577 				VN_HOLD(realvp);
2578 				VN_RELE(*vpp);
2579 				*vpp = realvp;
2580 			} else
2581 			    break;
2582 		/* LINTED */
2583 		} while (TRUE);
2584 
2585 		/*
2586 		 * Let nfs_vptexi() figure what the real parent is.
2587 		 */
2588 		VN_RELE(mc_dvp);
2589 		mc_dvp = NULL;
2590 
2591 	} else {
2592 		/*
2593 		 * If vnode is covered, get the
2594 		 * the topmost vnode.
2595 		 */
2596 		if (vn_mountedvfs(mc_dvp) != NULL) {
2597 			error = traverse(&mc_dvp);
2598 			if (error) {
2599 			    VN_RELE(*vpp);
2600 			    goto publicfh_done;
2601 			}
2602 		}
2603 
2604 		if (VOP_REALVP(mc_dvp, &realvp) == 0 && realvp != mc_dvp) {
2605 			/*
2606 			 * *vpp is a file, obtain realvp of the parent
2607 			 * directory vnode.
2608 			 */
2609 			VN_HOLD(realvp);
2610 			VN_RELE(mc_dvp);
2611 			mc_dvp = realvp;
2612 		}
2613 	}
2614 
2615 	/*
2616 	 * The pathname may take us from the public filesystem to another.
2617 	 * If that's the case then just set the exportinfo to the new export
2618 	 * and build filehandle for it. Thanks to per-access checking there's
2619 	 * no security issues with doing this. If the client is not allowed
2620 	 * access to this new export then it will get an access error when it
2621 	 * tries to use the filehandle
2622 	 */
2623 	if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2624 		VN_RELE(*vpp);
2625 		goto publicfh_done;
2626 	}
2627 
2628 	/*
2629 	 * Not allowed access to pseudo exports.
2630 	 */
2631 	if (PSEUDO(*exi)) {
2632 		error = ENOENT;
2633 		VN_RELE(*vpp);
2634 		goto publicfh_done;
2635 	}
2636 
2637 	/*
2638 	 * Do a lookup for the index file. We know the index option doesn't
2639 	 * allow paths through handling in the share command, so mc_dvp will
2640 	 * be the parent for the index file vnode, if its present. Use
2641 	 * temporary pointers to preserve and reuse the vnode pointers of the
2642 	 * original directory in case there's no index file. Note that the
2643 	 * index file is a native path, and should not be interpreted by
2644 	 * the URL parser in rfs_pathname()
2645 	 */
2646 	if (((*exi)->exi_export.ex_flags & EX_INDEX) &&
2647 	    ((*vpp)->v_type == VDIR) && (pathflag == URLPATH)) {
2648 		vnode_t *tvp, *tmc_dvp;	/* temporary vnode pointers */
2649 
2650 		tmc_dvp = mc_dvp;
2651 		mc_dvp = tvp = *vpp;
2652 
2653 		error = rfs_pathname((*exi)->exi_export.ex_index, NULL, vpp,
2654 		    mc_dvp, cr, NATIVEPATH);
2655 
2656 		if (error == ENOENT) {
2657 			*vpp = tvp;
2658 			mc_dvp = tmc_dvp;
2659 			error = 0;
2660 		} else {	/* ok or error other than ENOENT */
2661 			if (tmc_dvp)
2662 				VN_RELE(tmc_dvp);
2663 			if (error)
2664 				goto publicfh_done;
2665 
2666 			/*
2667 			 * Found a valid vp for index "filename". Sanity check
2668 			 * for odd case where a directory is provided as index
2669 			 * option argument and leads us to another filesystem
2670 			 */
2671 
2672 			/* Release the reference on the old exi value */
2673 			ASSERT(*exi != NULL);
2674 			exi_rele(*exi);
2675 
2676 			if (error = nfs_check_vpexi(mc_dvp, *vpp, kcred, exi)) {
2677 				VN_RELE(*vpp);
2678 				goto publicfh_done;
2679 			}
2680 		}
2681 	}
2682 
2683 publicfh_done:
2684 	if (mc_dvp)
2685 		VN_RELE(mc_dvp);
2686 
2687 	return (error);
2688 }
2689 
2690 /*
2691  * Evaluate a multi-component path
2692  */
2693 int
2694 rfs_pathname(
2695 	char *path,			/* pathname to evaluate */
2696 	vnode_t **dirvpp,		/* ret for ptr to parent dir vnode */
2697 	vnode_t **compvpp,		/* ret for ptr to component vnode */
2698 	vnode_t *startdvp,		/* starting vnode */
2699 	cred_t *cr,			/* user's credential */
2700 	int pathflag)			/* flag to identify path, e.g. URL */
2701 {
2702 	char namebuf[TYPICALMAXPATHLEN];
2703 	struct pathname pn;
2704 	int error;
2705 
2706 	/*
2707 	 * If pathname starts with '/', then set startdvp to root.
2708 	 */
2709 	if (*path == '/') {
2710 		while (*path == '/')
2711 			path++;
2712 
2713 		startdvp = rootdir;
2714 	}
2715 
2716 	error = pn_get_buf(path, UIO_SYSSPACE, &pn, namebuf, sizeof (namebuf));
2717 	if (error == 0) {
2718 		/*
2719 		 * Call the URL parser for URL paths to modify the original
2720 		 * string to handle any '%' encoded characters that exist.
2721 		 * Done here to avoid an extra bcopy in the lookup.
2722 		 * We need to be careful about pathlen's. We know that
2723 		 * rfs_pathname() is called with a non-empty path. However,
2724 		 * it could be emptied due to the path simply being all /'s,
2725 		 * which is valid to proceed with the lookup, or due to the
2726 		 * URL parser finding an encoded null character at the
2727 		 * beginning of path which should not proceed with the lookup.
2728 		 */
2729 		if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2730 			URLparse(pn.pn_path);
2731 			if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0)
2732 				return (ENOENT);
2733 		}
2734 		VN_HOLD(startdvp);
2735 		error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2736 		    rootdir, startdvp, cr);
2737 	}
2738 	if (error == ENAMETOOLONG) {
2739 		/*
2740 		 * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
2741 		 */
2742 		if (error = pn_get(path, UIO_SYSSPACE, &pn))
2743 			return (error);
2744 		if (pn.pn_pathlen != 0 && pathflag == URLPATH) {
2745 			URLparse(pn.pn_path);
2746 			if ((pn.pn_pathlen = strlen(pn.pn_path)) == 0) {
2747 				pn_free(&pn);
2748 				return (ENOENT);
2749 			}
2750 		}
2751 		VN_HOLD(startdvp);
2752 		error = lookuppnvp(&pn, NULL, NO_FOLLOW, dirvpp, compvpp,
2753 		    rootdir, startdvp, cr);
2754 		pn_free(&pn);
2755 	}
2756 
2757 	return (error);
2758 }
2759 
2760 /*
2761  * Adapt the multicomponent lookup path depending on the pathtype
2762  */
2763 static int
2764 MCLpath(char **path)
2765 {
2766 	unsigned char c = (unsigned char)**path;
2767 
2768 	/*
2769 	 * If the MCL path is between 0x20 and 0x7E (graphic printable
2770 	 * character of the US-ASCII coded character set), its a URL path,
2771 	 * per RFC 1738.
2772 	 */
2773 	if (c >= 0x20 && c <= 0x7E)
2774 		return (URLPATH);
2775 
2776 	/*
2777 	 * If the first octet of the MCL path is not an ASCII character
2778 	 * then it must be interpreted as a tag value that describes the
2779 	 * format of the remaining octets of the MCL path.
2780 	 *
2781 	 * If the first octet of the MCL path is 0x81 it is a query
2782 	 * for the security info.
2783 	 */
2784 	switch (c) {
2785 	case 0x80:	/* native path, i.e. MCL via mount protocol */
2786 		(*path)++;
2787 		return (NATIVEPATH);
2788 	case 0x81:	/* security query */
2789 		(*path)++;
2790 		return (SECURITY_QUERY);
2791 	default:
2792 		return (-1);
2793 	}
2794 }
2795 
/*
 * Value of the hexadecimal digit c, or 0 if c is not a hexadecimal
 * digit.  The argument is evaluated more than once, so it must not have
 * side effects.
 */
#define	fromhex(c)	(((c) >= '0' && (c) <= '9') ? ((c) - '0') : \
			(((c) >= 'A' && (c) <= 'F') ? ((c) - 'A' + 10) : \
			(((c) >= 'a' && (c) <= 'f') ? ((c) - 'a' + 10) : 0)))

/*
 * Decode '%' escapes in str, in place.
 *
 * The implementation of URLparse guarantees that the final string will
 * fit in the original one: every escape shrinks the string.  Each '%'
 * followed by two characters is replaced with the character whose
 * hexadecimal code those two characters spell.
 *
 * Malformed input is handled as follows:
 *	- a character that is not a hexadecimal digit counts as 0
 *	- '%' followed by a single character yields that digit times 16
 *	- a '%' at the very end of the string is kept as is
 *
 * An escape may decode to NUL (e.g. "%00"), which ends the string early
 * for any caller that measures it with strlen() afterwards.
 */
static void
URLparse(char *str)
{
	char *p, *q;	/* p reads, q writes; q never passes p */

	p = q = str;
	while (*p != '\0') {
		*q = *p;
		if (*p++ == '%') {
			if (*p != '\0') {
				/* high nibble */
				*q = fromhex(*p) * 16;
				p++;
				if (*p != '\0') {
					/* low nibble */
					*q += fromhex(*p);
					p++;
				}
			}
		}
		q++;
	}
	*q = '\0';
}
2827 
2828 
2829 /*
2830  * Get the export information for the lookup vnode, and verify its
2831  * useable.
2832  */
2833 int
2834 nfs_check_vpexi(vnode_t *mc_dvp, vnode_t *vp, cred_t *cr,
2835     struct exportinfo **exi)
2836 {
2837 	int walk;
2838 	int error = 0;
2839 
2840 	*exi = nfs_vptoexi(mc_dvp, vp, cr, &walk, NULL, FALSE);
2841 	if (*exi == NULL)
2842 		error = EACCES;
2843 	else {
2844 		/*
2845 		 * If nosub is set for this export then
2846 		 * a lookup relative to the public fh
2847 		 * must not terminate below the
2848 		 * exported directory.
2849 		 */
2850 		if ((*exi)->exi_export.ex_flags & EX_NOSUB && walk > 0)
2851 			error = EACCES;
2852 	}
2853 
2854 	return (error);
2855 }
2856 
2857 /*
2858  * Do the main work of handling HA-NFSv4 Resource Group failover on
2859  * Sun Cluster.
2860  * We need to detect whether any RG admin paths have been added or removed,
2861  * and adjust resources accordingly.
2862  * Currently we're using a very inefficient algorithm, ~ 2 * O(n**2). In
2863  * order to scale, the list and array of paths need to be held in more
2864  * suitable data structures.
2865  */
2866 static void
2867 hanfsv4_failover(void)
2868 {
2869 	int i, start_grace, numadded_paths = 0;
2870 	char **added_paths = NULL;
2871 	rfs4_dss_path_t *dss_path;
2872 
2873 	/*
2874 	 * First, look for removed paths: RGs that have been failed-over
2875 	 * away from this node.
2876 	 * Walk the "currently-serving" rfs4_dss_pathlist and, for each
2877 	 * path, check if it is on the "passed-in" rfs4_dss_newpaths array
2878 	 * from nfsd. If not, that RG path has been removed.
2879 	 *
2880 	 * Note that nfsd has sorted rfs4_dss_newpaths for us, and removed
2881 	 * any duplicates.
2882 	 */
2883 	dss_path = rfs4_dss_pathlist;
2884 	do {
2885 		int found = 0;
2886 		char *path = dss_path->path;
2887 
2888 		/* used only for non-HA so may not be removed */
2889 		if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
2890 			dss_path = dss_path->next;
2891 			continue;
2892 		}
2893 
2894 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
2895 			int cmpret;
2896 			size_t ncmp;
2897 			char *newpath = rfs4_dss_newpaths[i];
2898 
2899 			ncmp = MAX(strlen(path), strlen(newpath));
2900 			cmpret = strncmp(path, newpath, ncmp);
2901 
2902 			/*
2903 			 * Since nfsd has sorted rfs4_dss_newpaths for us,
2904 			 * once the return from strncmp is negative we know
2905 			 * we've passed the point where "path" should be,
2906 			 * and can stop searching: "path" has been removed.
2907 			 */
2908 			if (cmpret < 0)
2909 				break;
2910 
2911 			if (cmpret == 0) {
2912 				found = 1;
2913 				break;
2914 			}
2915 		}
2916 
2917 		if (found == 0) {
2918 			unsigned index = dss_path->index;
2919 			rfs4_servinst_t *sip = dss_path->sip;
2920 			rfs4_dss_path_t *path_next = dss_path->next;
2921 
2922 			/*
2923 			 * This path has been removed.
2924 			 * We must clear out the servinst reference to
2925 			 * it, since it's now owned by another
2926 			 * node: we should not attempt to touch it.
2927 			 */
2928 			ASSERT(dss_path == sip->dss_paths[index]);
2929 			sip->dss_paths[index] = NULL;
2930 
2931 			/* remove from "currently-serving" list, and destroy */
2932 			remque(dss_path);
2933 			kmem_free(dss_path, sizeof (rfs4_dss_path_t));
2934 
2935 			dss_path = path_next;
2936 		} else {
2937 			/* path was found; not removed */
2938 			dss_path = dss_path->next;
2939 		}
2940 	} while (dss_path != rfs4_dss_pathlist);
2941 
2942 	/*
2943 	 * Now, look for added paths: RGs that have been failed-over
2944 	 * to this node.
2945 	 * Walk the "passed-in" rfs4_dss_newpaths array from nfsd and,
2946 	 * for each path, check if it is on the "currently-serving"
2947 	 * rfs4_dss_pathlist. If not, that RG path has been added.
2948 	 *
2949 	 * Note: we don't do duplicate detection here; nfsd does that for us.
2950 	 *
2951 	 * Note: numadded_paths <= rfs4_dss_numnewpaths, which gives us
2952 	 * an upper bound for the size needed for added_paths[numadded_paths].
2953 	 */
2954 
2955 	/* probably more space than we need, but guaranteed to be enough */
2956 	if (rfs4_dss_numnewpaths > 0) {
2957 		size_t sz = rfs4_dss_numnewpaths * sizeof (char *);
2958 		added_paths = kmem_zalloc(sz, KM_SLEEP);
2959 	}
2960 
2961 	/* walk the "passed-in" rfs4_dss_newpaths array from nfsd */
2962 	for (i = 0; i < rfs4_dss_numnewpaths; i++) {
2963 		int found = 0;
2964 		char *newpath = rfs4_dss_newpaths[i];
2965 
2966 		dss_path = rfs4_dss_pathlist;
2967 		do {
2968 			char *path = dss_path->path;
2969 
2970 			/* used only for non-HA */
2971 			if (strcmp(path, NFS4_DSS_VAR_DIR) == 0) {
2972 				dss_path = dss_path->next;
2973 				continue;
2974 			}
2975 
2976 			if (strncmp(path, newpath, strlen(path)) == 0) {
2977 				found = 1;
2978 				break;
2979 			}
2980 
2981 			dss_path = dss_path->next;
2982 		} while (dss_path != rfs4_dss_pathlist);
2983 
2984 		if (found == 0) {
2985 			added_paths[numadded_paths] = newpath;
2986 			numadded_paths++;
2987 		}
2988 	}
2989 
2990 	/* did we find any added paths? */
2991 	if (numadded_paths > 0) {
2992 		/* create a new server instance, and start its grace period */
2993 		start_grace = 1;
2994 		rfs4_servinst_create(start_grace, numadded_paths, added_paths);
2995 
2996 		/* read in the stable storage state from these paths */
2997 		rfs4_dss_readstate(numadded_paths, added_paths);
2998 
2999 		/*
3000 		 * Multiple failovers during a grace period will cause
3001 		 * clients of the same resource group to be partitioned
3002 		 * into different server instances, with different
3003 		 * grace periods.  Since clients of the same resource
3004 		 * group must be subject to the same grace period,
3005 		 * we need to reset all currently active grace periods.
3006 		 */
3007 		rfs4_grace_reset_all();
3008 	}
3009 
3010 	if (rfs4_dss_numnewpaths > 0)
3011 		kmem_free(added_paths, rfs4_dss_numnewpaths * sizeof (char *));
3012 }
3013