xref: /freebsd/usr.sbin/nfsd/nfsd.c (revision 8657387683946d0c03e09fe77029edfe309eeb20)
1 /*
2  * Copyright (c) 1989, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Rick Macklem at The University of Guelph.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef lint
34 static const char copyright[] =
35 "@(#) Copyright (c) 1989, 1993, 1994\n\
36 	The Regents of the University of California.  All rights reserved.\n";
37 #endif /* not lint */
38 
39 #ifndef lint
40 #if 0
41 static char sccsid[] = "@(#)nfsd.c	8.9 (Berkeley) 3/29/95";
42 #endif
43 static const char rcsid[] =
44   "$FreeBSD$";
45 #endif /* not lint */
46 
47 #include <sys/param.h>
48 #include <sys/syslog.h>
49 #include <sys/wait.h>
50 #include <sys/mount.h>
51 #include <sys/fcntl.h>
52 #include <sys/linker.h>
53 #include <sys/module.h>
54 #include <sys/types.h>
55 #include <sys/stat.h>
56 #include <sys/sysctl.h>
57 #include <sys/ucred.h>
58 
59 #include <rpc/rpc.h>
60 #include <rpc/pmap_clnt.h>
61 #include <rpcsvc/nfs_prot.h>
62 
63 #include <netdb.h>
64 #include <arpa/inet.h>
65 #include <nfsserver/nfs.h>
66 #include <nfs/nfssvc.h>
67 
68 #include <err.h>
69 #include <errno.h>
70 #include <signal.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <unistd.h>
75 #include <sysexits.h>
76 
77 #include <getopt.h>
78 
/*
 * Compile-time configuration.
 */
#define	NFSD_STABLERESTART	"/var/db/nfs-stablerestart"
#define	NFSD_STABLEBACKUP	"/var/db/nfs-stablerestart.bak"
#define	MAXNFSDCNT	256
#define	DEFNFSDCNT	 4
#define	NFS_VER2	 2
#define NFS_VER3	 3
#define NFS_VER4	 4

/*
 * Command line state.  The *_set variables are filled in directly by
 * getopt_long() through the flag pointers in longopts[] below.
 */
static int debug = 0;
static int minthreads;
static int minthreads_set;
static int maxthreads;
static int maxthreads_set;
static int nfsdcnt;		/* number of children */
static int nfsdcnt_set;		/* set once set_nfsdcnt() has been called */
static const char *getopt_shortopts;
static const char *getopt_usage;

/*
 * Run-time state shared with the signal handlers.
 */
static pid_t children[MAXNFSDCNT]; /* PIDs of children */
static int nfssvc_nfsd;		/* Set to correct NFSSVC_xxx flag */
static int stablefd = -1;	/* Fd for the stable restart file */
static int backupfd;		/* Fd for the backup stable restart file */

static struct option longopts[] = {
	{ "debug", no_argument, &debug, 1 },
	{ "minthreads", required_argument, &minthreads_set, 1 },
	{ "maxthreads", required_argument, &maxthreads_set, 1 },
	{ NULL, 0, NULL, 0}
};

/* Signal handlers. */
static void	backup_stable(int);
static void	child_cleanup(int);
static void	cleanup(int);
static void	nonfs(int);
static void	reapchild(int);

/* Everything else. */
static void	copy_stable(int, int);
static void	killchildren(void);
static void	nfsd_exit(int);
static void	open_stable(int *, int *);
static int	setbindhost(struct addrinfo **ia, const char *bindhost,
		    struct addrinfo hints);
static void	set_nfsdcnt(int);
static void	start_server(int);
static void	unregistration(void);
static void	usage(void);
124 
/*
 * Register the NFS program with rpcbind on one transport.
 *
 * netid names the netconfig entry ("udp", "udp6", "tcp" or "tcp6");
 * family, socktype and protocol select the matching wildcard address for
 * the "nfs" service.  Version 2 is registered only when minvers is
 * NFS_VER2 and version 3 when minvers is at most NFS_VER3.
 *
 * Any failure is fatal.  When serving is zero (the -r path, which runs
 * before any child exists) the error goes to stderr via errx().  When
 * serving is non-zero the error is logged and nfsd_exit() is used so that
 * a forked server child is killed and earlier registrations are removed.
 */
static void
nfsd_rpcb_register(const char *netid, int family, int socktype, int protocol,
    int minvers, int serving)
{
	struct addrinfo hints, *ai;
	struct netconfig *nconf;
	struct netbuf nb;
	int ecode, vers;

	memset(&hints, 0, sizeof(hints));
	hints.ai_flags = AI_PASSIVE;
	hints.ai_family = family;
	hints.ai_socktype = socktype;
	hints.ai_protocol = protocol;
	ecode = getaddrinfo(NULL, "nfs", &hints, &ai);
	if (ecode != 0) {
		/* errx(), not err(): errno is unrelated to this failure. */
		if (!serving)
			errx(1, "getaddrinfo %s: %s", netid,
			    gai_strerror(ecode));
		syslog(LOG_ERR, "getaddrinfo %s: %s", netid,
		    gai_strerror(ecode));
		nfsd_exit(1);
	}
	nconf = getnetconfigent(netid);
	if (nconf == NULL) {
		if (!serving)
			errx(1, "getnetconfigent %s failed", netid);
		syslog(LOG_ERR, "getnetconfigent %s failed", netid);
		nfsd_exit(1);
	}
	nb.buf = ai->ai_addr;
	nb.len = nb.maxlen = ai->ai_addrlen;
	for (vers = NFS_VER2; vers <= NFS_VER3; vers++) {
		if (vers < minvers)
			continue;
		if (!rpcb_set(NFS_PROGRAM, vers, nconf, &nb)) {
			if (!serving)
				errx(1, "rpcb_set %s failed", netid);
			syslog(LOG_ERR, "rpcb_set %s failed", netid);
			nfsd_exit(1);
		}
	}
	freenetconfigent(nconf);
	freeaddrinfo(ai);
}

/*
 * Nfs server daemon mostly just a user context for nfssvc()
 *
 * 1 - do file descriptor and signal cleanup
 * 2 - fork the nfsd(s)
 * 3 - create server socket(s)
 * 4 - register socket with rpcbind
 *
 * For connectionless protocols, just pass the socket into the kernel via
 * nfssvc().
 * For connection based sockets, loop doing accepts. When you get a new
 * socket from accept, pass the msgsock into the kernel via nfssvc().
 * The arguments are:
 *	-a - bind to the wildcard address in addition to any -h addresses
 *	-h - bind to the given address (may be repeated)
 *	-n - number of servers
 *	-r - reregister with rpcbind, then exit
 *	-d - unregister with rpcbind, then exit
 *	-t - support tcp nfs clients
 *	-u - support udp nfs clients
 *	-e - accepted for compatibility, now a no-op
 *	--debug - stay in the foreground and copy log messages to stderr
 *	--minthreads, --maxthreads - bounds on the kernel thread count
 * optionally followed by "n" which is the number of nfsds (same as -n)
 */
int
main(int argc, char **argv)
{
	struct nfsd_addsock_args addsockargs;
	struct addrinfo *ai_udp, *ai_tcp, *ai_udp6, *ai_tcp6, hints;
	struct sockaddr_in inetpeer;
	struct sockaddr_in6 inet6peer;
	fd_set ready, sockbits;
	fd_set v4bits, v6bits;
	int ch, connect_type_cnt, i, maxsock, msgsock;
	socklen_t len;
	int on = 1, unregister, reregister, sock;
	int tcp6sock, ip6flag, tcpflag, tcpsock;
	int udpflag, error, s;
	int bindhostc, bindanyflag, rpcbreg, rpcbregcnt;
	int nfssvc_addsock;
	int longindex = 0;
	int nfs_minvers = NFS_VER2;
	size_t nfs_minvers_size;
	const char *lopt;
	char **bindhost = NULL;
	pid_t pid;

	nfsdcnt = DEFNFSDCNT;
	unregister = reregister = tcpflag = maxsock = 0;
	bindanyflag = udpflag = connect_type_cnt = bindhostc = 0;
	getopt_shortopts = "ah:n:rdtue";
	getopt_usage =
	    "usage:\n"
	    "  nfsd [-ardtue] [-h bindip]\n"
	    "       [-n numservers] [--minthreads #] [--maxthreads #]\n";
	while ((ch = getopt_long(argc, argv, getopt_shortopts, longopts,
		    &longindex)) != -1)
		switch (ch) {
		case 'a':
			bindanyflag = 1;
			break;
		case 'n':
			set_nfsdcnt(atoi(optarg));
			break;
		case 'h':
			bindhostc++;
			bindhost = realloc(bindhost,sizeof(char *)*bindhostc);
			if (bindhost == NULL)
				errx(1, "Out of memory");
			bindhost[bindhostc-1] = strdup(optarg);
			if (bindhost[bindhostc-1] == NULL)
				errx(1, "Out of memory");
			break;
		case 'r':
			reregister = 1;
			break;
		case 'd':
			unregister = 1;
			break;
		case 't':
			tcpflag = 1;
			break;
		case 'u':
			udpflag = 1;
			break;
		case 'e':
			/* now a no-op, since this is the default */
			break;
		case 0:
			/*
			 * A long option.  The matching *_set flag (or debug)
			 * was already stored by getopt_long(); only the
			 * numeric arguments remain to be picked up here.
			 */
			lopt = longopts[longindex].name;
			if (!strcmp(lopt, "minthreads")) {
				minthreads = atoi(optarg);
			} else if (!strcmp(lopt, "maxthreads")) {
				maxthreads = atoi(optarg);
			}
			break;
		default:
		case '?':
			usage();
		}
	if (!tcpflag && !udpflag)
		udpflag = 1;
	argv += optind;
	argc -= optind;
	if (minthreads_set && maxthreads_set && minthreads > maxthreads)
		errx(EX_USAGE,
		    "error: minthreads(%d) can't be greater than "
		    "maxthreads(%d)", minthreads, maxthreads);

	/*
	 * XXX
	 * Backward compatibility, trailing number is the count of daemons.
	 */
	if (argc > 1)
		usage();
	if (argc == 1)
		set_nfsdcnt(atoi(argv[0]));

	/*
	 * Make sure the "nfsd" kernel module is available, loading it
	 * if it is not already present.
	 */
	if (modfind("nfsd") < 0) {
		/* Not present in kernel, try loading it */
		if (kldload("nfsd") < 0 || modfind("nfsd") < 0)
			errx(1, "NFS server is not available");
	}

	/*
	 * IPv6 is used only if an IPv6 socket can be created and both
	 * IPv6 transports are known to netconfig.
	 */
	ip6flag = 1;
	s = socket(AF_INET6, SOCK_DGRAM, IPPROTO_UDP);
	if (s == -1) {
		if (errno != EPROTONOSUPPORT && errno != EAFNOSUPPORT)
			err(1, "socket");
		ip6flag = 0;
	} else if (getnetconfigent("udp6") == NULL ||
		getnetconfigent("tcp6") == NULL) {
		ip6flag = 0;
	}
	if (s != -1)
		close(s);

	/* With no -h, or with -a, the wildcard address is bound as well. */
	if (bindhostc == 0 || bindanyflag) {
		bindhostc++;
		bindhost = realloc(bindhost,sizeof(char *)*bindhostc);
		if (bindhost == NULL)
			errx(1, "Out of memory");
		bindhost[bindhostc-1] = strdup("*");
		if (bindhost[bindhostc-1] == NULL)
			errx(1, "Out of memory");
	}

	nfs_minvers_size = sizeof(nfs_minvers);
	error = sysctlbyname("vfs.nfsd.server_min_nfsvers", &nfs_minvers,
	    &nfs_minvers_size, NULL, 0);
	if (error != 0 || nfs_minvers < NFS_VER2 || nfs_minvers > NFS_VER4) {
		warnx("sysctlbyname(vfs.nfsd.server_min_nfsvers) failed,"
		    " defaulting to NFSv2");
		nfs_minvers = NFS_VER2;
	}

	if (unregister) {
		unregistration();
		exit (0);
	}
	if (reregister) {
		if (udpflag) {
			nfsd_rpcb_register("udp", AF_INET, SOCK_DGRAM,
			    IPPROTO_UDP, nfs_minvers, 0);
			if (ip6flag)
				nfsd_rpcb_register("udp6", AF_INET6,
				    SOCK_DGRAM, IPPROTO_UDP, nfs_minvers, 0);
		}
		if (tcpflag) {
			nfsd_rpcb_register("tcp", AF_INET, SOCK_STREAM,
			    IPPROTO_TCP, nfs_minvers, 0);
			if (ip6flag)
				nfsd_rpcb_register("tcp6", AF_INET6,
				    SOCK_STREAM, IPPROTO_TCP, nfs_minvers, 0);
		}
		exit (0);
	}
	if (debug == 0) {
		daemon(0, 0);
		(void)signal(SIGHUP, SIG_IGN);
		(void)signal(SIGINT, SIG_IGN);
		/*
		 * nfsd sits in the kernel most of the time.  It needs
		 * to ignore SIGTERM/SIGQUIT in order to stay alive as long
		 * as possible during a shutdown, otherwise loopback
		 * mounts will not be able to unmount.
		 */
		(void)signal(SIGTERM, SIG_IGN);
		(void)signal(SIGQUIT, SIG_IGN);
	}
	(void)signal(SIGSYS, nonfs);
	(void)signal(SIGCHLD, reapchild);
	(void)signal(SIGUSR2, backup_stable);

	openlog("nfsd", LOG_PID | (debug ? LOG_PERROR : 0), LOG_DAEMON);

	/*
	 * For V4, we open the stablerestart file and call nfssvc()
	 * to get it loaded. This is done before the daemons do the
	 * regular nfssvc() call to service NFS requests.
	 * (This way the file remains open until the last nfsd is killed
	 *  off.)
	 * It and the backup copy will be created as empty files
	 * the first time this nfsd is started and should never be
	 * deleted/replaced if at all possible. It should live on a
	 * local, non-volatile storage device that does not do hardware
	 * level write-back caching. (See SCSI doc for more information
	 * on how to prevent write-back caching on SCSI disks.)
	 */
	open_stable(&stablefd, &backupfd);
	if (stablefd < 0) {
		syslog(LOG_ERR, "Can't open %s: %m\n", NFSD_STABLERESTART);
		exit(1);
	}
	/* This system call will fail for old kernels, but that's ok. */
	nfssvc(NFSSVC_BACKUPSTABLE, NULL);
	if (nfssvc(NFSSVC_STABLERESTART, (caddr_t)&stablefd) < 0) {
		syslog(LOG_ERR, "Can't read stable storage file: %m\n");
		exit(1);
	}
	nfssvc_addsock = NFSSVC_NFSDADDSOCK;
	nfssvc_nfsd = NFSSVC_NFSDNFSD;

	if (tcpflag) {
		/*
		 * For TCP mode, we fork once to start the first
		 * kernel nfsd thread. The kernel will add more
		 * threads as needed.
		 */
		pid = fork();
		if (pid == -1) {
			syslog(LOG_ERR, "fork: %m");
			nfsd_exit(1);
		}
		if (pid) {
			children[0] = pid;
		} else {
			(void)signal(SIGUSR1, child_cleanup);
			setproctitle("server");
			start_server(0);
		}
	}

	(void)signal(SIGUSR1, cleanup);
	FD_ZERO(&v4bits);
	FD_ZERO(&v6bits);
	FD_ZERO(&sockbits);

	rpcbregcnt = 0;
	/* Set up the socket for udp and rpcb register it. */
	if (udpflag) {
		rpcbreg = 0;
		for (i = 0; i < bindhostc; i++) {
			memset(&hints, 0, sizeof hints);
			hints.ai_flags = AI_PASSIVE;
			hints.ai_family = AF_INET;
			hints.ai_socktype = SOCK_DGRAM;
			hints.ai_protocol = IPPROTO_UDP;
			if (setbindhost(&ai_udp, bindhost[i], hints) == 0) {
				rpcbreg = 1;
				rpcbregcnt++;
				if ((sock = socket(ai_udp->ai_family,
				    ai_udp->ai_socktype,
				    ai_udp->ai_protocol)) < 0) {
					syslog(LOG_ERR,
					    "can't create udp socket");
					nfsd_exit(1);
				}
				if (bind(sock, ai_udp->ai_addr,
				    ai_udp->ai_addrlen) < 0) {
					syslog(LOG_ERR,
					    "can't bind udp addr %s: %m",
					    bindhost[i]);
					nfsd_exit(1);
				}
				freeaddrinfo(ai_udp);
				addsockargs.sock = sock;
				addsockargs.name = NULL;
				addsockargs.namelen = 0;
				if (nfssvc(nfssvc_addsock, &addsockargs) < 0) {
					syslog(LOG_ERR, "can't Add UDP socket");
					nfsd_exit(1);
				}
				/* The kernel keeps its own reference. */
				(void)close(sock);
			}
		}
		if (rpcbreg == 1)
			nfsd_rpcb_register("udp", AF_INET, SOCK_DGRAM,
			    IPPROTO_UDP, nfs_minvers, 1);
	}

	/* Set up the socket for udp6 and rpcb register it. */
	if (udpflag && ip6flag) {
		rpcbreg = 0;
		for (i = 0; i < bindhostc; i++) {
			memset(&hints, 0, sizeof hints);
			hints.ai_flags = AI_PASSIVE;
			hints.ai_family = AF_INET6;
			hints.ai_socktype = SOCK_DGRAM;
			hints.ai_protocol = IPPROTO_UDP;
			if (setbindhost(&ai_udp6, bindhost[i], hints) == 0) {
				rpcbreg = 1;
				rpcbregcnt++;
				if ((sock = socket(ai_udp6->ai_family,
				    ai_udp6->ai_socktype,
				    ai_udp6->ai_protocol)) < 0) {
					syslog(LOG_ERR,
						"can't create udp6 socket");
					nfsd_exit(1);
				}
				if (setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
				    &on, sizeof on) < 0) {
					syslog(LOG_ERR,
					    "can't set v6-only binding for "
					    "udp6 socket: %m");
					nfsd_exit(1);
				}
				if (bind(sock, ai_udp6->ai_addr,
				    ai_udp6->ai_addrlen) < 0) {
					syslog(LOG_ERR,
					    "can't bind udp6 addr %s: %m",
					    bindhost[i]);
					nfsd_exit(1);
				}
				freeaddrinfo(ai_udp6);
				addsockargs.sock = sock;
				addsockargs.name = NULL;
				addsockargs.namelen = 0;
				if (nfssvc(nfssvc_addsock, &addsockargs) < 0) {
					syslog(LOG_ERR,
					    "can't add UDP6 socket");
					nfsd_exit(1);
				}
				(void)close(sock);
			}
		}
		if (rpcbreg == 1)
			nfsd_rpcb_register("udp6", AF_INET6, SOCK_DGRAM,
			    IPPROTO_UDP, nfs_minvers, 1);
	}

	/* Set up the socket for tcp and rpcb register it. */
	if (tcpflag) {
		rpcbreg = 0;
		for (i = 0; i < bindhostc; i++) {
			memset(&hints, 0, sizeof hints);
			hints.ai_flags = AI_PASSIVE;
			hints.ai_family = AF_INET;
			hints.ai_socktype = SOCK_STREAM;
			hints.ai_protocol = IPPROTO_TCP;
			if (setbindhost(&ai_tcp, bindhost[i], hints) == 0) {
				rpcbreg = 1;
				rpcbregcnt++;
				if ((tcpsock = socket(AF_INET, SOCK_STREAM,
				    0)) < 0) {
					syslog(LOG_ERR,
					    "can't create tcp socket");
					nfsd_exit(1);
				}
				if (setsockopt(tcpsock, SOL_SOCKET,
				    SO_REUSEADDR,
				    (char *)&on, sizeof(on)) < 0)
					syslog(LOG_ERR,
					     "setsockopt SO_REUSEADDR: %m");
				if (bind(tcpsock, ai_tcp->ai_addr,
				    ai_tcp->ai_addrlen) < 0) {
					syslog(LOG_ERR,
					    "can't bind tcp addr %s: %m",
					    bindhost[i]);
					nfsd_exit(1);
				}
				if (listen(tcpsock, -1) < 0) {
					syslog(LOG_ERR, "listen failed");
					nfsd_exit(1);
				}
				freeaddrinfo(ai_tcp);
				FD_SET(tcpsock, &sockbits);
				FD_SET(tcpsock, &v4bits);
				/* Never let maxsock drop below an fd in use. */
				if (maxsock < tcpsock)
					maxsock = tcpsock;
				connect_type_cnt++;
			}
		}
		if (rpcbreg == 1)
			nfsd_rpcb_register("tcp", AF_INET, SOCK_STREAM,
			    IPPROTO_TCP, nfs_minvers, 1);
	}

	/* Set up the socket for tcp6 and rpcb register it. */
	if (tcpflag && ip6flag) {
		rpcbreg = 0;
		for (i = 0; i < bindhostc; i++) {
			memset(&hints, 0, sizeof hints);
			hints.ai_flags = AI_PASSIVE;
			hints.ai_family = AF_INET6;
			hints.ai_socktype = SOCK_STREAM;
			hints.ai_protocol = IPPROTO_TCP;
			if (setbindhost(&ai_tcp6, bindhost[i], hints) == 0) {
				rpcbreg = 1;
				rpcbregcnt++;
				if ((tcp6sock = socket(ai_tcp6->ai_family,
				    ai_tcp6->ai_socktype,
				    ai_tcp6->ai_protocol)) < 0) {
					syslog(LOG_ERR,
					    "can't create tcp6 socket");
					nfsd_exit(1);
				}
				if (setsockopt(tcp6sock, SOL_SOCKET,
				    SO_REUSEADDR,
				    (char *)&on, sizeof(on)) < 0)
					syslog(LOG_ERR,
					    "setsockopt SO_REUSEADDR: %m");
				if (setsockopt(tcp6sock, IPPROTO_IPV6,
				    IPV6_V6ONLY, &on, sizeof on) < 0) {
					syslog(LOG_ERR,
					"can't set v6-only binding for tcp6 "
					    "socket: %m");
					nfsd_exit(1);
				}
				if (bind(tcp6sock, ai_tcp6->ai_addr,
				    ai_tcp6->ai_addrlen) < 0) {
					syslog(LOG_ERR,
					    "can't bind tcp6 addr %s: %m",
					    bindhost[i]);
					nfsd_exit(1);
				}
				if (listen(tcp6sock, -1) < 0) {
					syslog(LOG_ERR, "listen failed");
					nfsd_exit(1);
				}
				freeaddrinfo(ai_tcp6);
				FD_SET(tcp6sock, &sockbits);
				FD_SET(tcp6sock, &v6bits);
				if (maxsock < tcp6sock)
					maxsock = tcp6sock;
				connect_type_cnt++;
			}
		}
		if (rpcbreg == 1)
			nfsd_rpcb_register("tcp6", AF_INET6, SOCK_STREAM,
			    IPPROTO_TCP, nfs_minvers, 1);
	}

	if (rpcbregcnt == 0) {
		syslog(LOG_ERR, "rpcb_set() failed, nothing to do: %m");
		nfsd_exit(1);
	}

	if (tcpflag && connect_type_cnt == 0) {
		syslog(LOG_ERR, "tcp connects == 0, nothing to do: %m");
		nfsd_exit(1);
	}

	setproctitle("master");
	/*
	 * We always want a master to have a clean way to shut nfsd down
	 * (with unregistration): if the master is killed, it unregisters and
	 * kills all children. If we run for UDP only (and so do not have to
	 * loop waiting for accept), we instead make the parent
	 * a "server" too. start_server will not return.
	 */
	if (!tcpflag)
		start_server(1);

	/*
	 * Loop forever accepting connections and passing the sockets
	 * into the kernel for the mounts.
	 */
	for (;;) {
		ready = sockbits;
		/*
		 * With a single listening socket there is nothing to
		 * choose between, so skip select() and block in accept().
		 */
		if (connect_type_cnt > 1) {
			if (select(maxsock + 1,
			    &ready, NULL, NULL, NULL) < 1) {
				error = errno;
				if (error == EINTR)
					continue;
				syslog(LOG_ERR, "select failed: %m");
				nfsd_exit(1);
			}
		}
		for (tcpsock = 0; tcpsock <= maxsock; tcpsock++) {
			if (FD_ISSET(tcpsock, &ready)) {
				if (FD_ISSET(tcpsock, &v4bits)) {
					len = sizeof(inetpeer);
					if ((msgsock = accept(tcpsock,
					    (struct sockaddr *)&inetpeer, &len)) < 0) {
						error = errno;
						syslog(LOG_ERR, "accept failed: %m");
						if (error == ECONNABORTED ||
						    error == EINTR)
							continue;
						nfsd_exit(1);
					}
					memset(inetpeer.sin_zero, 0,
						sizeof(inetpeer.sin_zero));
					if (setsockopt(msgsock, SOL_SOCKET,
					    SO_KEEPALIVE, (char *)&on, sizeof(on)) < 0)
						syslog(LOG_ERR,
						    "setsockopt SO_KEEPALIVE: %m");
					addsockargs.sock = msgsock;
					addsockargs.name = (caddr_t)&inetpeer;
					addsockargs.namelen = len;
					nfssvc(nfssvc_addsock, &addsockargs);
					(void)close(msgsock);
				} else if (FD_ISSET(tcpsock, &v6bits)) {
					len = sizeof(inet6peer);
					if ((msgsock = accept(tcpsock,
					    (struct sockaddr *)&inet6peer,
					    &len)) < 0) {
						error = errno;
						syslog(LOG_ERR,
						     "accept failed: %m");
						if (error == ECONNABORTED ||
						    error == EINTR)
							continue;
						nfsd_exit(1);
					}
					if (setsockopt(msgsock, SOL_SOCKET,
					    SO_KEEPALIVE, (char *)&on,
					    sizeof(on)) < 0)
						syslog(LOG_ERR, "setsockopt "
						    "SO_KEEPALIVE: %m");
					addsockargs.sock = msgsock;
					addsockargs.name = (caddr_t)&inet6peer;
					addsockargs.namelen = len;
					nfssvc(nfssvc_addsock, &addsockargs);
					(void)close(msgsock);
				}
			}
		}
	}
}
839 
/*
 * Resolve one bind address for the "nfs" service.
 *
 * bindhost is either NULL or "*" (meaning the wildcard address) or a host
 * name / numeric address.  hints is passed by value so it can be adjusted
 * locally.  On success the result list is stored in *ai (to be released
 * with freeaddrinfo() by the caller) and 0 is returned.  1 is returned
 * when bindhost is a numeric address of the other address family, or when
 * getaddrinfo() fails (in which case the failure is logged).
 */
static int
setbindhost(struct addrinfo **ai, const char *bindhost, struct addrinfo hints)
{
	u_int32_t scratch[4];	/* room for an IPv4 or IPv6 address */
	const char *node = NULL;
	int gai_rc, other_family;

	if (bindhost != NULL && strcmp("*", bindhost) != 0)
		node = bindhost;

	if (node != NULL &&
	    (hints.ai_family == AF_INET || hints.ai_family == AF_INET6)) {
		other_family =
		    (hints.ai_family == AF_INET) ? AF_INET6 : AF_INET;
		if (inet_pton(hints.ai_family, node, scratch) == 1) {
			/* Numeric address of the wanted family: no lookup. */
			hints.ai_flags = AI_NUMERICHOST;
		} else if (inet_pton(other_family, node, scratch) == 1) {
			/* Numeric address of the other family: not ours. */
			return (1);
		}
	}

	gai_rc = getaddrinfo(node, "nfs", &hints, ai);
	if (gai_rc == 0)
		return (0);

	syslog(LOG_ERR, "getaddrinfo %s: %s", bindhost,
	    gai_strerror(gai_rc));
	return (1);
}
885 
886 static void
887 set_nfsdcnt(int proposed)
888 {
889 
890 	if (proposed < 1) {
891 		warnx("nfsd count too low %d; reset to %d", proposed,
892 		    DEFNFSDCNT);
893 		nfsdcnt = DEFNFSDCNT;
894 	} else if (proposed > MAXNFSDCNT) {
895 		warnx("nfsd count too high %d; truncated to %d", proposed,
896 		    MAXNFSDCNT);
897 		nfsdcnt = MAXNFSDCNT;
898 	} else
899 		nfsdcnt = proposed;
900 	nfsdcnt_set = 1;
901 }
902 
903 static void
904 usage(void)
905 {
906 	(void)fprintf(stderr, "%s", getopt_usage);
907 	exit(1);
908 }
909 
910 static void
911 nonfs(__unused int signo)
912 {
913 	syslog(LOG_ERR, "missing system call: NFS not available");
914 }
915 
916 static void
917 reapchild(__unused int signo)
918 {
919 	pid_t pid;
920 	int i;
921 
922 	while ((pid = wait3(NULL, WNOHANG, NULL)) > 0) {
923 		for (i = 0; i < nfsdcnt; i++)
924 			if (pid == children[i])
925 				children[i] = -1;
926 	}
927 }
928 
929 static void
930 unregistration(void)
931 {
932 	if ((!rpcb_unset(NFS_PROGRAM, 2, NULL)) ||
933 	    (!rpcb_unset(NFS_PROGRAM, 3, NULL)))
934 		syslog(LOG_ERR, "rpcb_unset failed");
935 }
936 
937 static void
938 killchildren(void)
939 {
940 	int i;
941 
942 	for (i = 0; i < nfsdcnt; i++) {
943 		if (children[i] > 0)
944 			kill(children[i], SIGKILL);
945 	}
946 }
947 
948 /*
949  * Cleanup master after SIGUSR1.
950  */
951 static void
952 cleanup(__unused int signo)
953 {
954 	nfsd_exit(0);
955 }
956 
957 /*
958  * Cleanup child after SIGUSR1.
959  */
960 static void
961 child_cleanup(__unused int signo)
962 {
963 	exit(0);
964 }
965 
/*
 * Common exit path for the master: kill the recorded children, remove
 * the rpcbind registrations, then exit with the given status.
 */
static void
nfsd_exit(int status)
{
	const int code = status;

	killchildren();		/* children first ... */
	unregistration();	/* ... then the rpcbind mappings */
	exit(code);
}
973 
974 static int
975 get_tuned_nfsdcount(void)
976 {
977 	int ncpu, error, tuned_nfsdcnt;
978 	size_t ncpu_size;
979 
980 	ncpu_size = sizeof(ncpu);
981 	error = sysctlbyname("hw.ncpu", &ncpu, &ncpu_size, NULL, 0);
982 	if (error) {
983 		warnx("sysctlbyname(hw.ncpu) failed defaulting to %d nfs servers",
984 		    DEFNFSDCNT);
985 		tuned_nfsdcnt = DEFNFSDCNT;
986 	} else {
987 		tuned_nfsdcnt = ncpu * 8;
988 	}
989 	return tuned_nfsdcnt;
990 }
991 
/*
 * Enter the kernel as an NFS server via nfssvc() and exit when it returns.
 *
 * The principal passed to the kernel is "nfs@<hostname>".  If the local
 * host name is not fully qualified, the canonical name from getaddrinfo()
 * is used instead when that one is fully qualified.  The thread limits
 * come from the -n count when given, otherwise from --minthreads /
 * --maxthreads with the tuned default as fallback.
 *
 * When master is non-zero the process leaves through nfsd_exit(),
 * otherwise through plain exit().  This function does not return.
 */
static void
start_server(int master)
{
	struct nfsd_nfsd_args args;
	struct addrinfo hints, *res;
	char host[MAXHOSTNAMELEN + 1];
	char princ[MAXHOSTNAMELEN + 5];
	const char *dot;
	int exitcode, rc;

	exitcode = 0;
	gethostname(host, sizeof (host));
	snprintf(princ, sizeof (princ), "nfs@%s", host);

	dot = strchr(host, '.');
	if (dot == NULL || dot[1] == '\0') {
		/* If not fully qualified, try getaddrinfo() */
		memset((void *)&hints, 0, sizeof (hints));
		hints.ai_flags = AI_CANONNAME;
		rc = getaddrinfo(host, NULL, &hints, &res);
		if (rc == 0) {
			if (res->ai_canonname != NULL) {
				dot = strchr(res->ai_canonname, '.');
				if (dot != NULL && dot[1] != '\0')
					snprintf(princ, sizeof (princ),
					    "nfs@%s", res->ai_canonname);
			}
			freeaddrinfo(res);
		}
	}
	args.principal = princ;

	if (nfsdcnt_set) {
		/* An explicit count pins both limits. */
		args.minthreads = nfsdcnt;
		args.maxthreads = nfsdcnt;
	} else {
		if (minthreads_set)
			args.minthreads = minthreads;
		else
			args.minthreads = get_tuned_nfsdcount();
		if (maxthreads_set)
			args.maxthreads = maxthreads;
		else
			args.maxthreads = args.minthreads;
		if (args.maxthreads < args.minthreads)
			args.maxthreads = args.minthreads;
	}

	rc = nfssvc(nfssvc_nfsd, &args);
	if (rc < 0 && errno == EAUTH) {
		/*
		 * This indicates that it could not register the
		 * rpcsec_gss credentials, usually because the
		 * gssd daemon isn't running.
		 * (only the experimental server with nfsv4)
		 * Retry with an empty principal.
		 */
		syslog(LOG_ERR, "No gssd, using AUTH_SYS only");
		princ[0] = '\0';
		rc = nfssvc(nfssvc_nfsd, &args);
	}
	if (rc < 0) {
		syslog(LOG_ERR, "nfssvc: %m");
		exitcode = 1;
	}

	if (master)
		nfsd_exit(exitcode);
	else
		exit(exitcode);
}
1050 
1051 /*
1052  * Open the stable restart file and return the file descriptor for it.
1053  */
1054 static void
1055 open_stable(int *stable_fdp, int *backup_fdp)
1056 {
1057 	int stable_fd, backup_fd = -1, ret;
1058 	struct stat st, backup_st;
1059 
1060 	/* Open and stat the stable restart file. */
1061 	stable_fd = open(NFSD_STABLERESTART, O_RDWR, 0);
1062 	if (stable_fd < 0)
1063 		stable_fd = open(NFSD_STABLERESTART, O_RDWR | O_CREAT, 0600);
1064 	if (stable_fd >= 0) {
1065 		ret = fstat(stable_fd, &st);
1066 		if (ret < 0) {
1067 			close(stable_fd);
1068 			stable_fd = -1;
1069 		}
1070 	}
1071 
1072 	/* Open and stat the backup stable restart file. */
1073 	if (stable_fd >= 0) {
1074 		backup_fd = open(NFSD_STABLEBACKUP, O_RDWR, 0);
1075 		if (backup_fd < 0)
1076 			backup_fd = open(NFSD_STABLEBACKUP, O_RDWR | O_CREAT,
1077 			    0600);
1078 		if (backup_fd >= 0) {
1079 			ret = fstat(backup_fd, &backup_st);
1080 			if (ret < 0) {
1081 				close(backup_fd);
1082 				backup_fd = -1;
1083 			}
1084 		}
1085 		if (backup_fd < 0) {
1086 			close(stable_fd);
1087 			stable_fd = -1;
1088 		}
1089 	}
1090 
1091 	*stable_fdp = stable_fd;
1092 	*backup_fdp = backup_fd;
1093 	if (stable_fd < 0)
1094 		return;
1095 
1096 	/* Sync up the 2 files, as required. */
1097 	if (st.st_size > 0)
1098 		copy_stable(stable_fd, backup_fd);
1099 	else if (backup_st.st_size > 0)
1100 		copy_stable(backup_fd, stable_fd);
1101 }
1102 
/*
 * Copy the stable restart file to the backup or vice versa.
 *
 * Both descriptors are rewound, to_fd is truncated, the whole content of
 * from_fd is copied and to_fd is fsync()ed.  Any failure is logged and
 * stops the copy.
 *
 * A write() that transfers fewer bytes than requested is continued until
 * the whole buffer is out; treating it as success would silently drop
 * part of the restart data.  The lseek() results are compared as off_t
 * instead of being narrowed to int.
 */
static void
copy_stable(int from_fd, int to_fd)
{
	static char buf[1024];
	ssize_t cnt, done, n;
	int ret;

	ret = 0;
	if (lseek(from_fd, (off_t)0, SEEK_SET) == (off_t)-1)
		ret = -1;
	if (ret >= 0 && lseek(to_fd, (off_t)0, SEEK_SET) == (off_t)-1)
		ret = -1;
	if (ret >= 0)
		ret = ftruncate(to_fd, (off_t)0);
	while (ret >= 0) {
		cnt = read(from_fd, buf, sizeof(buf));
		if (cnt == 0)
			break;			/* end of file */
		if (cnt < 0) {
			ret = -1;
			break;
		}
		for (done = 0; done < cnt; done += n) {
			n = write(to_fd, buf + done, (size_t)(cnt - done));
			if (n <= 0) {
				/* No progress without an error: report EIO. */
				if (n == 0)
					errno = EIO;
				ret = -1;
				break;
			}
		}
	}
	if (ret >= 0)
		ret = fsync(to_fd);
	if (ret < 0)
		syslog(LOG_ERR, "stable restart copy failure: %m");
}
1130 
1131 /*
1132  * Back up the stable restart file when indicated by the kernel.
1133  */
1134 static void
1135 backup_stable(__unused int signo)
1136 {
1137 
1138 	if (stablefd >= 0)
1139 		copy_stable(stablefd, backupfd);
1140 }
1141 
1142