xref: /titanic_50/usr/src/cmd/avs/rdc/sndrd.c (revision 36802407db97b1bcd32a63b16112e95ffcc5bb98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Network SNDR/ncall-ip server - based on nfsd
29  */
30 #include <sys/types.h>
31 #include <rpc/types.h>
32 #include <errno.h>
33 #include <netdb.h>
34 #include <sys/socket.h>
35 #include <netconfig.h>
36 #include <stropts.h>
37 #include <fcntl.h>
38 #include <stdio.h>
39 #include <strings.h>
40 #include <signal.h>
41 #include <unistd.h>
42 #include <stdlib.h>
43 #include <netdir.h>
44 #include <rpc/rpc_com.h>
45 #include <rpc/rpc.h>
46 #include <tiuser.h>
47 #include <netinet/tcp.h>
48 #include <netinet/in.h>
49 #include <syslog.h>
50 #include <locale.h>
51 #include <langinfo.h>
52 #include <libintl.h>
53 #include <libgen.h>
54 #include <deflt.h>
55 #include <sys/resource.h>
56 
57 #include <sys/nsctl/nsctl.h>
58 
59 #ifdef	__NCALL__
60 
61 #include <sys/ncall/ncall.h>
62 #include <sys/ncall/ncall_ip.h>
63 #include <sys/nsctl/libncall.h>
64 
65 #define	RDC_POOL_CREATE	NC_IOC_POOL_CREATE
66 #define	RDC_POOL_RUN	NC_IOC_POOL_RUN
67 #define	RDC_POOL_WAIT	NC_IOC_POOL_WAIT
68 #define	RDC_PROGRAM	NCALL_PROGRAM
69 #define	RDC_SERVICE	"ncall"
70 #undef RDC_SVCPOOL_ID	/* We are overloading this value */
71 #define	RDC_SVCPOOL_ID	NCALL_SVCPOOL_ID
72 #define	RDC_SVC_NAME	"NCALL"
73 #define	RDC_VERS_MIN	NCALL_VERS_MIN
74 #define	RDC_VERS_MAX	NCALL_VERS_MAX
75 
76 #else	/* !__NCALL__ */
77 
78 #include <sys/nsctl/rdc_ioctl.h>
79 #include <sys/nsctl/rdc_io.h>
80 #include <sys/nsctl/librdc.h>
81 
82 #define	RDC_SERVICE	"rdc"
83 #define	RDC_SVC_NAME	"RDC"
84 
85 #endif	/* __NCALL__ */
86 
/* Path of the defaults file opened with defopen() at start-up. */
#define	RDCADMIN	"/etc/default/sndr"
88 
89 #include <nsctl.h>
90 
/*
 * One pending connection indication.  Entries are kept on a circular,
 * doubly linked list: a lone entry points at itself in both directions.
 */
struct conn_ind {
	struct conn_ind	*conn_next;	/* following entry on the ring */
	struct conn_ind	*conn_prev;	/* preceding entry on the ring */
	struct t_call	*conn_call;	/* call data filled in by t_listen() */
};
96 
97 struct conn_entry {
98 	bool_t			closing;
99 	struct netconfig	nc;
100 };
101 
102 static char *progname;
103 static struct conn_entry *conn_polled;
104 static int num_conns;			/* Current number of connections */
105 static struct pollfd *poll_array;	/* array of poll descriptors for poll */
106 static size_t num_fds = 0;		/* number of transport fds opened */
107 static void poll_for_action();
108 static void remove_from_poll_list(int);
109 static int do_poll_cots_action(int, int);
110 static int do_poll_clts_action(int, int);
111 static void add_to_poll_list(int, struct netconfig *);
112 static int bind_to_provider(char *, char *, struct netbuf **,
113     struct netconfig **);
114 static int set_addrmask(int, struct netconfig *, struct netbuf *);
115 static void conn_close_oldest(void);
116 static boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
117 static void cots_listen_event(int, int);
118 static int discon_get(int, struct netconfig *, struct conn_ind **);
119 static int nofile_increase(int);
120 static int is_listen_fd_index(int);
121 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
122 static int sndrsvcpool(int);
123 static int svcwait(int id);
124 #endif
125 
126 
127 /*
128  * RPC protocol block.  Useful for passing registration information.
129  */
struct protob {
	char		*serv;		/* printable service name ("RDC") */
	int		versmin;	/* lowest version to register */
	int		versmax;	/* highest version to register */
	int		program;	/* RPC program number to register */
	struct protob	*next;		/* following block, or NULL at end */
};
137 
138 
139 
/* Value of num_fds once start-up has registered every listener. */
static size_t end_listen_fds;
/* Incremented for each -d; when non-zero the daemon does not fork. */
static int debugflg = 0;
/* Connection cap; -1 means no cap has been configured. */
static int max_conns_allowed = -1;
/* Listen backlog, overridable from the defaults file or with -l. */
static int listen_backlog = 10;
/* Single transport provider to use, from the defaults file or -t. */
static char *trans_provider = NULL;
static int rdcsvc(int, struct netbuf, struct netconfig *);

/* Service hook invoked by cots_listen_event() for each new stream. */
static int (*Mysvc)(int, struct netbuf, struct netconfig *) = rdcsvc;
149 
/*
 * Determine valid semantics for rdc.
 *
 * True when the netconfig entry uses one of the three TPI service types
 * the daemon handles.  The argument is parenthesized so that any pointer
 * expression (e.g. "&nc" or "tbl + i") may be passed; note that it is
 * evaluated up to three times.
 */
#define	OK_TPI_TYPE(_nconf)	\
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
157 
/*
 * Build a uint32_t from the four bytes starting at `a', treating the
 * first byte as the most significant (big-endian order).  The argument
 * is parenthesized before the cast so that pointer expressions such as
 * "p + 1" are evaluated in their own type first; it is evaluated four
 * times, so it must be free of side effects.
 */
#define	BE32_TO_U32(a)		\
	((((uint32_t)((uchar_t *)(a))[0] & 0xFF) << (uint32_t)24) |\
	(((uint32_t)((uchar_t *)(a))[1] & 0xFF) << (uint32_t)16) |\
	(((uint32_t)((uchar_t *)(a))[2] & 0xFF) << (uint32_t)8)  |\
	((uint32_t)((uchar_t *)(a))[3] & 0xFF))
163 
/*
 * NULL-terminated list of transport providers tried, in order, when
 * neither -a nor a specific provider has been requested.  The datagram
 * (UDP) providers are offered in DEBUG builds only.
 */
#ifndef DEBUG
static	char *defaultproviders[] = { "/dev/tcp6", "/dev/tcp", NULL };
#else
static	char *defaultproviders[] = {
	"/dev/tcp", "/dev/tcp6", "/dev/udp", "/dev/udp6", NULL
};
#endif
173 
/*
 * Growth steps.  POLL_ARRAY_INC_SIZE is the number of slots added to
 * the poll arrays each time they fill up.  NOFILE_INC_SIZE is presumably
 * the matching step for the open-file limit (see nofile_increase()).
 */
#define	POLL_ARRAY_INC_SIZE	64
#define	NOFILE_INC_SIZE		64
179 
/* Control device opened by open_rdc(); depends on the build flavour. */
#ifndef	__NCALL__
const char *rdc_devr = "/dev/rdc";
#else
const char *rdc_devr = "/dev/ncallip";
#endif
185 
186 static int rdc_fdr;
187 static int
188 
189 open_rdc(void)
190 {
191 	int fd = open(rdc_devr, O_RDONLY);
192 
193 	if (fd < 0)
194 		return (-1);
195 
196 	return (rdc_fdr = fd);
197 }
198 
199 static int
200 sndrsys(int type, void *arg)
201 {
202 	int ret = -1;
203 	if (!rdc_fdr && open_rdc() < 0) { /* open failed */
204 		syslog(LOG_ERR, "open_rdc() failed: %m\n");
205 	} else {
206 		if ((ret = ioctl(rdc_fdr, type, arg)) < 0) {
207 			syslog(LOG_ERR, "ioctl(rdc_ioctl) failed: %m\n");
208 		}
209 	}
210 	return (ret);
211 }
212 
213 int
214 rdc_transport_open(struct netconfig *nconf)
215 {
216 	int fd;
217 	struct strioctl	strioc;
218 
219 	if ((nconf == (struct netconfig *)NULL) ||
220 	    (nconf->nc_device == (char *)NULL)) {
221 		syslog(LOG_ERR, "No netconfig device");
222 		return (-1);
223 	}
224 
225 	/*
226 	 * Open the transport device.
227 	 */
228 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
229 	if (fd == -1)  {
230 		if (t_errno == TSYSERR && errno == EMFILE &&
231 				(nofile_increase(0) == 0)) {
232 			/* Try again with a higher NOFILE limit. */
233 			fd = t_open(nconf->nc_device, O_RDWR,
234 				(struct t_info *)NULL);
235 		}
236 		if (fd == -1) {
237 			if (t_errno == TSYSERR) {
238 				syslog(LOG_ERR, "t_open failed: %m");
239 			} else {
240 				syslog(LOG_ERR, "t_open failed: %s",
241 				    t_errlist[t_errno]);
242 			}
243 			return (-1);
244 		}
245 	}
246 
247 	/*
248 	 * Pop timod because the RPC module must be as close as possible
249 	 * to the transport.
250 	 */
251 	if (ioctl(fd, I_POP, 0) < 0) {
252 		syslog(LOG_ERR, "I_POP of timod failed: %m");
253 		if (t_close(fd) == -1) {
254 			if (t_errno == TSYSERR) {
255 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
256 			} else {
257 				syslog(LOG_ERR, "t_close failed on %d: %s",
258 				    fd, t_errlist[t_errno]);
259 			}
260 		}
261 		return (-1);
262 	}
263 
264 	if (nconf->nc_semantics == NC_TPI_CLTS) {
265 		/*
266 		 * Push rpcmod to filter data traffic to KRPC.
267 		 */
268 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
269 			syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
270 			(void) t_close(fd);
271 			return (-1);
272 		}
273 	} else {
274 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
275 			syslog(LOG_ERR, "I_PUSH of CONS rpcmod failed: %m");
276 			if (t_close(fd) == -1) {
277 				if (t_errno == TSYSERR) {
278 					syslog(LOG_ERR,
279 						"t_close failed on %d: %m", fd);
280 				} else {
281 					syslog(LOG_ERR,
282 						"t_close failed on %d: %s",
283 						fd, t_errlist[t_errno]);
284 				}
285 			}
286 			return (-1);
287 		}
288 
289 		strioc.ic_cmd = RPC_SERVER;
290 		strioc.ic_dp = (char *)0;
291 		strioc.ic_len = 0;
292 		strioc.ic_timout = -1;
293 		/* Tell CONS rpcmod to act like a server stream. */
294 		if (ioctl(fd, I_STR, &strioc) < 0) {
295 			syslog(LOG_ERR, "CONS rpcmod set-up ioctl failed: %m");
296 			if (t_close(fd) == -1) {
297 				if (t_errno == TSYSERR) {
298 					syslog(LOG_ERR,
299 						"t_close failed on %d: %m", fd);
300 				} else {
301 					syslog(LOG_ERR,
302 						"t_close failed on %d: %s",
303 						fd, t_errlist[t_errno]);
304 				}
305 			}
306 			return (-1);
307 		}
308 	}
309 
310 	/*
311 	 * Re-push timod so that we will still be doing TLI
312 	 * operations on the descriptor.
313 	 */
314 	if (ioctl(fd, I_PUSH, "timod") < 0) {
315 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
316 		if (t_close(fd) == -1) {
317 			if (t_errno == TSYSERR) {
318 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
319 			} else {
320 				syslog(LOG_ERR, "t_close failed on %d: %s",
321 				    fd, t_errlist[t_errno]);
322 			}
323 		}
324 		return (-1);
325 	}
326 
327 	return (fd);
328 }
329 
330 
331 void
332 rdcd_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
333 {
334 	int error;
335 
336 	/*
337 	 * Save the error code across syslog(), just in case syslog()
338 	 * gets its own error and, therefore, overwrites errno.
339 	 */
340 	error = errno;
341 	if (t_errno == TSYSERR) {
342 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
343 		    tli_name, fd, nconf->nc_proto);
344 	} else {
345 		syslog(LOG_ERR,
346 		    "%s(file descriptor %d/transport %s) TLI error %d",
347 		    tli_name, fd, nconf->nc_proto, t_errno);
348 	}
349 	errno = error;
350 }
351 
352 /*
353  * Called to set up service over a particular transport
354  */
355 void
356 do_one(char *provider, char *proto, struct protob *protobp0,
357 	int (*svc)(int, struct netbuf, struct netconfig *))
358 {
359 	struct netbuf *retaddr;
360 	struct netconfig *retnconf;
361 	struct netbuf addrmask;
362 	int vers;
363 	int sock;
364 
365 	if (provider) {
366 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
367 		    &retnconf);
368 	} else {
369 		(void) syslog(LOG_ERR,
370 	"Cannot establish %s service over %s: transport setup problem.",
371 		    protobp0->serv, provider ? provider : proto);
372 		return;
373 	}
374 
375 	if (sock == -1) {
376 		if ((Is_ipv6present() &&
377 		(strcmp(provider, "/dev/tcp6") == 0)) ||
378 		(!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
379 			(void) syslog(LOG_ERR,
380 			    "Cannot establish %s service over %s: transport "
381 				"setup problem.",
382 				protobp0->serv, provider ? provider : proto);
383 		return;
384 	}
385 
386 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
387 		(void) syslog(LOG_ERR,
388 		    "Cannot set address mask for %s", retnconf->nc_netid);
389 		return;
390 	}
391 
392 
393 	/*
394 	 * Register all versions of the programs in the protocol block list
395 	 */
396 	for (vers = protobp0->versmin; vers <= protobp0->versmax; vers++) {
397 		(void) rpcb_unset(protobp0->program, vers, retnconf);
398 		(void) rpcb_set(protobp0->program, vers, retnconf, retaddr);
399 	}
400 
401 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
402 		/* Don't drop core if supporting module(s) aren't loaded. */
403 		(void) signal(SIGSYS, SIG_IGN);
404 
405 		/*
406 		 * svc() doesn't block, it returns success or failure.
407 		 */
408 		if ((*svc)(sock, addrmask, retnconf) < 0) {
409 			(void) syslog(LOG_ERR,
410 "Cannot establish %s service over <file desc. %d, protocol %s> : %m. Exiting",
411 				protobp0->serv, sock, retnconf->nc_proto);
412 			exit(1);
413 		}
414 	}
415 	/*
416 	 * We successfully set up the server over this transport.
417 	 * Add this descriptor to the one being polled on.
418 	 */
419 	add_to_poll_list(sock, retnconf);
420 }
421 
422 /*
423  * Set up the SNDR/ncall-ip service over all the available transports.
424  * Returns -1 for failure, 0 for success.
425  */
426 int
427 do_all(struct protob *protobp,
428 	int (*svc)(int, struct netbuf, struct netconfig *))
429 {
430 	struct netconfig *nconf;
431 	NCONF_HANDLE *nc;
432 
433 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
434 		syslog(LOG_ERR, "setnetconfig failed: %m");
435 		return (-1);
436 	}
437 	while (nconf = getnetconfig(nc)) {
438 		if ((nconf->nc_flag & NC_VISIBLE) &&
439 		    strcmp(nconf->nc_protofmly, "loopback") != 0 &&
440 		    OK_TPI_TYPE(nconf))
441 			do_one(nconf->nc_device, nconf->nc_proto,
442 				protobp, svc);
443 	}
444 	(void) endnetconfig(nc);
445 	return (0);
446 }
447 
448 /*
449  * Read the /etc/default/sndr configuration file to determine if the
450  * client has been configured for number of threads, backlog or transport
451  * provider.
452  */
453 
454 static void
455 read_default(void)
456 {
457 	char *defval, *tmp_str;
458 	int errno;
459 	int tmp;
460 
461 	/* Fail silently if error in opening the default rdc config file */
462 	if ((defopen(RDCADMIN)) == 0) {
463 		if ((defval = defread("SNDR_THREADS=")) != NULL) {
464 			errno = 0;
465 			tmp = strtol(defval, (char **)NULL, 10);
466 			if (errno == 0) {
467 				max_conns_allowed = tmp;
468 			}
469 		}
470 		if ((defval = defread("SNDR_LISTEN_BACKLOG=")) != NULL) {
471 			errno = 0;
472 			tmp = strtol(defval, (char **)NULL, 10);
473 			if (errno == 0) {
474 				listen_backlog = tmp;
475 			}
476 		}
477 		if ((defval = defread("SNDR_TRANSPORT=")) != NULL) {
478 			errno = 0;
479 			tmp_str = strdup(defval);
480 			if (errno == 0) {
481 				trans_provider = tmp_str;
482 			}
483 		}
484 		/* close defaults file */
485 		(void) defopen(NULL);
486 	}
487 }
488 #ifdef lint
489 int
490 sndrd_lintmain(int ac, char **av)
491 #else
492 int
493 main(int ac, char **av)
494 #endif
495 {
496 	const char *dir = "/";
497 	int allflag = 0;
498 	int pid;
499 	int i, rc;
500 	struct protob *protobp0, *protobp;
501 	char **providerp;
502 	char *required;
503 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
504 	int maxservers;
505 #endif
506 
507 	(void) setlocale(LC_ALL, "");
508 #ifdef	__NCALL__
509 	(void) textdomain("ncall");
510 #else
511 	(void) textdomain("rdc");
512 #endif
513 
514 	progname = basename(av[0]);
515 
516 #ifdef	__NCALL__
517 	rc = ncall_check_release(&required);
518 #else
519 	rc = rdc_check_release(&required);
520 #endif
521 	if (rc < 0) {
522 		(void) fprintf(stderr,
523 		    gettext("%s: unable to determine the current "
524 		    "Solaris release: %s\n"), progname, strerror(errno));
525 		exit(1);
526 	} else if (rc == FALSE) {
527 		(void) fprintf(stderr,
528 		    gettext("%s: incorrect Solaris release (requires %s)\n"),
529 		    progname, required);
530 		exit(1);
531 	}
532 
533 	openlog(progname, LOG_PID|LOG_CONS, LOG_DAEMON);
534 	read_default();
535 
536 	/*
537 	 * Usage: <progname> [-c <number of threads>] [-t protocol] \
538 	 *		[-d] [-l <listen backlog>]
539 	 */
540 	while ((i = getopt(ac, av, "ac:t:dl:")) != EOF) {
541 		switch (i) {
542 			case 'a':
543 				allflag = 1;
544 				break;
545 			case 'c':
546 				max_conns_allowed = atoi(optarg);
547 				if (max_conns_allowed <= 0)
548 					max_conns_allowed = 16;
549 				break;
550 
551 			case 'd':
552 				debugflg++;
553 				break;
554 
555 			case 't':
556 				trans_provider = optarg;
557 				break;
558 
559 			case 'l':
560 				listen_backlog = atoi(optarg);
561 				if (listen_backlog < 0)
562 					listen_backlog = 32;
563 				break;
564 
565 			default:
566 				syslog(LOG_ERR,
567 				    "Usage: %s [-c <number of threads>] "
568 				    "[-d] [-t protocol] "
569 				    "[-l <listen backlog>]\n", progname);
570 				exit(1);
571 				break;
572 		}
573 	}
574 
575 	if (chroot(dir) < 0) {
576 		syslog(LOG_ERR, "chroot failed: %m");
577 		exit(1);
578 	}
579 
580 	if (chdir(dir) < 0) {
581 		syslog(LOG_ERR, "chdir failed: %m");
582 		exit(1);
583 	}
584 
585 	if (!debugflg) {
586 		pid = fork();
587 		if (pid < 0) {
588 			syslog(LOG_ERR, "Fork failed\n");
589 			exit(1);
590 		}
591 		if (pid != 0)
592 			exit(0);
593 
594 		/*
595 		 * Close existing file descriptors, open "/dev/null" as
596 		 * standard input, output, and error, and detach from
597 		 * controlling terminal.
598 		 */
599 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
600 		/* use closefrom(3C) from PSARC/2000/193 when possible */
601 		closefrom(0);
602 #else
603 		for (i = 0; i < _NFILE; i++)
604 			(void) close(i);
605 #endif
606 		(void) open("/dev/null", O_RDONLY);
607 		(void) open("/dev/null", O_WRONLY);
608 		(void) dup(1);
609 		(void) setsid();
610 
611 		/*
612 		 * ignore all signals apart from SIGTERM.
613 		 */
614 		for (i = 1; i < _sys_nsig; i++)
615 			(void) sigset(i, SIG_IGN);
616 
617 		(void) sigset(SIGTERM, SIG_DFL);
618 	}
619 
620 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
621 	/*
622 	 * Set up kernel RPC thread pool for the SNDR/ncall-ip server.
623 	 */
624 	maxservers = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
625 	if (sndrsvcpool(maxservers)) {
626 		(void) syslog(LOG_ERR,
627 		    "Can't set up kernel %s service: %m. Exiting", progname);
628 		exit(1);
629 	}
630 
631 	/*
632 	 * Set up blocked thread to do LWP creation on behalf of the kernel.
633 	 */
634 	if (svcwait(RDC_SVCPOOL_ID)) {
635 		(void) syslog(LOG_ERR,
636 		    "Can't set up %s pool creator: %m, Exiting", progname);
637 		exit(1);
638 	}
639 #endif
640 
641 	/*
642 	 * Build a protocol block list for registration.
643 	 */
644 	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
645 	protobp->serv = RDC_SVC_NAME;
646 	protobp->versmin = RDC_VERS_MIN;
647 	protobp->versmax = RDC_VERS_MAX;
648 	protobp->program = RDC_PROGRAM;
649 	protobp->next = (struct protob *)NULL;
650 
651 	if (allflag) {
652 		if (do_all(protobp0, rdcsvc) == -1)
653 			exit(1);
654 	} else if (trans_provider)
655 		do_one(trans_provider, NULL, protobp0, rdcsvc);
656 	else {
657 		for (providerp = defaultproviders;
658 		    *providerp != NULL; providerp++) {
659 			trans_provider = *providerp;
660 			do_one(trans_provider, NULL, protobp0, rdcsvc);
661 		}
662 	}
663 
664 done:
665 	free(protobp);
666 
667 	end_listen_fds = num_fds;
668 	/*
669 	 * Poll for non-data control events on the transport descriptors.
670 	 */
671 	poll_for_action();
672 
673 	syslog(LOG_ERR, "%s fatal server error\n", progname);
674 
675 	return (-1);
676 }
677 
678 static int
679 reuseaddr(int fd)
680 {
681 	struct t_optmgmt req, resp;
682 	struct opthdr *opt;
683 	char reqbuf[128];
684 	int *ip;
685 
686 	/* LINTED pointer alignment */
687 	opt = (struct opthdr *)reqbuf;
688 	opt->level = SOL_SOCKET;
689 	opt->name = SO_REUSEADDR;
690 	opt->len = sizeof (int);
691 
692 	/* LINTED pointer alignment */
693 	ip = (int *)&reqbuf[sizeof (struct opthdr)];
694 	*ip = 1;
695 
696 	req.flags = T_NEGOTIATE;
697 	req.opt.len = sizeof (struct opthdr) + opt->len;
698 	req.opt.buf = (char *)opt;
699 
700 	resp.flags = 0;
701 	resp.opt.buf = reqbuf;
702 	resp.opt.maxlen = sizeof (reqbuf);
703 
704 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
705 		if (t_errno == TSYSERR) {
706 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %m\n");
707 		} else {
708 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %s\n",
709 			    t_errlist[t_errno]);
710 		}
711 		return (-1);
712 	}
713 	return (0);
714 }
715 
716 /*
717  * poll on the open transport descriptors for events and errors.
718  */
719 void
720 poll_for_action(void)
721 {
722 	int nfds;
723 	int i;
724 
725 	/*
726 	 * Keep polling until all transports have been closed. When this
727 	 * happens, we return.
728 	 */
729 	while ((int)num_fds > 0) {
730 		nfds = poll(poll_array, num_fds, INFTIM);
731 		switch (nfds) {
732 		case 0:
733 			continue;
734 
735 		case -1:
736 			/*
737 			 * Some errors from poll could be
738 			 * due to temporary conditions, and we try to
739 			 * be robust in the face of them. Other
740 			 * errors (should never happen in theory)
741 			 * are fatal (eg. EINVAL, EFAULT).
742 			 */
743 			switch (errno) {
744 			case EINTR:
745 			    continue;
746 
747 			case EAGAIN:
748 			case ENOMEM:
749 				(void) sleep(10);
750 				continue;
751 
752 			default:
753 				(void) syslog(LOG_ERR,
754 				    "poll failed: %m. Exiting");
755 				exit(1);
756 			}
757 		default:
758 			break;
759 		}
760 
761 		/*
762 		 * Go through the poll list looking for events.
763 		 */
764 		for (i = 0; i < num_fds && nfds > 0; i++) {
765 			if (poll_array[i].revents) {
766 				nfds--;
767 				/*
768 				 * We have a message, so try to read it.
769 				 * Record the error return in errno,
770 				 * so that syslog(LOG_ERR, "...%m")
771 				 * dumps the corresponding error string.
772 				 */
773 				if (conn_polled[i].nc.nc_semantics ==
774 				    NC_TPI_CLTS) {
775 					errno = do_poll_clts_action(
776 					    poll_array[i].fd, i);
777 				} else {
778 					errno = do_poll_cots_action(
779 					    poll_array[i].fd, i);
780 				}
781 
782 				if (errno == 0)
783 					continue;
784 				/*
785 				 * Most returned error codes mean that there is
786 				 * fatal condition which we can only deal with
787 				 * by closing the transport.
788 				 */
789 				if (errno != EAGAIN && errno != ENOMEM) {
790 					(void) syslog(LOG_ERR,
791 					    "Error (%m) reading descriptor %d"
792 					    "/transport %s. Closing it.",
793 					    poll_array[i].fd,
794 					    conn_polled[i].nc.nc_proto);
795 					(void) t_close(poll_array[i].fd);
796 					remove_from_poll_list(poll_array[i].fd);
797 				} else if (errno == ENOMEM)
798 					(void) sleep(5);
799 			}
800 		}
801 	}
802 
803 	(void) syslog(LOG_ERR,
804 	    "All transports have been closed with errors. Exiting.");
805 }
806 
807 /*
808  * Allocate poll/transport array entries for this descriptor.
809  */
810 static void
811 add_to_poll_list(int fd, struct netconfig *nconf)
812 {
813 	static int poll_array_size = 0;
814 
815 	/*
816 	 * If the arrays are full, allocate new ones.
817 	 */
818 	if (num_fds == poll_array_size) {
819 		struct pollfd *tpa;
820 		struct conn_entry *tnp;
821 
822 		if (poll_array_size != 0) {
823 			tpa = poll_array;
824 			tnp = conn_polled;
825 		} else
826 			tpa = (struct pollfd *)0;
827 
828 		poll_array_size += POLL_ARRAY_INC_SIZE;
829 
830 		/*
831 		 * Allocate new arrays.
832 		 */
833 		poll_array = (struct pollfd *)
834 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
835 		conn_polled = (struct conn_entry *)
836 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
837 		if (poll_array == (struct pollfd *)NULL ||
838 		    conn_polled == (struct conn_entry *)NULL) {
839 			syslog(LOG_ERR, "malloc failed for poll array");
840 			exit(1);
841 		}
842 
843 		/*
844 		 * Copy the data of the old ones into new arrays, and
845 		 * free the old ones.
846 		 * num_fds is guaranteed to be less than
847 		 * poll_array_size, so this memcpy is safe.
848 		 */
849 		if (tpa) {
850 			(void) memcpy((void *)poll_array, (void *)tpa,
851 				num_fds * sizeof (struct pollfd));
852 			(void) memcpy((void *)conn_polled, (void *)tnp,
853 				num_fds * sizeof (struct conn_entry));
854 			free((void *)tpa);
855 			free((void *)tnp);
856 		}
857 	}
858 
859 	/*
860 	 * Set the descriptor and event list. All possible events are
861 	 * polled for.
862 	 */
863 	poll_array[num_fds].fd = fd;
864 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
865 
866 	/*
867 	 * Copy the transport data over too.
868 	 */
869 	conn_polled[num_fds].nc = *nconf;	/* structure copy */
870 	conn_polled[num_fds].closing = 0;
871 
872 	/*
873 	 * Set the descriptor to non-blocking. Avoids a race
874 	 * between data arriving on the stream and then having it
875 	 * flushed before we can read it.
876 	 */
877 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
878 		(void) syslog(LOG_ERR,
879 		    "fcntl(file desc. %d/transport %s, F_SETFL, "
880 		    "O_NONBLOCK): %m. Exiting",
881 		    num_fds, nconf->nc_proto);
882 		exit(1);
883 	}
884 
885 	/*
886 	 * Count this descriptor.
887 	 */
888 	++num_fds;
889 }
890 
891 static void
892 remove_from_poll_list(int fd)
893 {
894 	int i;
895 	int num_to_copy;
896 
897 	for (i = 0; i < num_fds; i++) {
898 		if (poll_array[i].fd == fd) {
899 			--num_fds;
900 			num_to_copy = num_fds - i;
901 			(void) memcpy((void *)&poll_array[i],
902 			    (void *)&poll_array[i+1],
903 			    num_to_copy * sizeof (struct pollfd));
904 			(void) memset((void *)&poll_array[num_fds], 0,
905 			    sizeof (struct pollfd));
906 			(void) memcpy((void *)&conn_polled[i],
907 			    (void *)&conn_polled[i+1],
908 			    num_to_copy * sizeof (struct conn_entry));
909 			(void) memset((void *)&conn_polled[num_fds], 0,
910 			    sizeof (struct conn_entry));
911 			return;
912 		}
913 	}
914 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
915 
916 }
917 
918 static void
919 conn_close_oldest(void)
920 {
921 	int fd;
922 	int i1;
923 
924 	/*
925 	 * Find the oldest connection that is not already in the
926 	 * process of shutting down.
927 	 */
928 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
929 		if (i1 >= num_fds)
930 			return;
931 		if (conn_polled[i1].closing == 0)
932 			break;
933 	}
934 #ifdef DEBUG
935 	(void) printf("too many connections (%d), releasing oldest (%d)\n",
936 	    num_conns, poll_array[i1].fd);
937 #else
938 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
939 	    num_conns, poll_array[i1].fd);
940 #endif
941 	fd = poll_array[i1].fd;
942 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
943 		/*
944 		 * For politeness, send a T_DISCON_REQ to the transport
945 		 * provider.  We close the stream anyway.
946 		 */
947 		(void) t_snddis(fd, (struct t_call *)0);
948 		num_conns--;
949 		remove_from_poll_list(fd);
950 		(void) t_close(fd);
951 	} else {
952 		/*
953 		 * For orderly release, we do not close the stream
954 		 * until the T_ORDREL_IND arrives to complete
955 		 * the handshake.
956 		 */
957 		if (t_sndrel(fd) == 0)
958 			conn_polled[i1].closing = 1;
959 	}
960 }
961 
962 static boolean_t
963 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
964 {
965 	struct conn_ind	*conn;
966 	struct conn_ind	*next_conn;
967 
968 	conn = (struct conn_ind *)malloc(sizeof (*conn));
969 	if (conn == NULL) {
970 		syslog(LOG_ERR, "malloc for listen indication failed");
971 		return (FALSE);
972 	}
973 
974 	/* LINTED pointer alignment */
975 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
976 	if (conn->conn_call == NULL) {
977 		free((char *)conn);
978 		rdcd_log_tli_error("t_alloc", fd, nconf);
979 		return (FALSE);
980 	}
981 
982 	if (t_listen(fd, conn->conn_call) == -1) {
983 		rdcd_log_tli_error("t_listen", fd, nconf);
984 		(void) t_free((char *)conn->conn_call, T_CALL);
985 		free((char *)conn);
986 		return (FALSE);
987 	}
988 
989 	if (conn->conn_call->udata.len > 0) {
990 		syslog(LOG_WARNING,
991 		    "rejecting inbound connection(%s) with %d bytes "
992 		    "of connect data",
993 		    nconf->nc_proto, conn->conn_call->udata.len);
994 
995 		conn->conn_call->udata.len = 0;
996 		(void) t_snddis(fd, conn->conn_call);
997 		(void) t_free((char *)conn->conn_call, T_CALL);
998 		free((char *)conn);
999 		return (FALSE);
1000 	}
1001 
1002 	if ((next_conn = *connp) != NULL) {
1003 		next_conn->conn_prev->conn_next = conn;
1004 		conn->conn_next = next_conn;
1005 		conn->conn_prev = next_conn->conn_prev;
1006 		next_conn->conn_prev = conn;
1007 	} else {
1008 		conn->conn_next = conn;
1009 		conn->conn_prev = conn;
1010 		*connp = conn;
1011 	}
1012 	return (TRUE);
1013 }
1014 
1015 static int
1016 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1017 {
1018 	struct conn_ind	*conn;
1019 	struct t_discon	discon;
1020 
1021 	discon.udata.buf = (char *)0;
1022 	discon.udata.maxlen = 0;
1023 	if (t_rcvdis(fd, &discon) == -1) {
1024 		rdcd_log_tli_error("t_rcvdis", fd, nconf);
1025 		return (-1);
1026 	}
1027 
1028 	conn = *connp;
1029 	if (conn == NULL)
1030 		return (0);
1031 
1032 	do {
1033 		if (conn->conn_call->sequence == discon.sequence) {
1034 			if (conn->conn_next == conn)
1035 				*connp = (struct conn_ind *)0;
1036 			else {
1037 				if (conn == *connp) {
1038 					*connp = conn->conn_next;
1039 				}
1040 				conn->conn_next->conn_prev = conn->conn_prev;
1041 				conn->conn_prev->conn_next = conn->conn_next;
1042 			}
1043 			free((char *)conn);
1044 			break;
1045 		}
1046 		conn = conn->conn_next;
1047 	} while (conn != *connp);
1048 
1049 	return (0);
1050 }
1051 
1052 static void
1053 cots_listen_event(int fd, int conn_index)
1054 {
1055 	struct t_call *call;
1056 	struct conn_ind	*conn;
1057 	struct conn_ind	*conn_head;
1058 	int event;
1059 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1060 	int new_fd;
1061 	struct netbuf addrmask;
1062 	int ret = 0;
1063 
1064 	conn_head = (struct conn_ind *)0;
1065 	(void) conn_get(fd, nconf, &conn_head);
1066 
1067 	while ((conn = conn_head) != NULL) {
1068 		conn_head = conn->conn_next;
1069 		if (conn_head == conn)
1070 			conn_head = (struct conn_ind *)0;
1071 		else {
1072 			conn_head->conn_prev = conn->conn_prev;
1073 			conn->conn_prev->conn_next = conn_head;
1074 		}
1075 		call = conn->conn_call;
1076 		free((char *)conn);
1077 
1078 		/*
1079 		 * If we have already accepted the maximum number of
1080 		 * connections allowed on the command line, then drop
1081 		 * the oldest connection (for any protocol) before
1082 		 * accepting the new connection.  Unless explicitly
1083 		 * set on the command line, max_conns_allowed is -1.
1084 		 */
1085 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1086 			conn_close_oldest();
1087 
1088 		/*
1089 		 * Create a new transport endpoint for the same proto as
1090 		 * the listener.
1091 		 */
1092 		new_fd = rdc_transport_open(nconf);
1093 		if (new_fd == -1) {
1094 			call->udata.len = 0;
1095 			(void) t_snddis(fd, call);
1096 			(void) t_free((char *)call, T_CALL);
1097 			syslog(LOG_ERR, "Cannot establish transport over %s",
1098 			    nconf->nc_device);
1099 			continue;
1100 		}
1101 
1102 		/* Bind to a generic address/port for the accepting stream. */
1103 		if (t_bind(new_fd, (struct t_bind *)NULL,
1104 		    (struct t_bind *)NULL) == -1) {
1105 			rdcd_log_tli_error("t_bind", new_fd, nconf);
1106 			call->udata.len = 0;
1107 			(void) t_snddis(fd, call);
1108 			(void) t_free((char *)call, T_CALL);
1109 			(void) t_close(new_fd);
1110 			continue;
1111 		}
1112 
1113 		while (t_accept(fd, new_fd, call) == -1) {
1114 			if (t_errno != TLOOK) {
1115 				rdcd_log_tli_error("t_accept", fd, nconf);
1116 				call->udata.len = 0;
1117 				(void) t_snddis(fd, call);
1118 				(void) t_free((char *)call, T_CALL);
1119 				(void) t_close(new_fd);
1120 				goto do_next_conn;
1121 			}
1122 			while (event = t_look(fd)) {
1123 				switch (event) {
1124 				case T_LISTEN:
1125 #ifdef DEBUG
1126 					(void) printf(
1127 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1128 #endif
1129 					(void) conn_get(fd, nconf, &conn_head);
1130 					continue;
1131 
1132 				case T_DISCONNECT:
1133 #ifdef DEBUG
1134 					(void) printf(
1135 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1136 						nconf->nc_proto);
1137 #endif
1138 					(void) discon_get(fd, nconf,
1139 					    &conn_head);
1140 					continue;
1141 
1142 				default:
1143 					syslog(LOG_ERR,
1144 					    "unexpected event 0x%x during "
1145 					    "accept processing (%s)",
1146 					    event, nconf->nc_proto);
1147 					call->udata.len = 0;
1148 					(void) t_snddis(fd, call);
1149 					(void) t_free((char *)call, T_CALL);
1150 					(void) t_close(new_fd);
1151 					goto do_next_conn;
1152 				}
1153 			}
1154 		}
1155 
1156 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1157 			(void) syslog(LOG_ERR, "Cannot set address mask for %s",
1158 			    nconf->nc_netid);
1159 			return;
1160 		}
1161 
1162 		/* Tell KRPC about the new stream. */
1163 		ret = (*Mysvc)(new_fd, addrmask, nconf);
1164 		if (ret < 0) {
1165 			syslog(LOG_ERR,
1166 			    "unable to register with kernel rpc: %m");
1167 			free(addrmask.buf);
1168 			(void) t_snddis(new_fd, (struct t_call *)0);
1169 			(void) t_free((char *)call, T_CALL);
1170 			(void) t_close(new_fd);
1171 			goto do_next_conn;
1172 		}
1173 
1174 		free(addrmask.buf);
1175 		(void) t_free((char *)call, T_CALL);
1176 
1177 		/*
1178 		 * Poll on the new descriptor so that we get disconnect
1179 		 * and orderly release indications.
1180 		 */
1181 		num_conns++;
1182 		add_to_poll_list(new_fd, nconf);
1183 
1184 		/* Reset nconf in case it has been moved. */
1185 		nconf = &conn_polled[conn_index].nc;
1186 do_next_conn:;
1187 	}
1188 }
1189 
1190 static int
1191 do_poll_cots_action(int fd, int conn_index)
1192 {
1193 	char buf[256];
1194 	int event;
1195 	int i1;
1196 	int flags;
1197 	struct conn_entry *connent = &conn_polled[conn_index];
1198 	struct netconfig *nconf = &(connent->nc);
1199 	const char *errorstr;
1200 
1201 	while (event = t_look(fd)) {
1202 		switch (event) {
1203 		case T_LISTEN:
1204 #ifdef DEBUG
1205 	(void) printf("do_poll_cots_action(%s, %d): T_LISTEN event\n",
1206 	    nconf->nc_proto, fd);
1207 #endif
1208 			cots_listen_event(fd, conn_index);
1209 			break;
1210 
1211 		case T_DATA:
1212 #ifdef DEBUG
1213 	(void) printf("do_poll_cots_action(%d, %s): T_DATA event\n",
1214 		fd, nconf->nc_proto);
1215 #endif
1216 			/*
1217 			 * Receive a private notification from CONS rpcmod.
1218 			 */
1219 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1220 			if (i1 == -1) {
1221 				syslog(LOG_ERR, "t_rcv failed");
1222 				break;
1223 			}
1224 			if (i1 < sizeof (int))
1225 				break;
1226 			i1 = BE32_TO_U32(buf);
1227 			if (i1 == 1 || i1 == 2) {
1228 				/*
1229 				 * This connection has been idle for too long,
1230 				 * so release it as politely as we can.  If we
1231 				 * have already initiated an orderly release
1232 				 * and we get notified that the stream is
1233 				 * still idle, pull the plug.  This prevents
1234 				 * hung connections from continuing to consume
1235 				 * resources.
1236 				 */
1237 #ifdef DEBUG
1238 (void) printf("do_poll_cots_action(%s, %d): ", nconf->nc_proto, fd);
1239 (void) printf("initiating orderly release of idle connection\n");
1240 #endif
1241 				if (nconf->nc_semantics == NC_TPI_COTS ||
1242 				    connent->closing != 0) {
1243 					(void) t_snddis(fd, (struct t_call *)0);
1244 					goto fdclose;
1245 				}
1246 				/*
1247 				 * For NC_TPI_COTS_ORD, the stream is closed
1248 				 * and removed from the poll list when the
1249 				 * T_ORDREL is received from the provider.  We
1250 				 * don't wait for it here because it may take
1251 				 * a while for the transport to shut down.
1252 				 */
1253 				if (t_sndrel(fd) == -1) {
1254 					syslog(LOG_ERR,
1255 					"unable to send orderly release %m");
1256 				}
1257 				connent->closing = 1;
1258 			} else
1259 				syslog(LOG_ERR,
1260 				    "unexpected event from CONS rpcmod %d", i1);
1261 			break;
1262 
1263 		case T_ORDREL:
1264 #ifdef DEBUG
1265 	(void) printf("do_poll_cots_action(%s, %d): T_ORDREL event\n",
1266 		nconf->nc_proto, fd);
1267 #endif
1268 			/* Perform an orderly release. */
1269 			if (t_rcvrel(fd) == 0) {
1270 				/* T_ORDREL on listen fd's should be ignored */
1271 				if (!is_listen_fd_index(fd)) {
1272 					(void) t_sndrel(fd);
1273 					goto fdclose;
1274 				}
1275 				break;
1276 
1277 			} else if (t_errno == TLOOK) {
1278 				break;
1279 			} else {
1280 				rdcd_log_tli_error("t_rcvrel", fd, nconf);
1281 				/*
1282 				 * check to make sure we do not close
1283 				 * listen fd
1284 				 */
1285 				if (!is_listen_fd_index(fd))
1286 					break;
1287 				else
1288 					goto fdclose;
1289 			}
1290 
1291 		case T_DISCONNECT:
1292 #ifdef DEBUG
1293 (void) printf("do_poll_cots_action(%s, %d): T_DISCONNECT event\n",
1294 nconf->nc_proto, fd);
1295 #endif
1296 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1297 				rdcd_log_tli_error("t_rcvdis", fd, nconf);
1298 
1299 			/*
1300 			 * T_DISCONNECT on listen fd's should be ignored.
1301 			 */
1302 			if (!is_listen_fd_index(fd))
1303 				break;
1304 			else
1305 				goto fdclose;
1306 
1307 		case T_ERROR:
1308 		default:
1309 			if (event == T_ERROR || t_errno == TSYSERR) {
1310 			    if ((errorstr = strerror(errno)) == NULL) {
1311 				(void) snprintf(buf, sizeof (buf),
1312 				    "Unknown error num %d", errno);
1313 				errorstr = (const char *)buf;
1314 			    }
1315 			} else if (event == -1)
1316 				errorstr = t_strerror(t_errno);
1317 			else
1318 				errorstr = "";
1319 #ifdef DEBUG
1320 			syslog(LOG_ERR,
1321 			    "unexpected TLI event (0x%x) on "
1322 			    "connection-oriented transport(%s, %d):%s",
1323 			    event, nconf->nc_proto, fd, errorstr);
1324 #endif
1325 
1326 fdclose:
1327 			num_conns--;
1328 			remove_from_poll_list(fd);
1329 			(void) t_close(fd);
1330 			return (0);
1331 		}
1332 	}
1333 
1334 	return (0);
1335 }
1336 
1337 
1338 /*
1339  * Called to read and interpret the event on a connectionless descriptor.
1340  * Returns 0 if successful, or a UNIX error code if failure.
1341  */
1342 static int
1343 do_poll_clts_action(int fd, int conn_index)
1344 {
1345 	int error;
1346 	int ret;
1347 	int flags;
1348 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1349 	static struct t_unitdata *unitdata = NULL;
1350 	static struct t_uderr *uderr = NULL;
1351 	static int oldfd = -1;
1352 	struct nd_hostservlist *host = NULL;
1353 	struct strbuf ctl[1], data[1];
1354 	/*
1355 	 * We just need to have some space to consume the
1356 	 * message in the event we can't use the TLI interface to do the
1357 	 * job.
1358 	 *
1359 	 * We flush the message using getmsg(). For the control part
1360 	 * we allocate enough for any TPI header plus 32 bytes for address
1361 	 * and options. For the data part, there is nothing magic about
1362 	 * the size of the array, but 256 bytes is probably better than
1363 	 * 1 byte, and we don't expect any data portion anyway.
1364 	 *
1365 	 * If the array sizes are too small, we handle this because getmsg()
1366 	 * (called to consume the message) will return MOREDATA|MORECTL.
1367 	 * Thus we just call getmsg() until it's read the message.
1368 	 */
1369 	char ctlbuf[sizeof (union T_primitives) + 32];
1370 	char databuf[256];
1371 
1372 	/*
1373 	 * If this is the same descriptor as the last time
1374 	 * do_poll_clts_action was called, we can save some
1375 	 * de-allocation and allocation.
1376 	 */
1377 	if (oldfd != fd) {
1378 		oldfd = fd;
1379 
1380 		if (unitdata) {
1381 			(void) t_free((char *)unitdata, T_UNITDATA);
1382 			unitdata = NULL;
1383 		}
1384 		if (uderr) {
1385 			(void) t_free((char *)uderr, T_UDERROR);
1386 			uderr = NULL;
1387 		}
1388 	}
1389 
1390 	/*
1391 	 * Allocate a unitdata structure for receiving the event.
1392 	 */
1393 	if (unitdata == NULL) {
1394 		/* LINTED pointer alignment */
1395 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
1396 		if (unitdata == NULL) {
1397 			if (t_errno == TSYSERR) {
1398 				/*
1399 				 * Save the error code across
1400 				 * syslog(), just in case
1401 				 * syslog() gets its own error
1402 				 * and therefore overwrites errno.
1403 				 */
1404 				error = errno;
1405 				(void) syslog(LOG_ERR,
1406 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
1407 					fd, nconf->nc_proto);
1408 				return (error);
1409 			}
1410 			(void) syslog(LOG_ERR,
1411 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
1412 					fd, nconf->nc_proto, t_errno);
1413 			goto flush_it;
1414 		}
1415 	}
1416 
1417 try_again:
1418 	flags = 0;
1419 
1420 	/*
1421 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
1422 	 * we don't get any, because rpcmod filters them out.
1423 	 * However, we need to call t_rcvudata() to let TLI
1424 	 * tell us we have a T_UDERROR_IND.
1425 	 *
1426 	 * algorithm is:
1427 	 * 	t_rcvudata(), expecting TLOOK.
1428 	 * 	t_look(), expecting T_UDERR.
1429 	 * 	t_rcvuderr(), expecting success (0).
1430 	 * 	expand destination address into ASCII,
1431 	 *	and dump it.
1432 	 */
1433 
1434 	ret = t_rcvudata(fd, unitdata, &flags);
1435 	if (ret == 0 || t_errno == TBUFOVFLW) {
1436 		(void) syslog(LOG_WARNING,
1437 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
1438 			fd, nconf->nc_proto, unitdata->udata.len);
1439 
1440 		/*
1441 		 * Even though we don't expect any data, in case we do,
1442 		 * keep reading until there is no more.
1443 		 */
1444 		if (flags & T_MORE)
1445 			goto try_again;
1446 
1447 		return (0);
1448 	}
1449 
1450 	switch (t_errno) {
1451 	case TNODATA:
1452 		return (0);
1453 	case TSYSERR:
1454 		/*
1455 		 * System errors are returned to caller.
1456 		 * Save the error code across
1457 		 * syslog(), just in case
1458 		 * syslog() gets its own error
1459 		 * and therefore overwrites errno.
1460 		 */
1461 		error = errno;
1462 		(void) syslog(LOG_ERR,
1463 			"t_rcvudata(file descriptor %d/transport %s) %m",
1464 			fd, nconf->nc_proto);
1465 		return (error);
1466 	case TLOOK:
1467 		break;
1468 	default:
1469 		(void) syslog(LOG_ERR,
1470 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1471 			fd, nconf->nc_proto, t_errno);
1472 		goto flush_it;
1473 	}
1474 
1475 	ret = t_look(fd);
1476 	switch (ret) {
1477 	case 0:
1478 		return (0);
1479 	case -1:
1480 		/*
1481 		 * System errors are returned to caller.
1482 		 */
1483 		if (t_errno == TSYSERR) {
1484 			/*
1485 			 * Save the error code across
1486 			 * syslog(), just in case
1487 			 * syslog() gets its own error
1488 			 * and therefore overwrites errno.
1489 			 */
1490 			error = errno;
1491 			(void) syslog(LOG_ERR,
1492 				"t_look(file descriptor %d/transport %s) %m",
1493 				fd, nconf->nc_proto);
1494 			return (error);
1495 		}
1496 		(void) syslog(LOG_ERR,
1497 			"t_look(file descriptor %d/transport %s) TLI error %d",
1498 			fd, nconf->nc_proto, t_errno);
1499 		goto flush_it;
1500 	case T_UDERR:
1501 		break;
1502 	default:
1503 		(void) syslog(LOG_WARNING,
1504 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1505 			fd, nconf->nc_proto, ret, T_UDERR);
1506 	}
1507 
1508 	if (uderr == NULL) {
1509 		/* LINTED pointer alignment */
1510 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1511 		if (uderr == NULL) {
1512 			if (t_errno == TSYSERR) {
1513 				/*
1514 				 * Save the error code across
1515 				 * syslog(), just in case
1516 				 * syslog() gets its own error
1517 				 * and therefore overwrites errno.
1518 				 */
1519 				error = errno;
1520 				(void) syslog(LOG_ERR,
1521 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1522 					fd, nconf->nc_proto);
1523 				return (error);
1524 			}
1525 			(void) syslog(LOG_ERR,
1526 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1527 				fd, nconf->nc_proto, t_errno);
1528 			goto flush_it;
1529 		}
1530 	}
1531 
1532 	ret = t_rcvuderr(fd, uderr);
1533 	if (ret == 0) {
1534 
1535 		/*
1536 		 * Save the datagram error in errno, so that the
1537 		 * %m argument to syslog picks up the error string.
1538 		 */
1539 		errno = uderr->error;
1540 
1541 		/*
1542 		 * Log the datagram error, then log the host that
1543 		 * probably triggerred. Cannot log both in the
1544 		 * same transaction because of packet size limitations
1545 		 * in /dev/log.
1546 		 */
1547 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1548 		    "%s response over <file descriptor %d/transport %s> "
1549 		    "generated error: %m",
1550 		    progname, fd, nconf->nc_proto);
1551 
1552 		/*
1553 		 * Try to map the client's address back to a
1554 		 * name.
1555 		 */
1556 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1557 		if (ret != -1 && host && host->h_cnt > 0 &&
1558 		    host->h_hostservs) {
1559 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1560 		    "Bad %s response was sent to client with "
1561 		    "host name: %s; service port: %s",
1562 		    progname, host->h_hostservs->h_host,
1563 		    host->h_hostservs->h_serv);
1564 		} else {
1565 			int i, j;
1566 			char *buf;
1567 			char *hex = "0123456789abcdef";
1568 
1569 			/*
1570 			 * Mapping failed, print the whole thing
1571 			 * in ASCII hex.
1572 			 */
1573 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1574 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1575 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1576 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1577 			}
1578 			buf[j] = '\0';
1579 			(void) syslog((errno == ECONNREFUSED) ?
1580 			    LOG_DEBUG : LOG_WARNING,
1581 			    "Bad %s response was sent to client with "
1582 			    "transport address: 0x%s",
1583 			    progname, buf);
1584 			free((void *)buf);
1585 		}
1586 
1587 		if (ret == 0 && host != NULL)
1588 			netdir_free((void *)host, ND_HOSTSERVLIST);
1589 		return (0);
1590 	}
1591 
1592 	switch (t_errno) {
1593 	case TNOUDERR:
1594 		goto flush_it;
1595 	case TSYSERR:
1596 		/*
1597 		 * System errors are returned to caller.
1598 		 * Save the error code across
1599 		 * syslog(), just in case
1600 		 * syslog() gets its own error
1601 		 * and therefore overwrites errno.
1602 		 */
1603 		error = errno;
1604 		(void) syslog(LOG_ERR,
1605 			"t_rcvuderr(file descriptor %d/transport %s) %m",
1606 			fd, nconf->nc_proto);
1607 		return (error);
1608 	default:
1609 		(void) syslog(LOG_ERR,
1610 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1611 			fd, nconf->nc_proto, t_errno);
1612 		goto flush_it;
1613 	}
1614 
1615 flush_it:
1616 	/*
1617 	 * If we get here, then we could not cope with whatever message
1618 	 * we attempted to read, so flush it. If we did read a message,
1619 	 * and one isn't present, that is all right, because fd is in
1620 	 * nonblocking mode.
1621 	 */
1622 	(void) syslog(LOG_ERR,
1623 	"Flushing one input message from <file descriptor %d/transport %s>",
1624 		fd, nconf->nc_proto);
1625 
1626 	/*
1627 	 * Read and discard the message. Do this this until there is
1628 	 * no more control/data in the message or until we get an error.
1629 	 */
1630 	do {
1631 		ctl->maxlen = sizeof (ctlbuf);
1632 		ctl->buf = ctlbuf;
1633 		data->maxlen = sizeof (databuf);
1634 		data->buf = databuf;
1635 		flags = 0;
1636 		ret = getmsg(fd, ctl, data, &flags);
1637 		if (ret == -1)
1638 			return (errno);
1639 	} while (ret != 0);
1640 
1641 	return (0);
1642 }
1643 
1644 /*
1645  * Establish service thread.
1646  */
1647 static int
1648 rdcsvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
1649 {
1650 #ifdef	__NCALL__
1651 	struct ncall_svc_args nsa;
1652 #else	/* !__NCALL__ */
1653 	struct rdc_svc_args nsa;
1654 	_rdc_ioctl_t rdc_args = { 0, };
1655 #endif	/* __NCALL__ */
1656 
1657 	nsa.fd = fd;
1658 	nsa.nthr = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
1659 	(void) strncpy(nsa.netid, nconf->nc_netid, sizeof (nsa.netid));
1660 	nsa.addrmask.len = addrmask.len;
1661 	nsa.addrmask.maxlen = addrmask.maxlen;
1662 	nsa.addrmask.buf = addrmask.buf;
1663 
1664 #ifdef	__NCALL__
1665 	return (sndrsys(NC_IOC_SERVER, &nsa));
1666 #else	/* !__NCALL__ */
1667 	rdc_args.arg0 = (long)&nsa;
1668 	return (sndrsys(RDC_ENABLE_SVR, &rdc_args));
1669 #endif	/* __NCALL__ */
1670 }
1671 
1672 
1673 
1674 static int
1675 nofile_increase(int limit)
1676 {
1677 	struct rlimit rl;
1678 
1679 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
1680 		syslog(LOG_ERR,
1681 		    "nofile_increase() getrlimit of NOFILE failed: %m");
1682 		return (-1);
1683 	}
1684 
1685 	if (limit > 0)
1686 		rl.rlim_cur = limit;
1687 	else
1688 		rl.rlim_cur += NOFILE_INC_SIZE;
1689 
1690 	if (rl.rlim_cur > rl.rlim_max && rl.rlim_max != RLIM_INFINITY)
1691 		rl.rlim_max = rl.rlim_cur;
1692 
1693 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
1694 		syslog(LOG_ERR,
1695 		    "nofile_increase() setrlimit of NOFILE to %d failed: %m",
1696 		    rl.rlim_cur);
1697 		return (-1);
1698 	}
1699 
1700 	return (0);
1701 }
1702 
1703 int
1704 rdcd_bindit(struct netconfig *nconf, struct netbuf **addr,
1705     struct nd_hostserv *hs, int backlog)
1706 {
1707 	int fd;
1708 	struct t_bind *ntb;
1709 	struct t_bind tb;
1710 	struct nd_addrlist *addrlist;
1711 	struct t_optmgmt req, resp;
1712 	struct opthdr *opt;
1713 	char reqbuf[128];
1714 
1715 	if ((fd = rdc_transport_open(nconf)) == -1) {
1716 		syslog(LOG_ERR, "cannot establish transport service over %s",
1717 		    nconf->nc_device);
1718 		return (-1);
1719 	}
1720 
1721 	addrlist = (struct nd_addrlist *)NULL;
1722 	if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
1723 		if (strncmp(nconf->nc_netid, "udp", 3) != 0) {
1724 			syslog(LOG_ERR, "Cannot get address for transport "
1725 			    "%s host %s service %s",
1726 			    nconf->nc_netid, hs->h_host, hs->h_serv);
1727 		}
1728 		(void) t_close(fd);
1729 		return (-1);
1730 	}
1731 
1732 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
1733 		/*
1734 		 * If we're running over TCP, then set the
1735 		 * SO_REUSEADDR option so that we can bind
1736 		 * to our preferred address even if previously
1737 		 * left connections exist in FIN_WAIT states.
1738 		 * This is somewhat bogus, but otherwise you have
1739 		 * to wait 2 minutes to restart after killing it.
1740 		 */
1741 		if (reuseaddr(fd) == -1) {
1742 			syslog(LOG_WARNING,
1743 			    "couldn't set SO_REUSEADDR option on transport");
1744 		}
1745 	}
1746 
1747 	if (nconf->nc_semantics == NC_TPI_CLTS)
1748 		tb.qlen = 0;
1749 	else
1750 		tb.qlen = backlog;
1751 
1752 	/* LINTED pointer alignment */
1753 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
1754 	if (ntb == (struct t_bind *)NULL) {
1755 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
1756 		(void) t_close(fd);
1757 		netdir_free((void *)addrlist, ND_ADDRLIST);
1758 		return (-1);
1759 	}
1760 
1761 	tb.addr = *(addrlist->n_addrs);		/* structure copy */
1762 
1763 	if (t_bind(fd, &tb, ntb) == -1) {
1764 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
1765 		(void) t_free((char *)ntb, T_BIND);
1766 		netdir_free((void *)addrlist, ND_ADDRLIST);
1767 		(void) t_close(fd);
1768 		return (-1);
1769 	}
1770 
1771 	/* make sure we bound to the right address */
1772 	if (tb.addr.len != ntb->addr.len ||
1773 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0) {
1774 		syslog(LOG_ERR, "t_bind to wrong address");
1775 		(void) t_free((char *)ntb, T_BIND);
1776 		netdir_free((void *)addrlist, ND_ADDRLIST);
1777 		(void) t_close(fd);
1778 		return (-1);
1779 	}
1780 
1781 	*addr = &ntb->addr;
1782 	netdir_free((void *)addrlist, ND_ADDRLIST);
1783 
1784 	if (strcmp(nconf->nc_proto, "tcp") == 0 ||
1785 	    strcmp(nconf->nc_proto, "tcp6") == 0) {
1786 		/*
1787 		 * Disable the Nagle algorithm on TCP connections.
1788 		 * Connections accepted from this listener will
1789 		 * inherit the listener options.
1790 		 */
1791 
1792 		/* LINTED pointer alignment */
1793 		opt = (struct opthdr *)reqbuf;
1794 		opt->level = IPPROTO_TCP;
1795 		opt->name = TCP_NODELAY;
1796 		opt->len = sizeof (int);
1797 
1798 		/* LINTED pointer alignment */
1799 		*(int *)((char *)opt + sizeof (*opt)) = 1;
1800 
1801 		req.flags = T_NEGOTIATE;
1802 		req.opt.len = sizeof (*opt) + opt->len;
1803 		req.opt.buf = (char *)opt;
1804 		resp.flags = 0;
1805 		resp.opt.buf = reqbuf;
1806 		resp.opt.maxlen = sizeof (reqbuf);
1807 
1808 		if (t_optmgmt(fd, &req, &resp) < 0 ||
1809 		    resp.flags != T_SUCCESS) {
1810 			syslog(LOG_ERR,
1811 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
1812 				nconf->nc_proto, t_errno);
1813 		}
1814 	}
1815 
1816 	return (fd);
1817 }
1818 
1819 
1820 /* ARGSUSED */
1821 static int
1822 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1823 		struct netconfig **retnconf)
1824 {
1825 	struct netconfig *nconf;
1826 	NCONF_HANDLE *nc;
1827 	struct nd_hostserv hs;
1828 
1829 	hs.h_host = HOST_SELF;
1830 	hs.h_serv = RDC_SERVICE;	/* serv_name_to_port_name(serv); */
1831 
1832 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1833 		syslog(LOG_ERR, "setnetconfig failed: %m");
1834 		return (-1);
1835 	}
1836 	while (nconf = getnetconfig(nc)) {
1837 		if (OK_TPI_TYPE(nconf) &&
1838 		    strcmp(nconf->nc_device, provider) == 0) {
1839 			*retnconf = nconf;
1840 			return (rdcd_bindit(nconf, addr, &hs, listen_backlog));
1841 		}
1842 	}
1843 	(void) endnetconfig(nc);
1844 	if ((Is_ipv6present() && (strcmp(provider, "/dev/tcp6") == 0)) ||
1845 	    (!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
1846 		syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1847 		    provider);
1848 	return (-1);
1849 }
1850 
1851 
1852 /*
1853  * For listen fd's index is always less than end_listen_fds.
1854  * It's value is equal to the number of open file descriptors after the
1855  * last listen end point was opened but before any connection was accepted.
1856  */
1857 static int
1858 is_listen_fd_index(int index)
1859 {
1860 	return (index < end_listen_fds);
1861 }
1862 
1863 
1864 /*
1865  * Create an address mask appropriate for the transport.
1866  * The mask is used to obtain the host-specific part of
1867  * a network address when comparing addresses.
1868  * For an internet address the host-specific part is just
1869  * the 32 bit IP address and this part of the mask is set
1870  * to all-ones. The port number part of the mask is zeroes.
1871  */
1872 static int
1873 set_addrmask(int fd, struct netconfig *nconf, struct netbuf *mask)
1874 {
1875 	struct t_info info;
1876 
1877 	/*
1878 	 * Find the size of the address we need to mask.
1879 	 */
1880 	if (t_getinfo(fd, &info) < 0) {
1881 		t_error("t_getinfo");
1882 		return (-1);
1883 	}
1884 	mask->len = mask->maxlen = info.addr;
1885 	if (info.addr <= 0) {
1886 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1887 			info.addr);
1888 		return (-1);
1889 	}
1890 
1891 	mask->buf = (char *)malloc(mask->len);
1892 	if (mask->buf == NULL) {
1893 		syslog(LOG_ERR, "set_addrmask: no memory");
1894 		return (-1);
1895 	}
1896 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1897 
1898 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1899 		/*
1900 		 * Set the mask so that the port is ignored.
1901 		 */
1902 		/* LINTED pointer alignment */
1903 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1904 		    (in_addr_t)~0;
1905 		/* LINTED pointer alignment */
1906 		((struct sockaddr_in *)mask->buf)->sin_family = (sa_family_t)~0;
1907 	}
1908 #ifdef NC_INET6
1909 	else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1910 		/* LINTED pointer alignment */
1911 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1912 		    (uchar_t)~0, sizeof (struct in6_addr));
1913 		/* LINTED pointer alignment */
1914 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1915 		    (sa_family_t)~0;
1916 	}
1917 #endif
1918 	else {
1919 		/*
1920 		 * Set all mask bits.
1921 		 */
1922 		(void) memset(mask->buf, (uchar_t)~0, mask->len);
1923 	}
1924 	return (0);
1925 }
1926 
1927 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
1928 
1929 static int
1930 sndrsvcpool(int maxservers)
1931 {
1932 	struct svcpool_args npa;
1933 
1934 	npa.id = RDC_SVCPOOL_ID;
1935 	npa.maxthreads = maxservers;
1936 	npa.redline = 0;
1937 	npa.qsize = 0;
1938 	npa.timeout = 0;
1939 	npa.stksize = 0;
1940 	npa.max_same_xprt = 0;
1941 	return (sndrsys(RDC_POOL_CREATE, &npa));
1942 }
1943 
1944 
1945 /*
1946  * The following stolen from cmd/fs.d/nfs/lib/thrpool.c
1947  */
1948 
1949 #include <thread.h>
1950 
1951 /*
1952  * Thread to call into the kernel and do work on behalf of SNDR/ncall-ip.
1953  */
1954 static void *
1955 svcstart(void *arg)
1956 {
1957 	int id = (int)arg;
1958 	int err;
1959 
1960 	while ((err = sndrsys(RDC_POOL_RUN, &id)) != 0) {
1961 		/*
1962 		 * Interrupted by a signal while in the kernel.
1963 		 * this process is still alive, try again.
1964 		 */
1965 		if (err == EINTR)
1966 			continue;
1967 		else
1968 			break;
1969 	}
1970 
1971 	/*
1972 	 * If we weren't interrupted by a signal, but did
1973 	 * return from the kernel, this thread's work is done,
1974 	 * and it should exit.
1975 	 */
1976 	thr_exit(NULL);
1977 	return (NULL);
1978 }
1979 
1980 /*
1981  * User-space "creator" thread. This thread blocks in the kernel
1982  * until new worker threads need to be created for the service
1983  * pool. On return to userspace, if there is no error, create a
1984  * new thread for the service pool.
1985  */
1986 static void *
1987 svcblock(void *arg)
1988 {
1989 	int id = (int)arg;
1990 
1991 	/* CONSTCOND */
1992 	while (1) {
1993 		thread_t tid;
1994 		int err;
1995 
1996 		/*
1997 		 * Call into the kernel, and hang out there
1998 		 * until a thread needs to be created.
1999 		 */
2000 		if (err = sndrsys(RDC_POOL_WAIT, &id)) {
2001 			if (err == ECANCELED || err == EBUSY)
2002 				/*
2003 				 * If we get back ECANCELED, the service
2004 				 * pool is exiting, and we may as well
2005 				 * clean up this thread. If EBUSY is
2006 				 * returned, there's already a thread
2007 				 * looping on this pool, so we should
2008 				 * give up.
2009 				 */
2010 				break;
2011 			else
2012 				continue;
2013 		}
2014 
2015 		(void) thr_create(NULL, NULL, svcstart, (void *)id,
2016 		    THR_BOUND | THR_DETACHED, &tid);
2017 	}
2018 
2019 	thr_exit(NULL);
2020 	return (NULL);
2021 }
2022 
2023 static int
2024 svcwait(int id)
2025 {
2026 	thread_t tid;
2027 
2028 	/*
2029 	 * Create a bound thread to wait for kernel LWPs that
2030 	 * need to be created.
2031 	 */
2032 	if (thr_create(NULL, NULL, svcblock, (void *)id,
2033 	    THR_BOUND | THR_DETACHED, &tid))
2034 		return (1);
2035 
2036 	return (0);
2037 }
2038 #endif /* Solaris 9+ */
2039