xref: /titanic_41/usr/src/cmd/avs/rdc/sndrd.c (revision 3b130e0a496138cdbe9e8aab0c1003faf847cf5b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /*
31  * Network SNDR/ncall-ip server - based on nfsd
32  */
33 #include <sys/types.h>
34 #include <rpc/types.h>
35 #include <errno.h>
36 #include <netdb.h>
37 #include <sys/socket.h>
38 #include <netconfig.h>
39 #include <stropts.h>
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <strings.h>
43 #include <signal.h>
44 #include <unistd.h>
45 #include <stdlib.h>
46 #include <netdir.h>
47 #include <rpc/rpc_com.h>
48 #include <rpc/rpc.h>
49 #include <tiuser.h>
50 #include <netinet/tcp.h>
51 #include <netinet/in.h>
52 #include <syslog.h>
53 #include <locale.h>
54 #include <langinfo.h>
55 #include <libintl.h>
56 #include <libgen.h>
57 #include <deflt.h>
58 #include <sys/resource.h>
59 
60 #include <sys/nsctl/nsctl.h>
61 
62 #ifdef	__NCALL__
63 
64 #include <sys/ncall/ncall.h>
65 #include <sys/ncall/ncall_ip.h>
66 #include <sys/nsctl/libncall.h>
67 
68 #define	RDC_POOL_CREATE	NC_IOC_POOL_CREATE
69 #define	RDC_POOL_RUN	NC_IOC_POOL_RUN
70 #define	RDC_POOL_WAIT	NC_IOC_POOL_WAIT
71 #define	RDC_PROGRAM	NCALL_PROGRAM
72 #define	RDC_SERVICE	"ncall"
73 #undef RDC_SVCPOOL_ID	/* We are overloading this value */
74 #define	RDC_SVCPOOL_ID	NCALL_SVCPOOL_ID
75 #define	RDC_SVC_NAME	"NCALL"
76 #define	RDC_VERS_MIN	NCALL_VERS_MIN
77 #define	RDC_VERS_MAX	NCALL_VERS_MAX
78 
79 #else	/* !__NCALL__ */
80 
81 #include <sys/nsctl/rdc_ioctl.h>
82 #include <sys/nsctl/rdc_io.h>
83 #include <sys/nsctl/librdc.h>
84 
85 #define	RDC_SERVICE	"rdc"
86 #define	RDC_SVC_NAME	"RDC"
87 
88 #endif	/* __NCALL__ */
89 
90 #define	RDCADMIN	"/etc/default/sndr"
91 
92 #include <nsctl.h>
93 
/*
 * One pending connect indication.  conn_get() links these into a
 * circular, doubly linked list (a single node points at itself) and
 * cots_listen_event() consumes them from the head.
 */
struct conn_ind {
	/* next node in the ring */
	struct conn_ind *conn_next;
	/* previous node in the ring */
	struct conn_ind *conn_prev;
	/* call structure obtained from t_alloc() and filled by t_listen() */
	struct t_call   *conn_call;
};
99 
/*
 * Per-descriptor bookkeeping.  Entry i of the conn_polled array describes
 * the descriptor held in entry i of poll_array.
 */
struct conn_entry {
	/* set to 1 by conn_close_oldest() once t_sndrel() has succeeded */
	bool_t			closing;
	/* private copy of the netconfig entry the descriptor was opened on */
	struct netconfig	nc;
};
104 
/* basename of av[0]; assigned in main() and used in log messages */
static char *progname;
/* per-descriptor state, indexed in parallel with poll_array */
static struct conn_entry *conn_polled;
static int num_conns;			/* Current number of connections */
static struct pollfd *poll_array;	/* array of poll descriptors for poll */
static size_t num_fds = 0;		/* number of transport fds opened */

/*
 * Forward declarations for the file-local helpers.
 */
static void poll_for_action();
static void remove_from_poll_list(int);
static int do_poll_cots_action(int, int);
static int do_poll_clts_action(int, int);
static void add_to_poll_list(int, struct netconfig *);
static int bind_to_provider(char *, char *, struct netbuf **,
    struct netconfig **);
static int set_addrmask(int, struct netconfig *, struct netbuf *);
static void conn_close_oldest(void);
static boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static void cots_listen_event(int, int);
static int discon_get(int, struct netconfig *, struct conn_ind **);
static int nofile_increase(int);
static int is_listen_fd_index(int);
/* The kernel service pool helpers are not built for SunOS 5.6 - 5.8. */
#if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
static int sndrsvcpool(int);
static int svcwait(int id);
#endif
128 
129 
/*
 * RPC protocol block.  Useful for passing registration information.
 * main() builds a single block and hands it to do_one()/do_all(), which
 * register every version from versmin through versmax (inclusive) of
 * `program' with rpcbind.
 */
struct protob {
	char *serv;		/* ASCII service name, e.g. "RDC" */
	int versmin;		/* minimum version no. to be registered */
	int versmax;		/* maximum version no. to be registered */
	int program;		/* program no. to be registered */
	struct protob *next;	/* next entry on list */
};
140 
141 
142 
/*
 * Value of num_fds once main() has set up the listening transports;
 * conn_close_oldest() starts its search for a victim at this index.
 */
static size_t end_listen_fds;
/* incremented for each -d option; when non-zero main() does not fork */
static int debugflg = 0;
/* connection limit (SNDR_THREADS or -c); -1 means no limit was configured */
static int max_conns_allowed = -1;
/* listen backlog (SNDR_LISTEN_BACKLOG or -l) */
static int listen_backlog = 10;
/* transport provider device (SNDR_TRANSPORT or -t); NULL when not chosen */
static char *trans_provider = (char *)NULL;
static int rdcsvc(int, struct netbuf, struct netconfig *);

/* used by cots_listen_event() */
static int (*Mysvc)(int, struct netbuf, struct netconfig *) = rdcsvc;
152 
/*
 * Determine valid semantics for rdc: true when the netconfig entry uses
 * connectionless, connection-oriented, or connection-oriented with orderly
 * release TPI semantics.  The argument is parenthesized so that any
 * pointer-valued expression can be passed.
 */
#define	OK_TPI_TYPE(_nconf)	\
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)

/*
 * Assemble a host-order 32-bit value from the four big-endian bytes that
 * start at address `a'.  `a' is parenthesized before the cast so that
 * expressions such as `p + 1' address the intended bytes.
 */
#define	BE32_TO_U32(a)		\
	((((uint32_t)((uchar_t *)(a))[0] & 0xFF) << (uint32_t)24) |\
	(((uint32_t)((uchar_t *)(a))[1] & 0xFF) << (uint32_t)16) |\
	(((uint32_t)((uchar_t *)(a))[2] & 0xFF) << (uint32_t)8)  |\
	((uint32_t)((uchar_t *)(a))[3] & 0xFF))
166 
/*
 * NULL-terminated list of transport devices that main() tries, in order,
 * when neither -a nor an explicit transport provider was given.
 */
#ifdef DEBUG
/*
 * Only support UDP in DEBUG mode for now
 */
static	char *defaultproviders[] = { "/dev/tcp", "/dev/tcp6", "/dev/udp",
		"/dev/udp6", NULL };
#else
static	char *defaultproviders[] = { "/dev/tcp6", "/dev/tcp", NULL };
#endif
176 
/*
 * Number of elements to add to the poll array on each allocation.
 */
#define	POLL_ARRAY_INC_SIZE	64
/* NOTE(review): presumably the step used by nofile_increase() - confirm */
#define	NOFILE_INC_SIZE		64

/*
 * Path of the control device that open_rdc() opens; it depends on whether
 * this is the ncall-ip or the SNDR build.
 */
#ifdef	__NCALL__
const char *rdc_devr = "/dev/ncallip";
#else
const char *rdc_devr = "/dev/rdc";
#endif
188 
189 static int rdc_fdr;
190 static int
191 
192 open_rdc(void)
193 {
194 	int fd = open(rdc_devr, O_RDONLY);
195 
196 	if (fd < 0)
197 		return (-1);
198 
199 	return (rdc_fdr = fd);
200 }
201 
202 static int
203 sndrsys(int type, void *arg)
204 {
205 	int ret = -1;
206 	if (!rdc_fdr && open_rdc() < 0) { /* open failed */
207 		syslog(LOG_ERR, "open_rdc() failed: %m\n");
208 	} else {
209 		if ((ret = ioctl(rdc_fdr, type, arg)) < 0) {
210 			syslog(LOG_ERR, "ioctl(rdc_ioctl) failed: %m\n");
211 		}
212 	}
213 	return (ret);
214 }
215 
216 int
217 rdc_transport_open(struct netconfig *nconf)
218 {
219 	int fd;
220 	struct strioctl	strioc;
221 
222 	if ((nconf == (struct netconfig *)NULL) ||
223 	    (nconf->nc_device == (char *)NULL)) {
224 		syslog(LOG_ERR, "No netconfig device");
225 		return (-1);
226 	}
227 
228 	/*
229 	 * Open the transport device.
230 	 */
231 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
232 	if (fd == -1)  {
233 		if (t_errno == TSYSERR && errno == EMFILE &&
234 		    (nofile_increase(0) == 0)) {
235 			/* Try again with a higher NOFILE limit. */
236 			fd = t_open(nconf->nc_device, O_RDWR, NULL);
237 		}
238 		if (fd == -1) {
239 			if (t_errno == TSYSERR) {
240 				syslog(LOG_ERR, "t_open failed: %m");
241 			} else {
242 				syslog(LOG_ERR, "t_open failed: %s",
243 				    t_errlist[t_errno]);
244 			}
245 			return (-1);
246 		}
247 	}
248 
249 	/*
250 	 * Pop timod because the RPC module must be as close as possible
251 	 * to the transport.
252 	 */
253 	if (ioctl(fd, I_POP, 0) < 0) {
254 		syslog(LOG_ERR, "I_POP of timod failed: %m");
255 		if (t_close(fd) == -1) {
256 			if (t_errno == TSYSERR) {
257 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
258 			} else {
259 				syslog(LOG_ERR, "t_close failed on %d: %s",
260 				    fd, t_errlist[t_errno]);
261 			}
262 		}
263 		return (-1);
264 	}
265 
266 	if (nconf->nc_semantics == NC_TPI_CLTS) {
267 		/*
268 		 * Push rpcmod to filter data traffic to KRPC.
269 		 */
270 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
271 			syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
272 			(void) t_close(fd);
273 			return (-1);
274 		}
275 	} else {
276 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
277 			syslog(LOG_ERR, "I_PUSH of CONS rpcmod failed: %m");
278 			if (t_close(fd) == -1) {
279 				if (t_errno == TSYSERR) {
280 					syslog(LOG_ERR,
281 					    "t_close failed on %d: %m", fd);
282 				} else {
283 					syslog(LOG_ERR,
284 					    "t_close failed on %d: %s",
285 					    fd, t_errlist[t_errno]);
286 				}
287 			}
288 			return (-1);
289 		}
290 
291 		strioc.ic_cmd = RPC_SERVER;
292 		strioc.ic_dp = (char *)0;
293 		strioc.ic_len = 0;
294 		strioc.ic_timout = -1;
295 		/* Tell CONS rpcmod to act like a server stream. */
296 		if (ioctl(fd, I_STR, &strioc) < 0) {
297 			syslog(LOG_ERR, "CONS rpcmod set-up ioctl failed: %m");
298 			if (t_close(fd) == -1) {
299 				if (t_errno == TSYSERR) {
300 					syslog(LOG_ERR,
301 					    "t_close failed on %d: %m", fd);
302 				} else {
303 					syslog(LOG_ERR,
304 					    "t_close failed on %d: %s",
305 					    fd, t_errlist[t_errno]);
306 				}
307 			}
308 			return (-1);
309 		}
310 	}
311 
312 	/*
313 	 * Re-push timod so that we will still be doing TLI
314 	 * operations on the descriptor.
315 	 */
316 	if (ioctl(fd, I_PUSH, "timod") < 0) {
317 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
318 		if (t_close(fd) == -1) {
319 			if (t_errno == TSYSERR) {
320 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
321 			} else {
322 				syslog(LOG_ERR, "t_close failed on %d: %s",
323 				    fd, t_errlist[t_errno]);
324 			}
325 		}
326 		return (-1);
327 	}
328 
329 	return (fd);
330 }
331 
332 
333 void
334 rdcd_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
335 {
336 	int error;
337 
338 	/*
339 	 * Save the error code across syslog(), just in case syslog()
340 	 * gets its own error and, therefore, overwrites errno.
341 	 */
342 	error = errno;
343 	if (t_errno == TSYSERR) {
344 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
345 		    tli_name, fd, nconf->nc_proto);
346 	} else {
347 		syslog(LOG_ERR,
348 		    "%s(file descriptor %d/transport %s) TLI error %d",
349 		    tli_name, fd, nconf->nc_proto, t_errno);
350 	}
351 	errno = error;
352 }
353 
354 /*
355  * Called to set up service over a particular transport
356  */
357 void
358 do_one(char *provider, char *proto, struct protob *protobp0,
359 	int (*svc)(int, struct netbuf, struct netconfig *))
360 {
361 	struct netbuf *retaddr;
362 	struct netconfig *retnconf;
363 	struct netbuf addrmask;
364 	int vers;
365 	int sock;
366 
367 	if (provider) {
368 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
369 		    &retnconf);
370 	} else {
371 		(void) syslog(LOG_ERR,
372 	"Cannot establish %s service over %s: transport setup problem.",
373 		    protobp0->serv, provider ? provider : proto);
374 		return;
375 	}
376 
377 	if (sock == -1) {
378 		if ((Is_ipv6present() &&
379 		    (strcmp(provider, "/dev/tcp6") == 0)) ||
380 		    (!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
381 			(void) syslog(LOG_ERR,
382 			    "Cannot establish %s service over %s: transport "
383 			    "setup problem.",
384 			    protobp0->serv, provider ? provider : proto);
385 		return;
386 	}
387 
388 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
389 		(void) syslog(LOG_ERR,
390 		    "Cannot set address mask for %s", retnconf->nc_netid);
391 		return;
392 	}
393 
394 
395 	/*
396 	 * Register all versions of the programs in the protocol block list
397 	 */
398 	for (vers = protobp0->versmin; vers <= protobp0->versmax; vers++) {
399 		(void) rpcb_unset(protobp0->program, vers, retnconf);
400 		(void) rpcb_set(protobp0->program, vers, retnconf, retaddr);
401 	}
402 
403 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
404 		/* Don't drop core if supporting module(s) aren't loaded. */
405 		(void) signal(SIGSYS, SIG_IGN);
406 
407 		/*
408 		 * svc() doesn't block, it returns success or failure.
409 		 */
410 		if ((*svc)(sock, addrmask, retnconf) < 0) {
411 			(void) syslog(LOG_ERR, "Cannot establish %s service "
412 			    "over <file desc. %d, protocol %s> : %m. Exiting",
413 			    protobp0->serv, sock, retnconf->nc_proto);
414 			exit(1);
415 		}
416 	}
417 	/*
418 	 * We successfully set up the server over this transport.
419 	 * Add this descriptor to the one being polled on.
420 	 */
421 	add_to_poll_list(sock, retnconf);
422 }
423 
424 /*
425  * Set up the SNDR/ncall-ip service over all the available transports.
426  * Returns -1 for failure, 0 for success.
427  */
428 int
429 do_all(struct protob *protobp,
430 	int (*svc)(int, struct netbuf, struct netconfig *))
431 {
432 	struct netconfig *nconf;
433 	NCONF_HANDLE *nc;
434 
435 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
436 		syslog(LOG_ERR, "setnetconfig failed: %m");
437 		return (-1);
438 	}
439 	while (nconf = getnetconfig(nc)) {
440 		if ((nconf->nc_flag & NC_VISIBLE) &&
441 		    strcmp(nconf->nc_protofmly, "loopback") != 0 &&
442 		    OK_TPI_TYPE(nconf))
443 			do_one(nconf->nc_device, nconf->nc_proto, protobp, svc);
444 	}
445 	(void) endnetconfig(nc);
446 	return (0);
447 }
448 
449 /*
450  * Read the /etc/default/sndr configuration file to determine if the
451  * client has been configured for number of threads, backlog or transport
452  * provider.
453  */
454 
455 static void
456 read_default(void)
457 {
458 	char *defval, *tmp_str;
459 	int errno;
460 	int tmp;
461 
462 	/* Fail silently if error in opening the default rdc config file */
463 	if ((defopen(RDCADMIN)) == 0) {
464 		if ((defval = defread("SNDR_THREADS=")) != NULL) {
465 			errno = 0;
466 			tmp = strtol(defval, (char **)NULL, 10);
467 			if (errno == 0) {
468 				max_conns_allowed = tmp;
469 			}
470 		}
471 		if ((defval = defread("SNDR_LISTEN_BACKLOG=")) != NULL) {
472 			errno = 0;
473 			tmp = strtol(defval, (char **)NULL, 10);
474 			if (errno == 0) {
475 				listen_backlog = tmp;
476 			}
477 		}
478 		if ((defval = defread("SNDR_TRANSPORT=")) != NULL) {
479 			errno = 0;
480 			tmp_str = strdup(defval);
481 			if (errno == 0) {
482 				trans_provider = tmp_str;
483 			}
484 		}
485 		/* close defaults file */
486 		(void) defopen(NULL);
487 	}
488 }
489 #ifdef lint
490 int
491 sndrd_lintmain(int ac, char **av)
492 #else
493 int
494 main(int ac, char **av)
495 #endif
496 {
497 	const char *dir = "/";
498 	int allflag = 0;
499 	int pid;
500 	int i, rc;
501 	struct protob *protobp0, *protobp;
502 	char **providerp;
503 	char *required;
504 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
505 	int maxservers;
506 #endif
507 
508 	(void) setlocale(LC_ALL, "");
509 #ifdef	__NCALL__
510 	(void) textdomain("ncall");
511 #else
512 	(void) textdomain("rdc");
513 #endif
514 
515 	progname = basename(av[0]);
516 
517 #ifdef	__NCALL__
518 	rc = ncall_check_release(&required);
519 #else
520 	rc = rdc_check_release(&required);
521 #endif
522 	if (rc < 0) {
523 		(void) fprintf(stderr,
524 		    gettext("%s: unable to determine the current "
525 		    "Solaris release: %s\n"), progname, strerror(errno));
526 		exit(1);
527 	} else if (rc == FALSE) {
528 		(void) fprintf(stderr,
529 		    gettext("%s: incorrect Solaris release (requires %s)\n"),
530 		    progname, required);
531 		exit(1);
532 	}
533 
534 	openlog(progname, LOG_PID|LOG_CONS, LOG_DAEMON);
535 	read_default();
536 
537 	/*
538 	 * Usage: <progname> [-c <number of threads>] [-t protocol] \
539 	 *		[-d] [-l <listen backlog>]
540 	 */
541 	while ((i = getopt(ac, av, "ac:t:dl:")) != EOF) {
542 		switch (i) {
543 			case 'a':
544 				allflag = 1;
545 				break;
546 			case 'c':
547 				max_conns_allowed = atoi(optarg);
548 				if (max_conns_allowed <= 0)
549 					max_conns_allowed = 16;
550 				break;
551 
552 			case 'd':
553 				debugflg++;
554 				break;
555 
556 			case 't':
557 				trans_provider = optarg;
558 				break;
559 
560 			case 'l':
561 				listen_backlog = atoi(optarg);
562 				if (listen_backlog < 0)
563 					listen_backlog = 32;
564 				break;
565 
566 			default:
567 				syslog(LOG_ERR,
568 				    "Usage: %s [-c <number of threads>] "
569 				    "[-d] [-t protocol] "
570 				    "[-l <listen backlog>]\n", progname);
571 				exit(1);
572 				break;
573 		}
574 	}
575 
576 	if (chroot(dir) < 0) {
577 		syslog(LOG_ERR, "chroot failed: %m");
578 		exit(1);
579 	}
580 
581 	if (chdir(dir) < 0) {
582 		syslog(LOG_ERR, "chdir failed: %m");
583 		exit(1);
584 	}
585 
586 	if (!debugflg) {
587 		pid = fork();
588 		if (pid < 0) {
589 			syslog(LOG_ERR, "Fork failed\n");
590 			exit(1);
591 		}
592 		if (pid != 0)
593 			exit(0);
594 
595 		/*
596 		 * Close existing file descriptors, open "/dev/null" as
597 		 * standard input, output, and error, and detach from
598 		 * controlling terminal.
599 		 */
600 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
601 		/* use closefrom(3C) from PSARC/2000/193 when possible */
602 		closefrom(0);
603 #else
604 		for (i = 0; i < _NFILE; i++)
605 			(void) close(i);
606 #endif
607 		(void) open("/dev/null", O_RDONLY);
608 		(void) open("/dev/null", O_WRONLY);
609 		(void) dup(1);
610 		(void) setsid();
611 
612 		/*
613 		 * ignore all signals apart from SIGTERM.
614 		 */
615 		for (i = 1; i < _sys_nsig; i++)
616 			(void) sigset(i, SIG_IGN);
617 
618 		(void) sigset(SIGTERM, SIG_DFL);
619 	}
620 
621 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
622 	/*
623 	 * Set up kernel RPC thread pool for the SNDR/ncall-ip server.
624 	 */
625 	maxservers = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
626 	if (sndrsvcpool(maxservers)) {
627 		(void) syslog(LOG_ERR,
628 		    "Can't set up kernel %s service: %m. Exiting", progname);
629 		exit(1);
630 	}
631 
632 	/*
633 	 * Set up blocked thread to do LWP creation on behalf of the kernel.
634 	 */
635 	if (svcwait(RDC_SVCPOOL_ID)) {
636 		(void) syslog(LOG_ERR,
637 		    "Can't set up %s pool creator: %m, Exiting", progname);
638 		exit(1);
639 	}
640 #endif
641 
642 	/*
643 	 * Build a protocol block list for registration.
644 	 */
645 	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
646 	protobp->serv = RDC_SVC_NAME;
647 	protobp->versmin = RDC_VERS_MIN;
648 	protobp->versmax = RDC_VERS_MAX;
649 	protobp->program = RDC_PROGRAM;
650 	protobp->next = (struct protob *)NULL;
651 
652 	if (allflag) {
653 		if (do_all(protobp0, rdcsvc) == -1)
654 			exit(1);
655 	} else if (trans_provider)
656 		do_one(trans_provider, NULL, protobp0, rdcsvc);
657 	else {
658 		for (providerp = defaultproviders;
659 		    *providerp != NULL; providerp++) {
660 			trans_provider = *providerp;
661 			do_one(trans_provider, NULL, protobp0, rdcsvc);
662 		}
663 	}
664 
665 done:
666 	free(protobp);
667 
668 	end_listen_fds = num_fds;
669 	/*
670 	 * Poll for non-data control events on the transport descriptors.
671 	 */
672 	poll_for_action();
673 
674 	syslog(LOG_ERR, "%s fatal server error\n", progname);
675 
676 	return (-1);
677 }
678 
679 static int
680 reuseaddr(int fd)
681 {
682 	struct t_optmgmt req, resp;
683 	struct opthdr *opt;
684 	char reqbuf[128];
685 	int *ip;
686 
687 	/* LINTED pointer alignment */
688 	opt = (struct opthdr *)reqbuf;
689 	opt->level = SOL_SOCKET;
690 	opt->name = SO_REUSEADDR;
691 	opt->len = sizeof (int);
692 
693 	/* LINTED pointer alignment */
694 	ip = (int *)&reqbuf[sizeof (struct opthdr)];
695 	*ip = 1;
696 
697 	req.flags = T_NEGOTIATE;
698 	req.opt.len = sizeof (struct opthdr) + opt->len;
699 	req.opt.buf = (char *)opt;
700 
701 	resp.flags = 0;
702 	resp.opt.buf = reqbuf;
703 	resp.opt.maxlen = sizeof (reqbuf);
704 
705 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
706 		if (t_errno == TSYSERR) {
707 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %m\n");
708 		} else {
709 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %s\n",
710 			    t_errlist[t_errno]);
711 		}
712 		return (-1);
713 	}
714 	return (0);
715 }
716 
717 /*
718  * poll on the open transport descriptors for events and errors.
719  */
720 void
721 poll_for_action(void)
722 {
723 	int nfds;
724 	int i;
725 
726 	/*
727 	 * Keep polling until all transports have been closed. When this
728 	 * happens, we return.
729 	 */
730 	while ((int)num_fds > 0) {
731 		nfds = poll(poll_array, num_fds, INFTIM);
732 		switch (nfds) {
733 		case 0:
734 			continue;
735 
736 		case -1:
737 			/*
738 			 * Some errors from poll could be
739 			 * due to temporary conditions, and we try to
740 			 * be robust in the face of them. Other
741 			 * errors (should never happen in theory)
742 			 * are fatal (eg. EINVAL, EFAULT).
743 			 */
744 			switch (errno) {
745 			case EINTR:
746 				continue;
747 
748 			case EAGAIN:
749 			case ENOMEM:
750 				(void) sleep(10);
751 				continue;
752 
753 			default:
754 				(void) syslog(LOG_ERR,
755 				    "poll failed: %m. Exiting");
756 				exit(1);
757 			}
758 		default:
759 			break;
760 		}
761 
762 		/*
763 		 * Go through the poll list looking for events.
764 		 */
765 		for (i = 0; i < num_fds && nfds > 0; i++) {
766 			if (poll_array[i].revents) {
767 				nfds--;
768 				/*
769 				 * We have a message, so try to read it.
770 				 * Record the error return in errno,
771 				 * so that syslog(LOG_ERR, "...%m")
772 				 * dumps the corresponding error string.
773 				 */
774 				if (conn_polled[i].nc.nc_semantics ==
775 				    NC_TPI_CLTS) {
776 					errno = do_poll_clts_action(
777 					    poll_array[i].fd, i);
778 				} else {
779 					errno = do_poll_cots_action(
780 					    poll_array[i].fd, i);
781 				}
782 
783 				if (errno == 0)
784 					continue;
785 				/*
786 				 * Most returned error codes mean that there is
787 				 * fatal condition which we can only deal with
788 				 * by closing the transport.
789 				 */
790 				if (errno != EAGAIN && errno != ENOMEM) {
791 					(void) syslog(LOG_ERR,
792 					    "Error (%m) reading descriptor %d"
793 					    "/transport %s. Closing it.",
794 					    poll_array[i].fd,
795 					    conn_polled[i].nc.nc_proto);
796 					(void) t_close(poll_array[i].fd);
797 					remove_from_poll_list(poll_array[i].fd);
798 				} else if (errno == ENOMEM)
799 					(void) sleep(5);
800 			}
801 		}
802 	}
803 
804 	(void) syslog(LOG_ERR,
805 	    "All transports have been closed with errors. Exiting.");
806 }
807 
808 /*
809  * Allocate poll/transport array entries for this descriptor.
810  */
811 static void
812 add_to_poll_list(int fd, struct netconfig *nconf)
813 {
814 	static int poll_array_size = 0;
815 
816 	/*
817 	 * If the arrays are full, allocate new ones.
818 	 */
819 	if (num_fds == poll_array_size) {
820 		struct pollfd *tpa;
821 		struct conn_entry *tnp;
822 
823 		if (poll_array_size != 0) {
824 			tpa = poll_array;
825 			tnp = conn_polled;
826 		} else
827 			tpa = (struct pollfd *)0;
828 
829 		poll_array_size += POLL_ARRAY_INC_SIZE;
830 
831 		/*
832 		 * Allocate new arrays.
833 		 */
834 		poll_array = (struct pollfd *)
835 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
836 		conn_polled = (struct conn_entry *)
837 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
838 		if (poll_array == (struct pollfd *)NULL ||
839 		    conn_polled == (struct conn_entry *)NULL) {
840 			syslog(LOG_ERR, "malloc failed for poll array");
841 			exit(1);
842 		}
843 
844 		/*
845 		 * Copy the data of the old ones into new arrays, and
846 		 * free the old ones.
847 		 * num_fds is guaranteed to be less than
848 		 * poll_array_size, so this memcpy is safe.
849 		 */
850 		if (tpa) {
851 			(void) memcpy((void *)poll_array, (void *)tpa,
852 			    num_fds * sizeof (struct pollfd));
853 			(void) memcpy((void *)conn_polled, (void *)tnp,
854 			    num_fds * sizeof (struct conn_entry));
855 			free((void *)tpa);
856 			free((void *)tnp);
857 		}
858 	}
859 
860 	/*
861 	 * Set the descriptor and event list. All possible events are
862 	 * polled for.
863 	 */
864 	poll_array[num_fds].fd = fd;
865 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
866 
867 	/*
868 	 * Copy the transport data over too.
869 	 */
870 	conn_polled[num_fds].nc = *nconf;	/* structure copy */
871 	conn_polled[num_fds].closing = 0;
872 
873 	/*
874 	 * Set the descriptor to non-blocking. Avoids a race
875 	 * between data arriving on the stream and then having it
876 	 * flushed before we can read it.
877 	 */
878 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
879 		(void) syslog(LOG_ERR,
880 		    "fcntl(file desc. %d/transport %s, F_SETFL, "
881 		    "O_NONBLOCK): %m. Exiting",
882 		    num_fds, nconf->nc_proto);
883 		exit(1);
884 	}
885 
886 	/*
887 	 * Count this descriptor.
888 	 */
889 	++num_fds;
890 }
891 
892 static void
893 remove_from_poll_list(int fd)
894 {
895 	int i;
896 	int num_to_copy;
897 
898 	for (i = 0; i < num_fds; i++) {
899 		if (poll_array[i].fd == fd) {
900 			--num_fds;
901 			num_to_copy = num_fds - i;
902 			(void) memcpy((void *)&poll_array[i],
903 			    (void *)&poll_array[i+1],
904 			    num_to_copy * sizeof (struct pollfd));
905 			(void) memset((void *)&poll_array[num_fds], 0,
906 			    sizeof (struct pollfd));
907 			(void) memcpy((void *)&conn_polled[i],
908 			    (void *)&conn_polled[i+1],
909 			    num_to_copy * sizeof (struct conn_entry));
910 			(void) memset((void *)&conn_polled[num_fds], 0,
911 			    sizeof (struct conn_entry));
912 			return;
913 		}
914 	}
915 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
916 
917 }
918 
919 static void
920 conn_close_oldest(void)
921 {
922 	int fd;
923 	int i1;
924 
925 	/*
926 	 * Find the oldest connection that is not already in the
927 	 * process of shutting down.
928 	 */
929 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
930 		if (i1 >= num_fds)
931 			return;
932 		if (conn_polled[i1].closing == 0)
933 			break;
934 	}
935 #ifdef DEBUG
936 	(void) printf("too many connections (%d), releasing oldest (%d)\n",
937 	    num_conns, poll_array[i1].fd);
938 #else
939 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
940 	    num_conns, poll_array[i1].fd);
941 #endif
942 	fd = poll_array[i1].fd;
943 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
944 		/*
945 		 * For politeness, send a T_DISCON_REQ to the transport
946 		 * provider.  We close the stream anyway.
947 		 */
948 		(void) t_snddis(fd, (struct t_call *)0);
949 		num_conns--;
950 		remove_from_poll_list(fd);
951 		(void) t_close(fd);
952 	} else {
953 		/*
954 		 * For orderly release, we do not close the stream
955 		 * until the T_ORDREL_IND arrives to complete
956 		 * the handshake.
957 		 */
958 		if (t_sndrel(fd) == 0)
959 			conn_polled[i1].closing = 1;
960 	}
961 }
962 
963 static boolean_t
964 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
965 {
966 	struct conn_ind	*conn;
967 	struct conn_ind	*next_conn;
968 
969 	conn = (struct conn_ind *)malloc(sizeof (*conn));
970 	if (conn == NULL) {
971 		syslog(LOG_ERR, "malloc for listen indication failed");
972 		return (FALSE);
973 	}
974 
975 	/* LINTED pointer alignment */
976 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
977 	if (conn->conn_call == NULL) {
978 		free((char *)conn);
979 		rdcd_log_tli_error("t_alloc", fd, nconf);
980 		return (FALSE);
981 	}
982 
983 	if (t_listen(fd, conn->conn_call) == -1) {
984 		rdcd_log_tli_error("t_listen", fd, nconf);
985 		(void) t_free((char *)conn->conn_call, T_CALL);
986 		free((char *)conn);
987 		return (FALSE);
988 	}
989 
990 	if (conn->conn_call->udata.len > 0) {
991 		syslog(LOG_WARNING,
992 		    "rejecting inbound connection(%s) with %d bytes "
993 		    "of connect data",
994 		    nconf->nc_proto, conn->conn_call->udata.len);
995 
996 		conn->conn_call->udata.len = 0;
997 		(void) t_snddis(fd, conn->conn_call);
998 		(void) t_free((char *)conn->conn_call, T_CALL);
999 		free((char *)conn);
1000 		return (FALSE);
1001 	}
1002 
1003 	if ((next_conn = *connp) != NULL) {
1004 		next_conn->conn_prev->conn_next = conn;
1005 		conn->conn_next = next_conn;
1006 		conn->conn_prev = next_conn->conn_prev;
1007 		next_conn->conn_prev = conn;
1008 	} else {
1009 		conn->conn_next = conn;
1010 		conn->conn_prev = conn;
1011 		*connp = conn;
1012 	}
1013 	return (TRUE);
1014 }
1015 
1016 static int
1017 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1018 {
1019 	struct conn_ind	*conn;
1020 	struct t_discon	discon;
1021 
1022 	discon.udata.buf = (char *)0;
1023 	discon.udata.maxlen = 0;
1024 	if (t_rcvdis(fd, &discon) == -1) {
1025 		rdcd_log_tli_error("t_rcvdis", fd, nconf);
1026 		return (-1);
1027 	}
1028 
1029 	conn = *connp;
1030 	if (conn == NULL)
1031 		return (0);
1032 
1033 	do {
1034 		if (conn->conn_call->sequence == discon.sequence) {
1035 			if (conn->conn_next == conn)
1036 				*connp = (struct conn_ind *)0;
1037 			else {
1038 				if (conn == *connp) {
1039 					*connp = conn->conn_next;
1040 				}
1041 				conn->conn_next->conn_prev = conn->conn_prev;
1042 				conn->conn_prev->conn_next = conn->conn_next;
1043 			}
1044 			free((char *)conn);
1045 			break;
1046 		}
1047 		conn = conn->conn_next;
1048 	} while (conn != *connp);
1049 
1050 	return (0);
1051 }
1052 
1053 static void
1054 cots_listen_event(int fd, int conn_index)
1055 {
1056 	struct t_call *call;
1057 	struct conn_ind	*conn;
1058 	struct conn_ind	*conn_head;
1059 	int event;
1060 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1061 	int new_fd;
1062 	struct netbuf addrmask;
1063 	int ret = 0;
1064 
1065 	conn_head = NULL;
1066 	(void) conn_get(fd, nconf, &conn_head);
1067 
1068 	while ((conn = conn_head) != NULL) {
1069 		conn_head = conn->conn_next;
1070 		if (conn_head == conn)
1071 			conn_head = NULL;
1072 		else {
1073 			conn_head->conn_prev = conn->conn_prev;
1074 			conn->conn_prev->conn_next = conn_head;
1075 		}
1076 		call = conn->conn_call;
1077 		free(conn);
1078 
1079 		/*
1080 		 * If we have already accepted the maximum number of
1081 		 * connections allowed on the command line, then drop
1082 		 * the oldest connection (for any protocol) before
1083 		 * accepting the new connection.  Unless explicitly
1084 		 * set on the command line, max_conns_allowed is -1.
1085 		 */
1086 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1087 			conn_close_oldest();
1088 
1089 		/*
1090 		 * Create a new transport endpoint for the same proto as
1091 		 * the listener.
1092 		 */
1093 		new_fd = rdc_transport_open(nconf);
1094 		if (new_fd == -1) {
1095 			call->udata.len = 0;
1096 			(void) t_snddis(fd, call);
1097 			(void) t_free((char *)call, T_CALL);
1098 			syslog(LOG_ERR, "Cannot establish transport over %s",
1099 			    nconf->nc_device);
1100 			continue;
1101 		}
1102 
1103 		/* Bind to a generic address/port for the accepting stream. */
1104 		if (t_bind(new_fd, NULL, NULL) == -1) {
1105 			rdcd_log_tli_error("t_bind", new_fd, nconf);
1106 			call->udata.len = 0;
1107 			(void) t_snddis(fd, call);
1108 			(void) t_free((char *)call, T_CALL);
1109 			(void) t_close(new_fd);
1110 			continue;
1111 		}
1112 
1113 		while (t_accept(fd, new_fd, call) == -1) {
1114 			if (t_errno != TLOOK) {
1115 				rdcd_log_tli_error("t_accept", fd, nconf);
1116 				call->udata.len = 0;
1117 				(void) t_snddis(fd, call);
1118 				(void) t_free((char *)call, T_CALL);
1119 				(void) t_close(new_fd);
1120 				goto do_next_conn;
1121 			}
1122 			while (event = t_look(fd)) {
1123 				switch (event) {
1124 				case T_LISTEN:
1125 					(void) conn_get(fd, nconf, &conn_head);
1126 					continue;
1127 
1128 				case T_DISCONNECT:
1129 					(void) discon_get(fd, nconf,
1130 					    &conn_head);
1131 					continue;
1132 
1133 				default:
1134 					syslog(LOG_ERR,
1135 					    "unexpected event 0x%x during "
1136 					    "accept processing (%s)",
1137 					    event, nconf->nc_proto);
1138 					call->udata.len = 0;
1139 					(void) t_snddis(fd, call);
1140 					(void) t_free((char *)call, T_CALL);
1141 					(void) t_close(new_fd);
1142 					goto do_next_conn;
1143 				}
1144 			}
1145 		}
1146 
1147 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1148 			(void) syslog(LOG_ERR, "Cannot set address mask for %s",
1149 			    nconf->nc_netid);
1150 			(void) t_snddis(new_fd, NULL);
1151 			(void) t_free((char *)call, T_CALL);
1152 			(void) t_close(new_fd);
1153 			continue;
1154 		}
1155 
1156 		/* Tell kRPC about the new stream. */
1157 		ret = (*Mysvc)(new_fd, addrmask, nconf);
1158 		if (ret < 0) {
1159 			syslog(LOG_ERR,
1160 			    "unable to register with kernel rpc: %m");
1161 			free(addrmask.buf);
1162 			(void) t_snddis(new_fd, NULL);
1163 			(void) t_free((char *)call, T_CALL);
1164 			(void) t_close(new_fd);
1165 			goto do_next_conn;
1166 		}
1167 
1168 		free(addrmask.buf);
1169 		(void) t_free((char *)call, T_CALL);
1170 
1171 		/*
1172 		 * Poll on the new descriptor so that we get disconnect
1173 		 * and orderly release indications.
1174 		 */
1175 		num_conns++;
1176 		add_to_poll_list(new_fd, nconf);
1177 
1178 		/* Reset nconf in case it has been moved. */
1179 		nconf = &conn_polled[conn_index].nc;
1180 do_next_conn:;
1181 	}
1182 }
1183 
1184 static int
1185 do_poll_cots_action(int fd, int conn_index)
1186 {
1187 	char buf[256];
1188 	int event;
1189 	int i1;
1190 	int flags;
1191 	struct conn_entry *connent = &conn_polled[conn_index];
1192 	struct netconfig *nconf = &(connent->nc);
1193 	const char *errorstr;
1194 
1195 	while (event = t_look(fd)) {
1196 		switch (event) {
1197 		case T_LISTEN:
1198 			cots_listen_event(fd, conn_index);
1199 			break;
1200 
1201 		case T_DATA:
1202 			/*
1203 			 * Receive a private notification from CONS rpcmod.
1204 			 */
1205 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1206 			if (i1 == -1) {
1207 				syslog(LOG_ERR, "t_rcv failed");
1208 				break;
1209 			}
1210 			if (i1 < sizeof (int))
1211 				break;
1212 			i1 = BE32_TO_U32(buf);
1213 			if (i1 == 1 || i1 == 2) {
1214 				/*
1215 				 * This connection has been idle for too long,
1216 				 * so release it as politely as we can.  If we
1217 				 * have already initiated an orderly release
1218 				 * and we get notified that the stream is
1219 				 * still idle, pull the plug.  This prevents
1220 				 * hung connections from continuing to consume
1221 				 * resources.
1222 				 */
1223 				if (nconf->nc_semantics == NC_TPI_COTS ||
1224 				    connent->closing != 0) {
1225 					(void) t_snddis(fd, (struct t_call *)0);
1226 					goto fdclose;
1227 				}
1228 				/*
1229 				 * For NC_TPI_COTS_ORD, the stream is closed
1230 				 * and removed from the poll list when the
1231 				 * T_ORDREL is received from the provider.  We
1232 				 * don't wait for it here because it may take
1233 				 * a while for the transport to shut down.
1234 				 */
1235 				if (t_sndrel(fd) == -1) {
1236 					syslog(LOG_ERR,
1237 					"unable to send orderly release %m");
1238 				}
1239 				connent->closing = 1;
1240 			} else
1241 				syslog(LOG_ERR,
1242 				    "unexpected event from CONS rpcmod %d", i1);
1243 			break;
1244 
1245 		case T_ORDREL:
1246 			/* Perform an orderly release. */
1247 			if (t_rcvrel(fd) == 0) {
1248 				/* T_ORDREL on listen fd's should be ignored */
1249 				if (!is_listen_fd_index(fd)) {
1250 					(void) t_sndrel(fd);
1251 					goto fdclose;
1252 				}
1253 				break;
1254 
1255 			} else if (t_errno == TLOOK) {
1256 				break;
1257 			} else {
1258 				rdcd_log_tli_error("t_rcvrel", fd, nconf);
1259 				/*
1260 				 * check to make sure we do not close
1261 				 * listen fd
1262 				 */
1263 				if (!is_listen_fd_index(fd))
1264 					break;
1265 				else
1266 					goto fdclose;
1267 			}
1268 
1269 		case T_DISCONNECT:
1270 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1271 				rdcd_log_tli_error("t_rcvdis", fd, nconf);
1272 
1273 			/*
1274 			 * T_DISCONNECT on listen fd's should be ignored.
1275 			 */
1276 			if (!is_listen_fd_index(fd))
1277 				break;
1278 			else
1279 				goto fdclose;
1280 
1281 		case T_ERROR:
1282 		default:
1283 			if (event == T_ERROR || t_errno == TSYSERR) {
1284 				if ((errorstr = strerror(errno)) == NULL) {
1285 					(void) snprintf(buf, sizeof (buf),
1286 					    "Unknown error num %d", errno);
1287 					errorstr = (const char *)buf;
1288 				}
1289 			} else if (event == -1)
1290 				errorstr = t_strerror(t_errno);
1291 			else
1292 				errorstr = "";
1293 #ifdef DEBUG
1294 			syslog(LOG_ERR,
1295 			    "unexpected TLI event (0x%x) on "
1296 			    "connection-oriented transport(%s, %d):%s",
1297 			    event, nconf->nc_proto, fd, errorstr);
1298 #endif
1299 
1300 fdclose:
1301 			num_conns--;
1302 			remove_from_poll_list(fd);
1303 			(void) t_close(fd);
1304 			return (0);
1305 		}
1306 	}
1307 
1308 	return (0);
1309 }
1310 
1311 
1312 /*
1313  * Called to read and interpret the event on a connectionless descriptor.
1314  * Returns 0 if successful, or a UNIX error code if failure.
1315  */
1316 static int
1317 do_poll_clts_action(int fd, int conn_index)
1318 {
1319 	int error;
1320 	int ret;
1321 	int flags;
1322 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1323 	static struct t_unitdata *unitdata = NULL;
1324 	static struct t_uderr *uderr = NULL;
1325 	static int oldfd = -1;
1326 	struct nd_hostservlist *host = NULL;
1327 	struct strbuf ctl[1], data[1];
1328 	/*
1329 	 * We just need to have some space to consume the
1330 	 * message in the event we can't use the TLI interface to do the
1331 	 * job.
1332 	 *
1333 	 * We flush the message using getmsg(). For the control part
1334 	 * we allocate enough for any TPI header plus 32 bytes for address
1335 	 * and options. For the data part, there is nothing magic about
1336 	 * the size of the array, but 256 bytes is probably better than
1337 	 * 1 byte, and we don't expect any data portion anyway.
1338 	 *
1339 	 * If the array sizes are too small, we handle this because getmsg()
1340 	 * (called to consume the message) will return MOREDATA|MORECTL.
1341 	 * Thus we just call getmsg() until it's read the message.
1342 	 */
1343 	char ctlbuf[sizeof (union T_primitives) + 32];
1344 	char databuf[256];
1345 
1346 	/*
1347 	 * If this is the same descriptor as the last time
1348 	 * do_poll_clts_action was called, we can save some
1349 	 * de-allocation and allocation.
1350 	 */
1351 	if (oldfd != fd) {
1352 		oldfd = fd;
1353 
1354 		if (unitdata) {
1355 			(void) t_free((char *)unitdata, T_UNITDATA);
1356 			unitdata = NULL;
1357 		}
1358 		if (uderr) {
1359 			(void) t_free((char *)uderr, T_UDERROR);
1360 			uderr = NULL;
1361 		}
1362 	}
1363 
1364 	/*
1365 	 * Allocate a unitdata structure for receiving the event.
1366 	 */
1367 	if (unitdata == NULL) {
1368 		/* LINTED pointer alignment */
1369 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
1370 		if (unitdata == NULL) {
1371 			if (t_errno == TSYSERR) {
1372 				/*
1373 				 * Save the error code across
1374 				 * syslog(), just in case
1375 				 * syslog() gets its own error
1376 				 * and therefore overwrites errno.
1377 				 */
1378 				error = errno;
1379 				(void) syslog(LOG_ERR,
1380 				    "t_alloc(file descriptor %d/transport %s, "
1381 				    "T_UNITDATA) failed: %m",
1382 				    fd, nconf->nc_proto);
1383 				return (error);
1384 			}
1385 			(void) syslog(LOG_ERR, "t_alloc(file descriptor %d/"
1386 			    "transport %s, T_UNITDATA) failed TLI error %d",
1387 			    fd, nconf->nc_proto, t_errno);
1388 			goto flush_it;
1389 		}
1390 	}
1391 
1392 try_again:
1393 	flags = 0;
1394 
1395 	/*
1396 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
1397 	 * we don't get any, because rpcmod filters them out.
1398 	 * However, we need to call t_rcvudata() to let TLI
1399 	 * tell us we have a T_UDERROR_IND.
1400 	 *
1401 	 * algorithm is:
1402 	 * 	t_rcvudata(), expecting TLOOK.
1403 	 * 	t_look(), expecting T_UDERR.
1404 	 * 	t_rcvuderr(), expecting success (0).
1405 	 * 	expand destination address into ASCII,
1406 	 *	and dump it.
1407 	 */
1408 
1409 	ret = t_rcvudata(fd, unitdata, &flags);
1410 	if (ret == 0 || t_errno == TBUFOVFLW) {
1411 		(void) syslog(LOG_WARNING, "t_rcvudata(file descriptor %d/"
1412 		    "transport %s) got unexpected data, %d bytes",
1413 		    fd, nconf->nc_proto, unitdata->udata.len);
1414 
1415 		/*
1416 		 * Even though we don't expect any data, in case we do,
1417 		 * keep reading until there is no more.
1418 		 */
1419 		if (flags & T_MORE)
1420 			goto try_again;
1421 
1422 		return (0);
1423 	}
1424 
1425 	switch (t_errno) {
1426 	case TNODATA:
1427 		return (0);
1428 	case TSYSERR:
1429 		/*
1430 		 * System errors are returned to caller.
1431 		 * Save the error code across
1432 		 * syslog(), just in case
1433 		 * syslog() gets its own error
1434 		 * and therefore overwrites errno.
1435 		 */
1436 		error = errno;
1437 		(void) syslog(LOG_ERR,
1438 		    "t_rcvudata(file descriptor %d/transport %s) %m",
1439 		    fd, nconf->nc_proto);
1440 		return (error);
1441 	case TLOOK:
1442 		break;
1443 	default:
1444 		(void) syslog(LOG_ERR,
1445 		    "t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1446 		    fd, nconf->nc_proto, t_errno);
1447 		goto flush_it;
1448 	}
1449 
1450 	ret = t_look(fd);
1451 	switch (ret) {
1452 	case 0:
1453 		return (0);
1454 	case -1:
1455 		/*
1456 		 * System errors are returned to caller.
1457 		 */
1458 		if (t_errno == TSYSERR) {
1459 			/*
1460 			 * Save the error code across
1461 			 * syslog(), just in case
1462 			 * syslog() gets its own error
1463 			 * and therefore overwrites errno.
1464 			 */
1465 			error = errno;
1466 			(void) syslog(LOG_ERR,
1467 			    "t_look(file descriptor %d/transport %s) %m",
1468 			    fd, nconf->nc_proto);
1469 			return (error);
1470 		}
1471 		(void) syslog(LOG_ERR,
1472 		    "t_look(file descriptor %d/transport %s) TLI error %d",
1473 		    fd, nconf->nc_proto, t_errno);
1474 		goto flush_it;
1475 	case T_UDERR:
1476 		break;
1477 	default:
1478 		(void) syslog(LOG_WARNING, "t_look(file descriptor %d/"
1479 		    "transport %s) returned %d not T_UDERR (%d)",
1480 		    fd, nconf->nc_proto, ret, T_UDERR);
1481 	}
1482 
1483 	if (uderr == NULL) {
1484 		/* LINTED pointer alignment */
1485 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1486 		if (uderr == NULL) {
1487 			if (t_errno == TSYSERR) {
1488 				/*
1489 				 * Save the error code across
1490 				 * syslog(), just in case
1491 				 * syslog() gets its own error
1492 				 * and therefore overwrites errno.
1493 				 */
1494 				error = errno;
1495 				(void) syslog(LOG_ERR,
1496 				    "t_alloc(file descriptor %d/transport %s, "
1497 				    "T_UDERROR) failed: %m",
1498 				    fd, nconf->nc_proto);
1499 				return (error);
1500 			}
1501 			(void) syslog(LOG_ERR, "t_alloc(file descriptor %d/"
1502 			    "transport %s, T_UDERROR) failed TLI error: %d",
1503 			    fd, nconf->nc_proto, t_errno);
1504 			goto flush_it;
1505 		}
1506 	}
1507 
1508 	ret = t_rcvuderr(fd, uderr);
1509 	if (ret == 0) {
1510 
1511 		/*
1512 		 * Save the datagram error in errno, so that the
1513 		 * %m argument to syslog picks up the error string.
1514 		 */
1515 		errno = uderr->error;
1516 
1517 		/*
1518 		 * Log the datagram error, then log the host that
1519 		 * probably triggerred. Cannot log both in the
1520 		 * same transaction because of packet size limitations
1521 		 * in /dev/log.
1522 		 */
1523 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1524 		    "%s response over <file descriptor %d/transport %s> "
1525 		    "generated error: %m",
1526 		    progname, fd, nconf->nc_proto);
1527 
1528 		/*
1529 		 * Try to map the client's address back to a
1530 		 * name.
1531 		 */
1532 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1533 		if (ret != -1 && host && host->h_cnt > 0 &&
1534 		    host->h_hostservs) {
1535 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1536 		    "Bad %s response was sent to client with "
1537 		    "host name: %s; service port: %s",
1538 		    progname, host->h_hostservs->h_host,
1539 		    host->h_hostservs->h_serv);
1540 		} else {
1541 			int i, j;
1542 			char *buf;
1543 			char *hex = "0123456789abcdef";
1544 
1545 			/*
1546 			 * Mapping failed, print the whole thing
1547 			 * in ASCII hex.
1548 			 */
1549 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1550 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1551 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1552 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1553 			}
1554 			buf[j] = '\0';
1555 			(void) syslog((errno == ECONNREFUSED) ?
1556 			    LOG_DEBUG : LOG_WARNING,
1557 			    "Bad %s response was sent to client with "
1558 			    "transport address: 0x%s",
1559 			    progname, buf);
1560 			free((void *)buf);
1561 		}
1562 
1563 		if (ret == 0 && host != NULL)
1564 			netdir_free((void *)host, ND_HOSTSERVLIST);
1565 		return (0);
1566 	}
1567 
1568 	switch (t_errno) {
1569 	case TNOUDERR:
1570 		goto flush_it;
1571 	case TSYSERR:
1572 		/*
1573 		 * System errors are returned to caller.
1574 		 * Save the error code across
1575 		 * syslog(), just in case
1576 		 * syslog() gets its own error
1577 		 * and therefore overwrites errno.
1578 		 */
1579 		error = errno;
1580 		(void) syslog(LOG_ERR,
1581 		    "t_rcvuderr(file descriptor %d/transport %s) %m",
1582 		    fd, nconf->nc_proto);
1583 		return (error);
1584 	default:
1585 		(void) syslog(LOG_ERR,
1586 		    "t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1587 		    fd, nconf->nc_proto, t_errno);
1588 		goto flush_it;
1589 	}
1590 
1591 flush_it:
1592 	/*
1593 	 * If we get here, then we could not cope with whatever message
1594 	 * we attempted to read, so flush it. If we did read a message,
1595 	 * and one isn't present, that is all right, because fd is in
1596 	 * nonblocking mode.
1597 	 */
1598 	(void) syslog(LOG_ERR,
1599 	    "Flushing one input message from <file descriptor %d/transport %s>",
1600 	    fd, nconf->nc_proto);
1601 
1602 	/*
1603 	 * Read and discard the message. Do this this until there is
1604 	 * no more control/data in the message or until we get an error.
1605 	 */
1606 	do {
1607 		ctl->maxlen = sizeof (ctlbuf);
1608 		ctl->buf = ctlbuf;
1609 		data->maxlen = sizeof (databuf);
1610 		data->buf = databuf;
1611 		flags = 0;
1612 		ret = getmsg(fd, ctl, data, &flags);
1613 		if (ret == -1)
1614 			return (errno);
1615 	} while (ret != 0);
1616 
1617 	return (0);
1618 }
1619 
1620 /*
1621  * Establish service thread.
1622  */
1623 static int
1624 rdcsvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
1625 {
1626 #ifdef	__NCALL__
1627 	struct ncall_svc_args nsa;
1628 #else	/* !__NCALL__ */
1629 	struct rdc_svc_args nsa;
1630 	_rdc_ioctl_t rdc_args = { 0, };
1631 #endif	/* __NCALL__ */
1632 
1633 	nsa.fd = fd;
1634 	nsa.nthr = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
1635 	(void) strncpy(nsa.netid, nconf->nc_netid, sizeof (nsa.netid));
1636 	nsa.addrmask.len = addrmask.len;
1637 	nsa.addrmask.maxlen = addrmask.maxlen;
1638 	nsa.addrmask.buf = addrmask.buf;
1639 
1640 #ifdef	__NCALL__
1641 	return (sndrsys(NC_IOC_SERVER, &nsa));
1642 #else	/* !__NCALL__ */
1643 	rdc_args.arg0 = (long)&nsa;
1644 	return (sndrsys(RDC_ENABLE_SVR, &rdc_args));
1645 #endif	/* __NCALL__ */
1646 }
1647 
1648 
1649 
1650 static int
1651 nofile_increase(int limit)
1652 {
1653 	struct rlimit rl;
1654 
1655 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
1656 		syslog(LOG_ERR,
1657 		    "nofile_increase() getrlimit of NOFILE failed: %m");
1658 		return (-1);
1659 	}
1660 
1661 	if (limit > 0)
1662 		rl.rlim_cur = limit;
1663 	else
1664 		rl.rlim_cur += NOFILE_INC_SIZE;
1665 
1666 	if (rl.rlim_cur > rl.rlim_max && rl.rlim_max != RLIM_INFINITY)
1667 		rl.rlim_max = rl.rlim_cur;
1668 
1669 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
1670 		syslog(LOG_ERR,
1671 		    "nofile_increase() setrlimit of NOFILE to %d failed: %m",
1672 		    rl.rlim_cur);
1673 		return (-1);
1674 	}
1675 
1676 	return (0);
1677 }
1678 
1679 int
1680 rdcd_bindit(struct netconfig *nconf, struct netbuf **addr,
1681     struct nd_hostserv *hs, int backlog)
1682 {
1683 	int fd;
1684 	struct t_bind *ntb;
1685 	struct t_bind tb;
1686 	struct nd_addrlist *addrlist;
1687 	struct t_optmgmt req, resp;
1688 	struct opthdr *opt;
1689 	char reqbuf[128];
1690 
1691 	if ((fd = rdc_transport_open(nconf)) == -1) {
1692 		syslog(LOG_ERR, "cannot establish transport service over %s",
1693 		    nconf->nc_device);
1694 		return (-1);
1695 	}
1696 
1697 	addrlist = (struct nd_addrlist *)NULL;
1698 	if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
1699 		if (strncmp(nconf->nc_netid, "udp", 3) != 0) {
1700 			syslog(LOG_ERR, "Cannot get address for transport "
1701 			    "%s host %s service %s",
1702 			    nconf->nc_netid, hs->h_host, hs->h_serv);
1703 		}
1704 		(void) t_close(fd);
1705 		return (-1);
1706 	}
1707 
1708 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
1709 		/*
1710 		 * If we're running over TCP, then set the
1711 		 * SO_REUSEADDR option so that we can bind
1712 		 * to our preferred address even if previously
1713 		 * left connections exist in FIN_WAIT states.
1714 		 * This is somewhat bogus, but otherwise you have
1715 		 * to wait 2 minutes to restart after killing it.
1716 		 */
1717 		if (reuseaddr(fd) == -1) {
1718 			syslog(LOG_WARNING,
1719 			    "couldn't set SO_REUSEADDR option on transport");
1720 		}
1721 	}
1722 
1723 	if (nconf->nc_semantics == NC_TPI_CLTS)
1724 		tb.qlen = 0;
1725 	else
1726 		tb.qlen = backlog;
1727 
1728 	/* LINTED pointer alignment */
1729 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
1730 	if (ntb == (struct t_bind *)NULL) {
1731 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
1732 		(void) t_close(fd);
1733 		netdir_free((void *)addrlist, ND_ADDRLIST);
1734 		return (-1);
1735 	}
1736 
1737 	tb.addr = *(addrlist->n_addrs);		/* structure copy */
1738 
1739 	if (t_bind(fd, &tb, ntb) == -1) {
1740 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
1741 		(void) t_free((char *)ntb, T_BIND);
1742 		netdir_free((void *)addrlist, ND_ADDRLIST);
1743 		(void) t_close(fd);
1744 		return (-1);
1745 	}
1746 
1747 	/* make sure we bound to the right address */
1748 	if (tb.addr.len != ntb->addr.len ||
1749 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0) {
1750 		syslog(LOG_ERR, "t_bind to wrong address");
1751 		(void) t_free((char *)ntb, T_BIND);
1752 		netdir_free((void *)addrlist, ND_ADDRLIST);
1753 		(void) t_close(fd);
1754 		return (-1);
1755 	}
1756 
1757 	*addr = &ntb->addr;
1758 	netdir_free((void *)addrlist, ND_ADDRLIST);
1759 
1760 	if (strcmp(nconf->nc_proto, "tcp") == 0 ||
1761 	    strcmp(nconf->nc_proto, "tcp6") == 0) {
1762 		/*
1763 		 * Disable the Nagle algorithm on TCP connections.
1764 		 * Connections accepted from this listener will
1765 		 * inherit the listener options.
1766 		 */
1767 
1768 		/* LINTED pointer alignment */
1769 		opt = (struct opthdr *)reqbuf;
1770 		opt->level = IPPROTO_TCP;
1771 		opt->name = TCP_NODELAY;
1772 		opt->len = sizeof (int);
1773 
1774 		/* LINTED pointer alignment */
1775 		*(int *)((char *)opt + sizeof (*opt)) = 1;
1776 
1777 		req.flags = T_NEGOTIATE;
1778 		req.opt.len = sizeof (*opt) + opt->len;
1779 		req.opt.buf = (char *)opt;
1780 		resp.flags = 0;
1781 		resp.opt.buf = reqbuf;
1782 		resp.opt.maxlen = sizeof (reqbuf);
1783 
1784 		if (t_optmgmt(fd, &req, &resp) < 0 ||
1785 		    resp.flags != T_SUCCESS) {
1786 			syslog(LOG_ERR, "couldn't set NODELAY option for "
1787 			    "proto %s: t_errno = %d, %m", nconf->nc_proto,
1788 			    t_errno);
1789 		}
1790 	}
1791 
1792 	return (fd);
1793 }
1794 
1795 
1796 /* ARGSUSED */
1797 static int
1798 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1799 		struct netconfig **retnconf)
1800 {
1801 	struct netconfig *nconf;
1802 	NCONF_HANDLE *nc;
1803 	struct nd_hostserv hs;
1804 
1805 	hs.h_host = HOST_SELF;
1806 	hs.h_serv = RDC_SERVICE;	/* serv_name_to_port_name(serv); */
1807 
1808 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1809 		syslog(LOG_ERR, "setnetconfig failed: %m");
1810 		return (-1);
1811 	}
1812 	while (nconf = getnetconfig(nc)) {
1813 		if (OK_TPI_TYPE(nconf) &&
1814 		    strcmp(nconf->nc_device, provider) == 0) {
1815 			*retnconf = nconf;
1816 			return (rdcd_bindit(nconf, addr, &hs, listen_backlog));
1817 		}
1818 	}
1819 	(void) endnetconfig(nc);
1820 	if ((Is_ipv6present() && (strcmp(provider, "/dev/tcp6") == 0)) ||
1821 	    (!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
1822 		syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1823 		    provider);
1824 	return (-1);
1825 }
1826 
1827 
1828 /*
1829  * For listen fd's index is always less than end_listen_fds.
1830  * It's value is equal to the number of open file descriptors after the
1831  * last listen end point was opened but before any connection was accepted.
1832  */
1833 static int
1834 is_listen_fd_index(int index)
1835 {
1836 	return (index < end_listen_fds);
1837 }
1838 
1839 
1840 /*
1841  * Create an address mask appropriate for the transport.
1842  * The mask is used to obtain the host-specific part of
1843  * a network address when comparing addresses.
1844  * For an internet address the host-specific part is just
1845  * the 32 bit IP address and this part of the mask is set
1846  * to all-ones. The port number part of the mask is zeroes.
1847  */
1848 static int
1849 set_addrmask(int fd, struct netconfig *nconf, struct netbuf *mask)
1850 {
1851 	struct t_info info;
1852 
1853 	/*
1854 	 * Find the size of the address we need to mask.
1855 	 */
1856 	if (t_getinfo(fd, &info) < 0) {
1857 		t_error("t_getinfo");
1858 		return (-1);
1859 	}
1860 	mask->len = mask->maxlen = info.addr;
1861 	if (info.addr <= 0) {
1862 		syslog(LOG_ERR, "set_addrmask: address size: %ld", info.addr);
1863 		return (-1);
1864 	}
1865 
1866 	mask->buf = (char *)malloc(mask->len);
1867 	if (mask->buf == NULL) {
1868 		syslog(LOG_ERR, "set_addrmask: no memory");
1869 		return (-1);
1870 	}
1871 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1872 
1873 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1874 		/*
1875 		 * Set the mask so that the port is ignored.
1876 		 */
1877 		/* LINTED pointer alignment */
1878 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1879 		    (in_addr_t)~0;
1880 		/* LINTED pointer alignment */
1881 		((struct sockaddr_in *)mask->buf)->sin_family = (sa_family_t)~0;
1882 	}
1883 #ifdef NC_INET6
1884 	else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1885 		/* LINTED pointer alignment */
1886 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1887 		    (uchar_t)~0, sizeof (struct in6_addr));
1888 		/* LINTED pointer alignment */
1889 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1890 		    (sa_family_t)~0;
1891 	}
1892 #endif
1893 	else {
1894 		/*
1895 		 * Set all mask bits.
1896 		 */
1897 		(void) memset(mask->buf, (uchar_t)~0, mask->len);
1898 	}
1899 	return (0);
1900 }
1901 
1902 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
1903 
1904 static int
1905 sndrsvcpool(int maxservers)
1906 {
1907 	struct svcpool_args npa;
1908 
1909 	npa.id = RDC_SVCPOOL_ID;
1910 	npa.maxthreads = maxservers;
1911 	npa.redline = 0;
1912 	npa.qsize = 0;
1913 	npa.timeout = 0;
1914 	npa.stksize = 0;
1915 	npa.max_same_xprt = 0;
1916 	return (sndrsys(RDC_POOL_CREATE, &npa));
1917 }
1918 
1919 
1920 /*
1921  * The following stolen from cmd/fs.d/nfs/lib/thrpool.c
1922  */
1923 
1924 #include <thread.h>
1925 
1926 /*
1927  * Thread to call into the kernel and do work on behalf of SNDR/ncall-ip.
1928  */
1929 static void *
1930 svcstart(void *arg)
1931 {
1932 	int id = (int)arg;
1933 	int err;
1934 
1935 	while ((err = sndrsys(RDC_POOL_RUN, &id)) != 0) {
1936 		/*
1937 		 * Interrupted by a signal while in the kernel.
1938 		 * this process is still alive, try again.
1939 		 */
1940 		if (err == EINTR)
1941 			continue;
1942 		else
1943 			break;
1944 	}
1945 
1946 	/*
1947 	 * If we weren't interrupted by a signal, but did
1948 	 * return from the kernel, this thread's work is done,
1949 	 * and it should exit.
1950 	 */
1951 	thr_exit(NULL);
1952 	return (NULL);
1953 }
1954 
1955 /*
1956  * User-space "creator" thread. This thread blocks in the kernel
1957  * until new worker threads need to be created for the service
1958  * pool. On return to userspace, if there is no error, create a
1959  * new thread for the service pool.
1960  */
1961 static void *
1962 svcblock(void *arg)
1963 {
1964 	int id = (int)arg;
1965 
1966 	/* CONSTCOND */
1967 	while (1) {
1968 		thread_t tid;
1969 		int err;
1970 
1971 		/*
1972 		 * Call into the kernel, and hang out there
1973 		 * until a thread needs to be created.
1974 		 */
1975 		if (err = sndrsys(RDC_POOL_WAIT, &id)) {
1976 			if (err == ECANCELED || err == EBUSY)
1977 				/*
1978 				 * If we get back ECANCELED, the service
1979 				 * pool is exiting, and we may as well
1980 				 * clean up this thread. If EBUSY is
1981 				 * returned, there's already a thread
1982 				 * looping on this pool, so we should
1983 				 * give up.
1984 				 */
1985 				break;
1986 			else
1987 				continue;
1988 		}
1989 
1990 		(void) thr_create(NULL, NULL, svcstart, (void *)id,
1991 		    THR_BOUND | THR_DETACHED, &tid);
1992 	}
1993 
1994 	thr_exit(NULL);
1995 	return (NULL);
1996 }
1997 
1998 static int
1999 svcwait(int id)
2000 {
2001 	thread_t tid;
2002 
2003 	/*
2004 	 * Create a bound thread to wait for kernel LWPs that
2005 	 * need to be created.
2006 	 */
2007 	if (thr_create(NULL, NULL, svcblock, (void *)id,
2008 	    THR_BOUND | THR_DETACHED, &tid))
2009 		return (1);
2010 
2011 	return (0);
2012 }
2013 #endif /* Solaris 9+ */
2014