xref: /titanic_44/usr/src/cmd/avs/rdc/sndrd.c (revision 49b225e1cfa7bbf7738d4df0a03f18e3283426eb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Network SNDR/ncall-ip server - based on nfsd
28  */
29 #include <sys/types.h>
30 #include <rpc/types.h>
31 #include <errno.h>
32 #include <netdb.h>
33 #include <sys/socket.h>
34 #include <netconfig.h>
35 #include <stropts.h>
36 #include <fcntl.h>
37 #include <stdio.h>
38 #include <strings.h>
39 #include <signal.h>
40 #include <unistd.h>
41 #include <stdlib.h>
42 #include <netdir.h>
43 #include <rpc/rpc_com.h>
44 #include <rpc/rpc.h>
45 #include <tiuser.h>
46 #include <netinet/tcp.h>
47 #include <netinet/in.h>
48 #include <syslog.h>
49 #include <locale.h>
50 #include <langinfo.h>
51 #include <libintl.h>
52 #include <libgen.h>
53 #include <deflt.h>
54 #include <sys/resource.h>
55 
56 #include <sys/nsctl/nsctl.h>
57 
58 #ifdef	__NCALL__
59 
60 #include <sys/ncall/ncall.h>
61 #include <sys/ncall/ncall_ip.h>
62 #include <sys/nsctl/libncall.h>
63 
64 #define	RDC_POOL_CREATE	NC_IOC_POOL_CREATE
65 #define	RDC_POOL_RUN	NC_IOC_POOL_RUN
66 #define	RDC_POOL_WAIT	NC_IOC_POOL_WAIT
67 #define	RDC_PROGRAM	NCALL_PROGRAM
68 #define	RDC_SERVICE	"ncall"
69 #undef RDC_SVCPOOL_ID	/* We are overloading this value */
70 #define	RDC_SVCPOOL_ID	NCALL_SVCPOOL_ID
71 #define	RDC_SVC_NAME	"NCALL"
72 #define	RDC_VERS_MIN	NCALL_VERS_MIN
73 #define	RDC_VERS_MAX	NCALL_VERS_MAX
74 
75 #else	/* !__NCALL__ */
76 
77 #include <sys/nsctl/rdc_ioctl.h>
78 #include <sys/nsctl/rdc_io.h>
79 #include <sys/nsctl/librdc.h>
80 
81 #define	RDC_SERVICE	"rdc"
82 #define	RDC_SVC_NAME	"RDC"
83 
84 #endif	/* __NCALL__ */
85 
86 #define	RDCADMIN	"/etc/default/sndr"
87 
88 #include <nsctl.h>
89 
/*
 * One pending connect indication.  Indications are kept on a circular,
 * doubly-linked list (a single entry points at itself) while a listener
 * works through its accept queue.
 */
struct conn_ind {
	struct conn_ind *conn_next, *conn_prev;	/* circular list links */
	struct t_call *conn_call;	/* call data filled in by t_listen() */
};
95 
96 struct conn_entry {
97 	bool_t			closing;
98 	struct netconfig	nc;
99 };
100 
101 static char *progname;
102 static struct conn_entry *conn_polled;
103 static int num_conns;			/* Current number of connections */
104 static struct pollfd *poll_array;	/* array of poll descriptors for poll */
105 static size_t num_fds = 0;		/* number of transport fds opened */
106 static void poll_for_action();
107 static void remove_from_poll_list(int);
108 static int do_poll_cots_action(int, int);
109 static int do_poll_clts_action(int, int);
110 static void add_to_poll_list(int, struct netconfig *);
111 static int bind_to_provider(char *, char *, struct netbuf **,
112     struct netconfig **);
113 static int set_addrmask(int, struct netconfig *, struct netbuf *);
114 static void conn_close_oldest(void);
115 static boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
116 static void cots_listen_event(int, int);
117 static int discon_get(int, struct netconfig *, struct conn_ind **);
118 static int nofile_increase(int);
119 static int is_listen_fd_index(int);
120 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
121 static int sndrsvcpool(int);
122 static int svcwait(int id);
123 #endif
124 
125 
/*
 * RPC protocol block: names one RPC program and the inclusive range of
 * versions to register for it.  Blocks can be chained through `next'.
 */
struct protob {
	char *serv;		/* ASCII service name, e.g. "RDC" */
	int versmin, versmax;	/* lowest and highest version to register */
	int program;		/* program no. to be registered */
	struct protob *next;	/* next entry on list */
};
136 
137 
138 
/* poll_array slots below this index were set up before polling began */
static size_t end_listen_fds;
static int debugflg;			/* -d: do not fork into background */
static int max_conns_allowed = -1;	/* -1 means no limit was given */
static int listen_backlog = 10;
static char *trans_provider;		/* transport device to serve, if set */

static int rdcsvc(int, struct netbuf, struct netconfig *);

/* Registration hook used by cots_listen_event() for accepted streams. */
static int (*Mysvc)(int, struct netbuf, struct netconfig *) = rdcsvc;
148 
/*
 * Determine valid semantics for rdc: true when the netconfig entry
 * `_nconf' uses connectionless, connection-oriented or
 * connection-oriented-with-orderly-release service.
 *
 * The argument is parenthesised so that any pointer expression may be
 * passed; note that it is evaluated up to three times.
 */
#define	OK_TPI_TYPE(_nconf)	\
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
156 
/*
 * Assemble a 32-bit value from the four bytes starting at `a', most
 * significant byte first.
 *
 * `a' is parenthesised before the cast so that pointer arithmetic in
 * the argument is done in the caller's pointer type, not in bytes.
 * The argument is evaluated four times.
 */
#define	BE32_TO_U32(a)		\
	((((uint32_t)((const unsigned char *)(a))[0] & 0xFF) << 24) | \
	(((uint32_t)((const unsigned char *)(a))[1] & 0xFF) << 16) | \
	(((uint32_t)((const unsigned char *)(a))[2] & 0xFF) << 8)  | \
	((uint32_t)((const unsigned char *)(a))[3] & 0xFF))
162 
/*
 * Transport devices tried, in this order, when neither -a nor an
 * explicit provider was requested.  The list is NULL-terminated.
 * Only support UDP in DEBUG mode for now.
 */
static	char *defaultproviders[] = {
#ifdef DEBUG
	"/dev/tcp",
	"/dev/tcp6",
	"/dev/udp",
	"/dev/udp6",
#else
	"/dev/tcp6",
	"/dev/tcp",
#endif
	NULL
};
172 
/*
 * Number of elements to add to the poll array on each allocation.
 */
#define	POLL_ARRAY_INC_SIZE	64

/*
 * NOTE(review): presumably the step by which nofile_increase() raises
 * the open-file limit; its only user is outside this part of the file.
 */
#define	NOFILE_INC_SIZE		64
178 
/*
 * Path of the control device used to talk to the kernel service; it
 * differs between the ncall-ip and the SNDR build of this daemon.
 */
#ifdef	__NCALL__
const char *rdc_devr = "/dev/ncallip";
#else
const char *rdc_devr = "/dev/rdc";
#endif

/* Descriptor for rdc_devr; zero until open_rdc() has succeeded. */
static int rdc_fdr;

/*
 * Open the control device read-only and remember the descriptor in
 * rdc_fdr.  Returns the descriptor, or -1 (leaving rdc_fdr untouched)
 * when open(2) fails.
 */
static int
open_rdc(void)
{
	int devfd;

	if ((devfd = open(rdc_devr, O_RDONLY)) < 0)
		return (-1);

	rdc_fdr = devfd;
	return (devfd);
}
197 
198 static int
199 sndrsys(int type, void *arg)
200 {
201 	int ret = -1;
202 	if (!rdc_fdr && open_rdc() < 0) { /* open failed */
203 		syslog(LOG_ERR, "open_rdc() failed: %m\n");
204 	} else {
205 		if ((ret = ioctl(rdc_fdr, type, arg)) < 0) {
206 			syslog(LOG_ERR, "ioctl(rdc_ioctl) failed: %m\n");
207 		}
208 	}
209 	return (ret);
210 }
211 
212 int
213 rdc_transport_open(struct netconfig *nconf)
214 {
215 	int fd;
216 	struct strioctl	strioc;
217 
218 	if ((nconf == (struct netconfig *)NULL) ||
219 	    (nconf->nc_device == (char *)NULL)) {
220 		syslog(LOG_ERR, "No netconfig device");
221 		return (-1);
222 	}
223 
224 	/*
225 	 * Open the transport device.
226 	 */
227 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
228 	if (fd == -1)  {
229 		if (t_errno == TSYSERR && errno == EMFILE &&
230 				(nofile_increase(0) == 0)) {
231 			/* Try again with a higher NOFILE limit. */
232 			fd = t_open(nconf->nc_device, O_RDWR,
233 				(struct t_info *)NULL);
234 		}
235 		if (fd == -1) {
236 			if (t_errno == TSYSERR) {
237 				syslog(LOG_ERR, "t_open failed: %m");
238 			} else {
239 				syslog(LOG_ERR, "t_open failed: %s",
240 				    t_errlist[t_errno]);
241 			}
242 			return (-1);
243 		}
244 	}
245 
246 	/*
247 	 * Pop timod because the RPC module must be as close as possible
248 	 * to the transport.
249 	 */
250 	if (ioctl(fd, I_POP, 0) < 0) {
251 		syslog(LOG_ERR, "I_POP of timod failed: %m");
252 		if (t_close(fd) == -1) {
253 			if (t_errno == TSYSERR) {
254 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
255 			} else {
256 				syslog(LOG_ERR, "t_close failed on %d: %s",
257 				    fd, t_errlist[t_errno]);
258 			}
259 		}
260 		return (-1);
261 	}
262 
263 	if (nconf->nc_semantics == NC_TPI_CLTS) {
264 		/*
265 		 * Push rpcmod to filter data traffic to KRPC.
266 		 */
267 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
268 			syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
269 			(void) t_close(fd);
270 			return (-1);
271 		}
272 	} else {
273 		if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
274 			syslog(LOG_ERR, "I_PUSH of CONS rpcmod failed: %m");
275 			if (t_close(fd) == -1) {
276 				if (t_errno == TSYSERR) {
277 					syslog(LOG_ERR,
278 						"t_close failed on %d: %m", fd);
279 				} else {
280 					syslog(LOG_ERR,
281 						"t_close failed on %d: %s",
282 						fd, t_errlist[t_errno]);
283 				}
284 			}
285 			return (-1);
286 		}
287 
288 		strioc.ic_cmd = RPC_SERVER;
289 		strioc.ic_dp = (char *)0;
290 		strioc.ic_len = 0;
291 		strioc.ic_timout = -1;
292 		/* Tell CONS rpcmod to act like a server stream. */
293 		if (ioctl(fd, I_STR, &strioc) < 0) {
294 			syslog(LOG_ERR, "CONS rpcmod set-up ioctl failed: %m");
295 			if (t_close(fd) == -1) {
296 				if (t_errno == TSYSERR) {
297 					syslog(LOG_ERR,
298 						"t_close failed on %d: %m", fd);
299 				} else {
300 					syslog(LOG_ERR,
301 						"t_close failed on %d: %s",
302 						fd, t_errlist[t_errno]);
303 				}
304 			}
305 			return (-1);
306 		}
307 	}
308 
309 	/*
310 	 * Re-push timod so that we will still be doing TLI
311 	 * operations on the descriptor.
312 	 */
313 	if (ioctl(fd, I_PUSH, "timod") < 0) {
314 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
315 		if (t_close(fd) == -1) {
316 			if (t_errno == TSYSERR) {
317 				syslog(LOG_ERR, "t_close failed on %d: %m", fd);
318 			} else {
319 				syslog(LOG_ERR, "t_close failed on %d: %s",
320 				    fd, t_errlist[t_errno]);
321 			}
322 		}
323 		return (-1);
324 	}
325 
326 	return (fd);
327 }
328 
329 
330 void
331 rdcd_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
332 {
333 	int error;
334 
335 	/*
336 	 * Save the error code across syslog(), just in case syslog()
337 	 * gets its own error and, therefore, overwrites errno.
338 	 */
339 	error = errno;
340 	if (t_errno == TSYSERR) {
341 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
342 		    tli_name, fd, nconf->nc_proto);
343 	} else {
344 		syslog(LOG_ERR,
345 		    "%s(file descriptor %d/transport %s) TLI error %d",
346 		    tli_name, fd, nconf->nc_proto, t_errno);
347 	}
348 	errno = error;
349 }
350 
351 /*
352  * Called to set up service over a particular transport
353  */
354 void
355 do_one(char *provider, char *proto, struct protob *protobp0,
356 	int (*svc)(int, struct netbuf, struct netconfig *))
357 {
358 	struct netbuf *retaddr;
359 	struct netconfig *retnconf;
360 	struct netbuf addrmask;
361 	int vers;
362 	int sock;
363 
364 	if (provider) {
365 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
366 		    &retnconf);
367 	} else {
368 		(void) syslog(LOG_ERR,
369 	"Cannot establish %s service over %s: transport setup problem.",
370 		    protobp0->serv, provider ? provider : proto);
371 		return;
372 	}
373 
374 	if (sock == -1) {
375 		if ((Is_ipv6present() &&
376 		(strcmp(provider, "/dev/tcp6") == 0)) ||
377 		(!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
378 			(void) syslog(LOG_ERR,
379 			    "Cannot establish %s service over %s: transport "
380 				"setup problem.",
381 				protobp0->serv, provider ? provider : proto);
382 		return;
383 	}
384 
385 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
386 		(void) syslog(LOG_ERR,
387 		    "Cannot set address mask for %s", retnconf->nc_netid);
388 		return;
389 	}
390 
391 
392 	/*
393 	 * Register all versions of the programs in the protocol block list
394 	 */
395 	for (vers = protobp0->versmin; vers <= protobp0->versmax; vers++) {
396 		(void) rpcb_unset(protobp0->program, vers, retnconf);
397 		(void) rpcb_set(protobp0->program, vers, retnconf, retaddr);
398 	}
399 
400 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
401 		/* Don't drop core if supporting module(s) aren't loaded. */
402 		(void) signal(SIGSYS, SIG_IGN);
403 
404 		/*
405 		 * svc() doesn't block, it returns success or failure.
406 		 */
407 		if ((*svc)(sock, addrmask, retnconf) < 0) {
408 			(void) syslog(LOG_ERR,
409 "Cannot establish %s service over <file desc. %d, protocol %s> : %m. Exiting",
410 				protobp0->serv, sock, retnconf->nc_proto);
411 			exit(1);
412 		}
413 	}
414 	/*
415 	 * We successfully set up the server over this transport.
416 	 * Add this descriptor to the one being polled on.
417 	 */
418 	add_to_poll_list(sock, retnconf);
419 }
420 
421 /*
422  * Set up the SNDR/ncall-ip service over all the available transports.
423  * Returns -1 for failure, 0 for success.
424  */
425 int
426 do_all(struct protob *protobp,
427 	int (*svc)(int, struct netbuf, struct netconfig *))
428 {
429 	struct netconfig *nconf;
430 	NCONF_HANDLE *nc;
431 
432 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
433 		syslog(LOG_ERR, "setnetconfig failed: %m");
434 		return (-1);
435 	}
436 	while (nconf = getnetconfig(nc)) {
437 		if ((nconf->nc_flag & NC_VISIBLE) &&
438 		    strcmp(nconf->nc_protofmly, "loopback") != 0 &&
439 		    OK_TPI_TYPE(nconf))
440 			do_one(nconf->nc_device, nconf->nc_proto,
441 				protobp, svc);
442 	}
443 	(void) endnetconfig(nc);
444 	return (0);
445 }
446 
447 /*
448  * Read the /etc/default/sndr configuration file to determine if the
449  * client has been configured for number of threads, backlog or transport
450  * provider.
451  */
452 
453 static void
454 read_default(void)
455 {
456 	char *defval, *tmp_str;
457 	int errno;
458 	int tmp;
459 
460 	/* Fail silently if error in opening the default rdc config file */
461 	if ((defopen(RDCADMIN)) == 0) {
462 		if ((defval = defread("SNDR_THREADS=")) != NULL) {
463 			errno = 0;
464 			tmp = strtol(defval, (char **)NULL, 10);
465 			if (errno == 0) {
466 				max_conns_allowed = tmp;
467 			}
468 		}
469 		if ((defval = defread("SNDR_LISTEN_BACKLOG=")) != NULL) {
470 			errno = 0;
471 			tmp = strtol(defval, (char **)NULL, 10);
472 			if (errno == 0) {
473 				listen_backlog = tmp;
474 			}
475 		}
476 		if ((defval = defread("SNDR_TRANSPORT=")) != NULL) {
477 			errno = 0;
478 			tmp_str = strdup(defval);
479 			if (errno == 0) {
480 				trans_provider = tmp_str;
481 			}
482 		}
483 		/* close defaults file */
484 		defopen(NULL);
485 	}
486 }
487 #ifdef lint
488 int
489 sndrd_lintmain(int ac, char **av)
490 #else
491 int
492 main(int ac, char **av)
493 #endif
494 {
495 	const char *dir = "/";
496 	int allflag = 0;
497 	int pid;
498 	int i, rc;
499 	struct protob *protobp0, *protobp;
500 	char **providerp;
501 	char *required;
502 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
503 	int maxservers;
504 #endif
505 
506 	(void) setlocale(LC_ALL, "");
507 #ifdef	__NCALL__
508 	(void) textdomain("ncall");
509 #else
510 	(void) textdomain("rdc");
511 #endif
512 
513 	progname = basename(av[0]);
514 
515 #ifdef	__NCALL__
516 	rc = ncall_check_release(&required);
517 #else
518 	rc = rdc_check_release(&required);
519 #endif
520 	if (rc < 0) {
521 		(void) fprintf(stderr,
522 		    gettext("%s: unable to determine the current "
523 		    "Solaris release: %s\n"), progname, strerror(errno));
524 		exit(1);
525 	} else if (rc == FALSE) {
526 		(void) fprintf(stderr,
527 		    gettext("%s: incorrect Solaris release (requires %s)\n"),
528 		    progname, required);
529 		exit(1);
530 	}
531 
532 	openlog(progname, LOG_PID|LOG_CONS, LOG_DAEMON);
533 	read_default();
534 
535 	/*
536 	 * Usage: <progname> [-c <number of threads>] [-t protocol] \
537 	 *		[-d] [-l <listen backlog>]
538 	 */
539 	while ((i = getopt(ac, av, "ac:t:dl:")) != EOF) {
540 		switch (i) {
541 			case 'a':
542 				allflag = 1;
543 				break;
544 			case 'c':
545 				max_conns_allowed = atoi(optarg);
546 				if (max_conns_allowed <= 0)
547 					max_conns_allowed = 16;
548 				break;
549 
550 			case 'd':
551 				debugflg++;
552 				break;
553 
554 			case 't':
555 				trans_provider = optarg;
556 				break;
557 
558 			case 'l':
559 				listen_backlog = atoi(optarg);
560 				if (listen_backlog < 0)
561 					listen_backlog = 32;
562 				break;
563 
564 			default:
565 				syslog(LOG_ERR,
566 				    "Usage: %s [-c <number of threads>] "
567 				    "[-d] [-t protocol] "
568 				    "[-l <listen backlog>]\n", progname);
569 				exit(1);
570 				break;
571 		}
572 	}
573 
574 	if (chroot(dir) < 0) {
575 		syslog(LOG_ERR, "chroot failed: %m");
576 		exit(1);
577 	}
578 
579 	if (chdir(dir) < 0) {
580 		syslog(LOG_ERR, "chdir failed: %m");
581 		exit(1);
582 	}
583 
584 	if (!debugflg) {
585 		pid = fork();
586 		if (pid < 0) {
587 			syslog(LOG_ERR, "Fork failed\n");
588 			exit(1);
589 		}
590 		if (pid != 0)
591 			exit(0);
592 
593 		/*
594 		 * Close existing file descriptors, open "/dev/null" as
595 		 * standard input, output, and error, and detach from
596 		 * controlling terminal.
597 		 */
598 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
599 		/* use closefrom(3C) from PSARC/2000/193 when possible */
600 		closefrom(0);
601 #else
602 		for (i = 0; i < _NFILE; i++)
603 			(void) close(i);
604 #endif
605 		(void) open("/dev/null", O_RDONLY);
606 		(void) open("/dev/null", O_WRONLY);
607 		(void) dup(1);
608 		(void) setsid();
609 
610 		/*
611 		 * ignore all signals apart from SIGTERM.
612 		 */
613 		for (i = 1; i < _sys_nsig; i++)
614 			(void) sigset(i, SIG_IGN);
615 
616 		(void) sigset(SIGTERM, SIG_DFL);
617 	}
618 
619 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
620 	/*
621 	 * Set up kernel RPC thread pool for the SNDR/ncall-ip server.
622 	 */
623 	maxservers = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
624 	if (sndrsvcpool(maxservers)) {
625 		(void) syslog(LOG_ERR,
626 		    "Can't set up kernel %s service: %m. Exiting", progname);
627 		exit(1);
628 	}
629 
630 	/*
631 	 * Set up blocked thread to do LWP creation on behalf of the kernel.
632 	 */
633 	if (svcwait(RDC_SVCPOOL_ID)) {
634 		(void) syslog(LOG_ERR,
635 		    "Can't set up %s pool creator: %m, Exiting", progname);
636 		exit(1);
637 	}
638 #endif
639 
640 	/*
641 	 * Build a protocol block list for registration.
642 	 */
643 	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
644 	protobp->serv = RDC_SVC_NAME;
645 	protobp->versmin = RDC_VERS_MIN;
646 	protobp->versmax = RDC_VERS_MAX;
647 	protobp->program = RDC_PROGRAM;
648 	protobp->next = (struct protob *)NULL;
649 
650 	if (allflag) {
651 		if (do_all(protobp0, rdcsvc) == -1)
652 			exit(1);
653 	} else if (trans_provider)
654 		do_one(trans_provider, NULL, protobp0, rdcsvc);
655 	else {
656 		for (providerp = defaultproviders;
657 		    *providerp != NULL; providerp++) {
658 			trans_provider = *providerp;
659 			do_one(trans_provider, NULL, protobp0, rdcsvc);
660 		}
661 	}
662 
663 done:
664 	free(protobp);
665 
666 	end_listen_fds = num_fds;
667 	/*
668 	 * Poll for non-data control events on the transport descriptors.
669 	 */
670 	poll_for_action();
671 
672 	syslog(LOG_ERR, "%s fatal server error\n", progname);
673 
674 	return (-1);
675 }
676 
677 static int
678 reuseaddr(int fd)
679 {
680 	struct t_optmgmt req, resp;
681 	struct opthdr *opt;
682 	char reqbuf[128];
683 	int *ip;
684 
685 	/* LINTED pointer alignment */
686 	opt = (struct opthdr *)reqbuf;
687 	opt->level = SOL_SOCKET;
688 	opt->name = SO_REUSEADDR;
689 	opt->len = sizeof (int);
690 
691 	/* LINTED pointer alignment */
692 	ip = (int *)&reqbuf[sizeof (struct opthdr)];
693 	*ip = 1;
694 
695 	req.flags = T_NEGOTIATE;
696 	req.opt.len = sizeof (struct opthdr) + opt->len;
697 	req.opt.buf = (char *)opt;
698 
699 	resp.flags = 0;
700 	resp.opt.buf = reqbuf;
701 	resp.opt.maxlen = sizeof (reqbuf);
702 
703 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
704 		if (t_errno == TSYSERR) {
705 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %m\n");
706 		} else {
707 			syslog(LOG_ERR, "reuseaddr() t_optmgmt failed: %s\n",
708 			    t_errlist[t_errno]);
709 		}
710 		return (-1);
711 	}
712 	return (0);
713 }
714 
715 /*
716  * poll on the open transport descriptors for events and errors.
717  */
718 void
719 poll_for_action(void)
720 {
721 	int nfds;
722 	int i;
723 
724 	/*
725 	 * Keep polling until all transports have been closed. When this
726 	 * happens, we return.
727 	 */
728 	while ((int)num_fds > 0) {
729 		nfds = poll(poll_array, num_fds, INFTIM);
730 		switch (nfds) {
731 		case 0:
732 			continue;
733 
734 		case -1:
735 			/*
736 			 * Some errors from poll could be
737 			 * due to temporary conditions, and we try to
738 			 * be robust in the face of them. Other
739 			 * errors (should never happen in theory)
740 			 * are fatal (eg. EINVAL, EFAULT).
741 			 */
742 			switch (errno) {
743 			case EINTR:
744 			    continue;
745 
746 			case EAGAIN:
747 			case ENOMEM:
748 				(void) sleep(10);
749 				continue;
750 
751 			default:
752 				(void) syslog(LOG_ERR,
753 				    "poll failed: %m. Exiting");
754 				exit(1);
755 			}
756 		default:
757 			break;
758 		}
759 
760 		/*
761 		 * Go through the poll list looking for events.
762 		 */
763 		for (i = 0; i < num_fds && nfds > 0; i++) {
764 			if (poll_array[i].revents) {
765 				nfds--;
766 				/*
767 				 * We have a message, so try to read it.
768 				 * Record the error return in errno,
769 				 * so that syslog(LOG_ERR, "...%m")
770 				 * dumps the corresponding error string.
771 				 */
772 				if (conn_polled[i].nc.nc_semantics ==
773 				    NC_TPI_CLTS) {
774 					errno = do_poll_clts_action(
775 					    poll_array[i].fd, i);
776 				} else {
777 					errno = do_poll_cots_action(
778 					    poll_array[i].fd, i);
779 				}
780 
781 				if (errno == 0)
782 					continue;
783 				/*
784 				 * Most returned error codes mean that there is
785 				 * fatal condition which we can only deal with
786 				 * by closing the transport.
787 				 */
788 				if (errno != EAGAIN && errno != ENOMEM) {
789 					(void) syslog(LOG_ERR,
790 					    "Error (%m) reading descriptor %d"
791 					    "/transport %s. Closing it.",
792 					    poll_array[i].fd,
793 					    conn_polled[i].nc.nc_proto);
794 					(void) t_close(poll_array[i].fd);
795 					remove_from_poll_list(poll_array[i].fd);
796 				} else if (errno == ENOMEM)
797 					(void) sleep(5);
798 			}
799 		}
800 	}
801 
802 	(void) syslog(LOG_ERR,
803 	    "All transports have been closed with errors. Exiting.");
804 }
805 
806 /*
807  * Allocate poll/transport array entries for this descriptor.
808  */
809 static void
810 add_to_poll_list(int fd, struct netconfig *nconf)
811 {
812 	static int poll_array_size = 0;
813 
814 	/*
815 	 * If the arrays are full, allocate new ones.
816 	 */
817 	if (num_fds == poll_array_size) {
818 		struct pollfd *tpa;
819 		struct conn_entry *tnp;
820 
821 		if (poll_array_size != 0) {
822 			tpa = poll_array;
823 			tnp = conn_polled;
824 		} else
825 			tpa = (struct pollfd *)0;
826 
827 		poll_array_size += POLL_ARRAY_INC_SIZE;
828 
829 		/*
830 		 * Allocate new arrays.
831 		 */
832 		poll_array = (struct pollfd *)
833 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
834 		conn_polled = (struct conn_entry *)
835 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
836 		if (poll_array == (struct pollfd *)NULL ||
837 		    conn_polled == (struct conn_entry *)NULL) {
838 			syslog(LOG_ERR, "malloc failed for poll array");
839 			exit(1);
840 		}
841 
842 		/*
843 		 * Copy the data of the old ones into new arrays, and
844 		 * free the old ones.
845 		 * num_fds is guaranteed to be less than
846 		 * poll_array_size, so this memcpy is safe.
847 		 */
848 		if (tpa) {
849 			(void) memcpy((void *)poll_array, (void *)tpa,
850 				num_fds * sizeof (struct pollfd));
851 			(void) memcpy((void *)conn_polled, (void *)tnp,
852 				num_fds * sizeof (struct conn_entry));
853 			free((void *)tpa);
854 			free((void *)tnp);
855 		}
856 	}
857 
858 	/*
859 	 * Set the descriptor and event list. All possible events are
860 	 * polled for.
861 	 */
862 	poll_array[num_fds].fd = fd;
863 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
864 
865 	/*
866 	 * Copy the transport data over too.
867 	 */
868 	conn_polled[num_fds].nc = *nconf;	/* structure copy */
869 	conn_polled[num_fds].closing = 0;
870 
871 	/*
872 	 * Set the descriptor to non-blocking. Avoids a race
873 	 * between data arriving on the stream and then having it
874 	 * flushed before we can read it.
875 	 */
876 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
877 		(void) syslog(LOG_ERR,
878 		    "fcntl(file desc. %d/transport %s, F_SETFL, "
879 		    "O_NONBLOCK): %m. Exiting",
880 		    num_fds, nconf->nc_proto);
881 		exit(1);
882 	}
883 
884 	/*
885 	 * Count this descriptor.
886 	 */
887 	++num_fds;
888 }
889 
890 static void
891 remove_from_poll_list(int fd)
892 {
893 	int i;
894 	int num_to_copy;
895 
896 	for (i = 0; i < num_fds; i++) {
897 		if (poll_array[i].fd == fd) {
898 			--num_fds;
899 			num_to_copy = num_fds - i;
900 			(void) memcpy((void *)&poll_array[i],
901 			    (void *)&poll_array[i+1],
902 			    num_to_copy * sizeof (struct pollfd));
903 			(void) memset((void *)&poll_array[num_fds], 0,
904 			    sizeof (struct pollfd));
905 			(void) memcpy((void *)&conn_polled[i],
906 			    (void *)&conn_polled[i+1],
907 			    num_to_copy * sizeof (struct conn_entry));
908 			(void) memset((void *)&conn_polled[num_fds], 0,
909 			    sizeof (struct conn_entry));
910 			return;
911 		}
912 	}
913 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
914 
915 }
916 
917 static void
918 conn_close_oldest(void)
919 {
920 	int fd;
921 	int i1;
922 
923 	/*
924 	 * Find the oldest connection that is not already in the
925 	 * process of shutting down.
926 	 */
927 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
928 		if (i1 >= num_fds)
929 			return;
930 		if (conn_polled[i1].closing == 0)
931 			break;
932 	}
933 #ifdef DEBUG
934 	(void) printf("too many connections (%d), releasing oldest (%d)\n",
935 	    num_conns, poll_array[i1].fd);
936 #else
937 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
938 	    num_conns, poll_array[i1].fd);
939 #endif
940 	fd = poll_array[i1].fd;
941 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
942 		/*
943 		 * For politeness, send a T_DISCON_REQ to the transport
944 		 * provider.  We close the stream anyway.
945 		 */
946 		(void) t_snddis(fd, (struct t_call *)0);
947 		num_conns--;
948 		remove_from_poll_list(fd);
949 		(void) t_close(fd);
950 	} else {
951 		/*
952 		 * For orderly release, we do not close the stream
953 		 * until the T_ORDREL_IND arrives to complete
954 		 * the handshake.
955 		 */
956 		if (t_sndrel(fd) == 0)
957 			conn_polled[i1].closing = 1;
958 	}
959 }
960 
961 static boolean_t
962 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
963 {
964 	struct conn_ind	*conn;
965 	struct conn_ind	*next_conn;
966 
967 	conn = (struct conn_ind *)malloc(sizeof (*conn));
968 	if (conn == NULL) {
969 		syslog(LOG_ERR, "malloc for listen indication failed");
970 		return (FALSE);
971 	}
972 
973 	/* LINTED pointer alignment */
974 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
975 	if (conn->conn_call == NULL) {
976 		free((char *)conn);
977 		rdcd_log_tli_error("t_alloc", fd, nconf);
978 		return (FALSE);
979 	}
980 
981 	if (t_listen(fd, conn->conn_call) == -1) {
982 		rdcd_log_tli_error("t_listen", fd, nconf);
983 		(void) t_free((char *)conn->conn_call, T_CALL);
984 		free((char *)conn);
985 		return (FALSE);
986 	}
987 
988 	if (conn->conn_call->udata.len > 0) {
989 		syslog(LOG_WARNING,
990 		    "rejecting inbound connection(%s) with %d bytes "
991 		    "of connect data",
992 		    nconf->nc_proto, conn->conn_call->udata.len);
993 
994 		conn->conn_call->udata.len = 0;
995 		(void) t_snddis(fd, conn->conn_call);
996 		(void) t_free((char *)conn->conn_call, T_CALL);
997 		free((char *)conn);
998 		return (FALSE);
999 	}
1000 
1001 	if ((next_conn = *connp) != NULL) {
1002 		next_conn->conn_prev->conn_next = conn;
1003 		conn->conn_next = next_conn;
1004 		conn->conn_prev = next_conn->conn_prev;
1005 		next_conn->conn_prev = conn;
1006 	} else {
1007 		conn->conn_next = conn;
1008 		conn->conn_prev = conn;
1009 		*connp = conn;
1010 	}
1011 	return (TRUE);
1012 }
1013 
1014 static int
1015 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1016 {
1017 	struct conn_ind	*conn;
1018 	struct t_discon	discon;
1019 
1020 	discon.udata.buf = (char *)0;
1021 	discon.udata.maxlen = 0;
1022 	if (t_rcvdis(fd, &discon) == -1) {
1023 		rdcd_log_tli_error("t_rcvdis", fd, nconf);
1024 		return (-1);
1025 	}
1026 
1027 	conn = *connp;
1028 	if (conn == NULL)
1029 		return (0);
1030 
1031 	do {
1032 		if (conn->conn_call->sequence == discon.sequence) {
1033 			if (conn->conn_next == conn)
1034 				*connp = (struct conn_ind *)0;
1035 			else {
1036 				if (conn == *connp) {
1037 					*connp = conn->conn_next;
1038 				}
1039 				conn->conn_next->conn_prev = conn->conn_prev;
1040 				conn->conn_prev->conn_next = conn->conn_next;
1041 			}
1042 			free((char *)conn);
1043 			break;
1044 		}
1045 		conn = conn->conn_next;
1046 	} while (conn != *connp);
1047 
1048 	return (0);
1049 }
1050 
1051 static void
1052 cots_listen_event(int fd, int conn_index)
1053 {
1054 	struct t_call *call;
1055 	struct conn_ind	*conn;
1056 	struct conn_ind	*conn_head;
1057 	int event;
1058 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1059 	int new_fd;
1060 	struct netbuf addrmask;
1061 	int ret = 0;
1062 
1063 	conn_head = (struct conn_ind *)0;
1064 	(void) conn_get(fd, nconf, &conn_head);
1065 
1066 	while ((conn = conn_head) != NULL) {
1067 		conn_head = conn->conn_next;
1068 		if (conn_head == conn)
1069 			conn_head = (struct conn_ind *)0;
1070 		else {
1071 			conn_head->conn_prev = conn->conn_prev;
1072 			conn->conn_prev->conn_next = conn_head;
1073 		}
1074 		call = conn->conn_call;
1075 		free((char *)conn);
1076 
1077 		/*
1078 		 * If we have already accepted the maximum number of
1079 		 * connections allowed on the command line, then drop
1080 		 * the oldest connection (for any protocol) before
1081 		 * accepting the new connection.  Unless explicitly
1082 		 * set on the command line, max_conns_allowed is -1.
1083 		 */
1084 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1085 			conn_close_oldest();
1086 
1087 		/*
1088 		 * Create a new transport endpoint for the same proto as
1089 		 * the listener.
1090 		 */
1091 		new_fd = rdc_transport_open(nconf);
1092 		if (new_fd == -1) {
1093 			call->udata.len = 0;
1094 			(void) t_snddis(fd, call);
1095 			(void) t_free((char *)call, T_CALL);
1096 			syslog(LOG_ERR, "Cannot establish transport over %s",
1097 			    nconf->nc_device);
1098 			continue;
1099 		}
1100 
1101 		/* Bind to a generic address/port for the accepting stream. */
1102 		if (t_bind(new_fd, (struct t_bind *)NULL,
1103 		    (struct t_bind *)NULL) == -1) {
1104 			rdcd_log_tli_error("t_bind", new_fd, nconf);
1105 			call->udata.len = 0;
1106 			(void) t_snddis(fd, call);
1107 			(void) t_free((char *)call, T_CALL);
1108 			(void) t_close(new_fd);
1109 			continue;
1110 		}
1111 
1112 		while (t_accept(fd, new_fd, call) == -1) {
1113 			if (t_errno != TLOOK) {
1114 				rdcd_log_tli_error("t_accept", fd, nconf);
1115 				call->udata.len = 0;
1116 				(void) t_snddis(fd, call);
1117 				(void) t_free((char *)call, T_CALL);
1118 				(void) t_close(new_fd);
1119 				goto do_next_conn;
1120 			}
1121 			while (event = t_look(fd)) {
1122 				switch (event) {
1123 				case T_LISTEN:
1124 #ifdef DEBUG
1125 					(void) printf(
1126 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1127 #endif
1128 					(void) conn_get(fd, nconf, &conn_head);
1129 					continue;
1130 
1131 				case T_DISCONNECT:
1132 #ifdef DEBUG
1133 					(void) printf(
1134 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1135 						nconf->nc_proto);
1136 #endif
1137 					(void) discon_get(fd, nconf,
1138 					    &conn_head);
1139 					continue;
1140 
1141 				default:
1142 					syslog(LOG_ERR,
1143 					    "unexpected event 0x%x during "
1144 					    "accept processing (%s)",
1145 					    event, nconf->nc_proto);
1146 					call->udata.len = 0;
1147 					(void) t_snddis(fd, call);
1148 					(void) t_free((char *)call, T_CALL);
1149 					(void) t_close(new_fd);
1150 					goto do_next_conn;
1151 				}
1152 			}
1153 		}
1154 
1155 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1156 			(void) syslog(LOG_ERR, "Cannot set address mask for %s",
1157 			    nconf->nc_netid);
1158 			return;
1159 		}
1160 
1161 		/* Tell KRPC about the new stream. */
1162 		ret = (*Mysvc)(new_fd, addrmask, nconf);
1163 		if (ret < 0) {
1164 			syslog(LOG_ERR,
1165 			    "unable to register with kernel rpc: %m");
1166 			free(addrmask.buf);
1167 			(void) t_snddis(new_fd, (struct t_call *)0);
1168 			(void) t_free((char *)call, T_CALL);
1169 			(void) t_close(new_fd);
1170 			goto do_next_conn;
1171 		}
1172 
1173 		free(addrmask.buf);
1174 		(void) t_free((char *)call, T_CALL);
1175 
1176 		/*
1177 		 * Poll on the new descriptor so that we get disconnect
1178 		 * and orderly release indications.
1179 		 */
1180 		num_conns++;
1181 		add_to_poll_list(new_fd, nconf);
1182 
1183 		/* Reset nconf in case it has been moved. */
1184 		nconf = &conn_polled[conn_index].nc;
1185 do_next_conn:;
1186 	}
1187 }
1188 
1189 static int
1190 do_poll_cots_action(int fd, int conn_index)
1191 {
1192 	char buf[256];
1193 	int event;
1194 	int i1;
1195 	int flags;
1196 	struct conn_entry *connent = &conn_polled[conn_index];
1197 	struct netconfig *nconf = &(connent->nc);
1198 	const char *errorstr;
1199 
1200 	while (event = t_look(fd)) {
1201 		switch (event) {
1202 		case T_LISTEN:
1203 #ifdef DEBUG
1204 	(void) printf("do_poll_cots_action(%s, %d): T_LISTEN event\n",
1205 	    nconf->nc_proto, fd);
1206 #endif
1207 			cots_listen_event(fd, conn_index);
1208 			break;
1209 
1210 		case T_DATA:
1211 #ifdef DEBUG
1212 	(void) printf("do_poll_cots_action(%d, %s): T_DATA event\n",
1213 		fd, nconf->nc_proto);
1214 #endif
1215 			/*
1216 			 * Receive a private notification from CONS rpcmod.
1217 			 */
1218 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1219 			if (i1 == -1) {
1220 				syslog(LOG_ERR, "t_rcv failed");
1221 				break;
1222 			}
1223 			if (i1 < sizeof (int))
1224 				break;
1225 			i1 = BE32_TO_U32(buf);
1226 			if (i1 == 1 || i1 == 2) {
1227 				/*
1228 				 * This connection has been idle for too long,
1229 				 * so release it as politely as we can.  If we
1230 				 * have already initiated an orderly release
1231 				 * and we get notified that the stream is
1232 				 * still idle, pull the plug.  This prevents
1233 				 * hung connections from continuing to consume
1234 				 * resources.
1235 				 */
1236 #ifdef DEBUG
1237 (void) printf("do_poll_cots_action(%s, %d): ", nconf->nc_proto, fd);
1238 (void) printf("initiating orderly release of idle connection\n");
1239 #endif
1240 				if (nconf->nc_semantics == NC_TPI_COTS ||
1241 				    connent->closing != 0) {
1242 					(void) t_snddis(fd, (struct t_call *)0);
1243 					goto fdclose;
1244 				}
1245 				/*
1246 				 * For NC_TPI_COTS_ORD, the stream is closed
1247 				 * and removed from the poll list when the
1248 				 * T_ORDREL is received from the provider.  We
1249 				 * don't wait for it here because it may take
1250 				 * a while for the transport to shut down.
1251 				 */
1252 				if (t_sndrel(fd) == -1) {
1253 					syslog(LOG_ERR,
1254 					"unable to send orderly release %m");
1255 				}
1256 				connent->closing = 1;
1257 			} else
1258 				syslog(LOG_ERR,
1259 				    "unexpected event from CONS rpcmod %d", i1);
1260 			break;
1261 
1262 		case T_ORDREL:
1263 #ifdef DEBUG
1264 	(void) printf("do_poll_cots_action(%s, %d): T_ORDREL event\n",
1265 		nconf->nc_proto, fd);
1266 #endif
1267 			/* Perform an orderly release. */
1268 			if (t_rcvrel(fd) == 0) {
1269 				/* T_ORDREL on listen fd's should be ignored */
1270 				if (!is_listen_fd_index(fd)) {
1271 					(void) t_sndrel(fd);
1272 					goto fdclose;
1273 				}
1274 				break;
1275 
1276 			} else if (t_errno == TLOOK) {
1277 				break;
1278 			} else {
1279 				rdcd_log_tli_error("t_rcvrel", fd, nconf);
1280 				/*
1281 				 * check to make sure we do not close
1282 				 * listen fd
1283 				 */
1284 				if (!is_listen_fd_index(fd))
1285 					break;
1286 				else
1287 					goto fdclose;
1288 			}
1289 
1290 		case T_DISCONNECT:
1291 #ifdef DEBUG
1292 (void) printf("do_poll_cots_action(%s, %d): T_DISCONNECT event\n",
1293 nconf->nc_proto, fd);
1294 #endif
1295 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1296 				rdcd_log_tli_error("t_rcvdis", fd, nconf);
1297 
1298 			/*
1299 			 * T_DISCONNECT on listen fd's should be ignored.
1300 			 */
1301 			if (!is_listen_fd_index(fd))
1302 				break;
1303 			else
1304 				goto fdclose;
1305 
1306 		case T_ERROR:
1307 		default:
1308 			if (event == T_ERROR || t_errno == TSYSERR) {
1309 			    if ((errorstr = strerror(errno)) == NULL) {
1310 				(void) snprintf(buf, sizeof (buf),
1311 				    "Unknown error num %d", errno);
1312 				errorstr = (const char *)buf;
1313 			    }
1314 			} else if (event == -1)
1315 				errorstr = t_strerror(t_errno);
1316 			else
1317 				errorstr = "";
1318 #ifdef DEBUG
1319 			syslog(LOG_ERR,
1320 			    "unexpected TLI event (0x%x) on "
1321 			    "connection-oriented transport(%s, %d):%s",
1322 			    event, nconf->nc_proto, fd, errorstr);
1323 #endif
1324 
1325 fdclose:
1326 			num_conns--;
1327 			remove_from_poll_list(fd);
1328 			(void) t_close(fd);
1329 			return (0);
1330 		}
1331 	}
1332 
1333 	return (0);
1334 }
1335 
1336 
1337 /*
1338  * Called to read and interpret the event on a connectionless descriptor.
1339  * Returns 0 if successful, or a UNIX error code if failure.
1340  */
1341 static int
1342 do_poll_clts_action(int fd, int conn_index)
1343 {
1344 	int error;
1345 	int ret;
1346 	int flags;
1347 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1348 	static struct t_unitdata *unitdata = NULL;
1349 	static struct t_uderr *uderr = NULL;
1350 	static int oldfd = -1;
1351 	struct nd_hostservlist *host = NULL;
1352 	struct strbuf ctl[1], data[1];
1353 	/*
1354 	 * We just need to have some space to consume the
1355 	 * message in the event we can't use the TLI interface to do the
1356 	 * job.
1357 	 *
1358 	 * We flush the message using getmsg(). For the control part
1359 	 * we allocate enough for any TPI header plus 32 bytes for address
1360 	 * and options. For the data part, there is nothing magic about
1361 	 * the size of the array, but 256 bytes is probably better than
1362 	 * 1 byte, and we don't expect any data portion anyway.
1363 	 *
1364 	 * If the array sizes are too small, we handle this because getmsg()
1365 	 * (called to consume the message) will return MOREDATA|MORECTL.
1366 	 * Thus we just call getmsg() until it's read the message.
1367 	 */
1368 	char ctlbuf[sizeof (union T_primitives) + 32];
1369 	char databuf[256];
1370 
1371 	/*
1372 	 * If this is the same descriptor as the last time
1373 	 * do_poll_clts_action was called, we can save some
1374 	 * de-allocation and allocation.
1375 	 */
1376 	if (oldfd != fd) {
1377 		oldfd = fd;
1378 
1379 		if (unitdata) {
1380 			(void) t_free((char *)unitdata, T_UNITDATA);
1381 			unitdata = NULL;
1382 		}
1383 		if (uderr) {
1384 			(void) t_free((char *)uderr, T_UDERROR);
1385 			uderr = NULL;
1386 		}
1387 	}
1388 
1389 	/*
1390 	 * Allocate a unitdata structure for receiving the event.
1391 	 */
1392 	if (unitdata == NULL) {
1393 		/* LINTED pointer alignment */
1394 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
1395 		if (unitdata == NULL) {
1396 			if (t_errno == TSYSERR) {
1397 				/*
1398 				 * Save the error code across
1399 				 * syslog(), just in case
1400 				 * syslog() gets its own error
1401 				 * and therefore overwrites errno.
1402 				 */
1403 				error = errno;
1404 				(void) syslog(LOG_ERR,
1405 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
1406 					fd, nconf->nc_proto);
1407 				return (error);
1408 			}
1409 			(void) syslog(LOG_ERR,
1410 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
1411 					fd, nconf->nc_proto, t_errno);
1412 			goto flush_it;
1413 		}
1414 	}
1415 
1416 try_again:
1417 	flags = 0;
1418 
1419 	/*
1420 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
1421 	 * we don't get any, because rpcmod filters them out.
1422 	 * However, we need to call t_rcvudata() to let TLI
1423 	 * tell us we have a T_UDERROR_IND.
1424 	 *
1425 	 * algorithm is:
1426 	 * 	t_rcvudata(), expecting TLOOK.
1427 	 * 	t_look(), expecting T_UDERR.
1428 	 * 	t_rcvuderr(), expecting success (0).
1429 	 * 	expand destination address into ASCII,
1430 	 *	and dump it.
1431 	 */
1432 
1433 	ret = t_rcvudata(fd, unitdata, &flags);
1434 	if (ret == 0 || t_errno == TBUFOVFLW) {
1435 		(void) syslog(LOG_WARNING,
1436 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
1437 			fd, nconf->nc_proto, unitdata->udata.len);
1438 
1439 		/*
1440 		 * Even though we don't expect any data, in case we do,
1441 		 * keep reading until there is no more.
1442 		 */
1443 		if (flags & T_MORE)
1444 			goto try_again;
1445 
1446 		return (0);
1447 	}
1448 
1449 	switch (t_errno) {
1450 	case TNODATA:
1451 		return (0);
1452 	case TSYSERR:
1453 		/*
1454 		 * System errors are returned to caller.
1455 		 * Save the error code across
1456 		 * syslog(), just in case
1457 		 * syslog() gets its own error
1458 		 * and therefore overwrites errno.
1459 		 */
1460 		error = errno;
1461 		(void) syslog(LOG_ERR,
1462 			"t_rcvudata(file descriptor %d/transport %s) %m",
1463 			fd, nconf->nc_proto);
1464 		return (error);
1465 	case TLOOK:
1466 		break;
1467 	default:
1468 		(void) syslog(LOG_ERR,
1469 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1470 			fd, nconf->nc_proto, t_errno);
1471 		goto flush_it;
1472 	}
1473 
1474 	ret = t_look(fd);
1475 	switch (ret) {
1476 	case 0:
1477 		return (0);
1478 	case -1:
1479 		/*
1480 		 * System errors are returned to caller.
1481 		 */
1482 		if (t_errno == TSYSERR) {
1483 			/*
1484 			 * Save the error code across
1485 			 * syslog(), just in case
1486 			 * syslog() gets its own error
1487 			 * and therefore overwrites errno.
1488 			 */
1489 			error = errno;
1490 			(void) syslog(LOG_ERR,
1491 				"t_look(file descriptor %d/transport %s) %m",
1492 				fd, nconf->nc_proto);
1493 			return (error);
1494 		}
1495 		(void) syslog(LOG_ERR,
1496 			"t_look(file descriptor %d/transport %s) TLI error %d",
1497 			fd, nconf->nc_proto, t_errno);
1498 		goto flush_it;
1499 	case T_UDERR:
1500 		break;
1501 	default:
1502 		(void) syslog(LOG_WARNING,
1503 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1504 			fd, nconf->nc_proto, ret, T_UDERR);
1505 	}
1506 
1507 	if (uderr == NULL) {
1508 		/* LINTED pointer alignment */
1509 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1510 		if (uderr == NULL) {
1511 			if (t_errno == TSYSERR) {
1512 				/*
1513 				 * Save the error code across
1514 				 * syslog(), just in case
1515 				 * syslog() gets its own error
1516 				 * and therefore overwrites errno.
1517 				 */
1518 				error = errno;
1519 				(void) syslog(LOG_ERR,
1520 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1521 					fd, nconf->nc_proto);
1522 				return (error);
1523 			}
1524 			(void) syslog(LOG_ERR,
1525 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1526 				fd, nconf->nc_proto, t_errno);
1527 			goto flush_it;
1528 		}
1529 	}
1530 
1531 	ret = t_rcvuderr(fd, uderr);
1532 	if (ret == 0) {
1533 
1534 		/*
1535 		 * Save the datagram error in errno, so that the
1536 		 * %m argument to syslog picks up the error string.
1537 		 */
1538 		errno = uderr->error;
1539 
1540 		/*
1541 		 * Log the datagram error, then log the host that
1542 		 * probably triggerred. Cannot log both in the
1543 		 * same transaction because of packet size limitations
1544 		 * in /dev/log.
1545 		 */
1546 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1547 		    "%s response over <file descriptor %d/transport %s> "
1548 		    "generated error: %m",
1549 		    progname, fd, nconf->nc_proto);
1550 
1551 		/*
1552 		 * Try to map the client's address back to a
1553 		 * name.
1554 		 */
1555 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1556 		if (ret != -1 && host && host->h_cnt > 0 &&
1557 		    host->h_hostservs) {
1558 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1559 		    "Bad %s response was sent to client with "
1560 		    "host name: %s; service port: %s",
1561 		    progname, host->h_hostservs->h_host,
1562 		    host->h_hostservs->h_serv);
1563 		} else {
1564 			int i, j;
1565 			char *buf;
1566 			char *hex = "0123456789abcdef";
1567 
1568 			/*
1569 			 * Mapping failed, print the whole thing
1570 			 * in ASCII hex.
1571 			 */
1572 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1573 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1574 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1575 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1576 			}
1577 			buf[j] = '\0';
1578 			(void) syslog((errno == ECONNREFUSED) ?
1579 			    LOG_DEBUG : LOG_WARNING,
1580 			    "Bad %s response was sent to client with "
1581 			    "transport address: 0x%s",
1582 			    progname, buf);
1583 			free((void *)buf);
1584 		}
1585 
1586 		if (ret == 0 && host != NULL)
1587 			netdir_free((void *)host, ND_HOSTSERVLIST);
1588 		return (0);
1589 	}
1590 
1591 	switch (t_errno) {
1592 	case TNOUDERR:
1593 		goto flush_it;
1594 	case TSYSERR:
1595 		/*
1596 		 * System errors are returned to caller.
1597 		 * Save the error code across
1598 		 * syslog(), just in case
1599 		 * syslog() gets its own error
1600 		 * and therefore overwrites errno.
1601 		 */
1602 		error = errno;
1603 		(void) syslog(LOG_ERR,
1604 			"t_rcvuderr(file descriptor %d/transport %s) %m",
1605 			fd, nconf->nc_proto);
1606 		return (error);
1607 	default:
1608 		(void) syslog(LOG_ERR,
1609 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1610 			fd, nconf->nc_proto, t_errno);
1611 		goto flush_it;
1612 	}
1613 
1614 flush_it:
1615 	/*
1616 	 * If we get here, then we could not cope with whatever message
1617 	 * we attempted to read, so flush it. If we did read a message,
1618 	 * and one isn't present, that is all right, because fd is in
1619 	 * nonblocking mode.
1620 	 */
1621 	(void) syslog(LOG_ERR,
1622 	"Flushing one input message from <file descriptor %d/transport %s>",
1623 		fd, nconf->nc_proto);
1624 
1625 	/*
1626 	 * Read and discard the message. Do this this until there is
1627 	 * no more control/data in the message or until we get an error.
1628 	 */
1629 	do {
1630 		ctl->maxlen = sizeof (ctlbuf);
1631 		ctl->buf = ctlbuf;
1632 		data->maxlen = sizeof (databuf);
1633 		data->buf = databuf;
1634 		flags = 0;
1635 		ret = getmsg(fd, ctl, data, &flags);
1636 		if (ret == -1)
1637 			return (errno);
1638 	} while (ret != 0);
1639 
1640 	return (0);
1641 }
1642 
1643 /*
1644  * Establish service thread.
1645  */
1646 static int
1647 rdcsvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
1648 {
1649 #ifdef	__NCALL__
1650 	struct ncall_svc_args nsa;
1651 #else	/* !__NCALL__ */
1652 	struct rdc_svc_args nsa;
1653 	_rdc_ioctl_t rdc_args = { 0, };
1654 #endif	/* __NCALL__ */
1655 
1656 	nsa.fd = fd;
1657 	nsa.nthr = (max_conns_allowed < 0 ? 16 : max_conns_allowed);
1658 	strncpy(nsa.netid, nconf->nc_netid, sizeof (nsa.netid));
1659 	nsa.addrmask.len = addrmask.len;
1660 	nsa.addrmask.maxlen = addrmask.maxlen;
1661 	nsa.addrmask.buf = addrmask.buf;
1662 
1663 #ifdef	__NCALL__
1664 	return (sndrsys(NC_IOC_SERVER, &nsa));
1665 #else	/* !__NCALL__ */
1666 	rdc_args.arg0 = (long)&nsa;
1667 	return (sndrsys(RDC_ENABLE_SVR, &rdc_args));
1668 #endif	/* __NCALL__ */
1669 }
1670 
1671 
1672 
1673 static int
1674 nofile_increase(int limit)
1675 {
1676 	struct rlimit rl;
1677 
1678 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
1679 		syslog(LOG_ERR,
1680 		    "nofile_increase() getrlimit of NOFILE failed: %m");
1681 		return (-1);
1682 	}
1683 
1684 	if (limit > 0)
1685 		rl.rlim_cur = limit;
1686 	else
1687 		rl.rlim_cur += NOFILE_INC_SIZE;
1688 
1689 	if (rl.rlim_cur > rl.rlim_max && rl.rlim_max != RLIM_INFINITY)
1690 		rl.rlim_max = rl.rlim_cur;
1691 
1692 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
1693 		syslog(LOG_ERR,
1694 		    "nofile_increase() setrlimit of NOFILE to %d failed: %m",
1695 		    rl.rlim_cur);
1696 		return (-1);
1697 	}
1698 
1699 	return (0);
1700 }
1701 
1702 int
1703 rdcd_bindit(struct netconfig *nconf, struct netbuf **addr,
1704     struct nd_hostserv *hs, int backlog)
1705 {
1706 	int fd;
1707 	struct t_bind *ntb;
1708 	struct t_bind tb;
1709 	struct nd_addrlist *addrlist;
1710 	struct t_optmgmt req, resp;
1711 	struct opthdr *opt;
1712 	char reqbuf[128];
1713 
1714 	if ((fd = rdc_transport_open(nconf)) == -1) {
1715 		syslog(LOG_ERR, "cannot establish transport service over %s",
1716 		    nconf->nc_device);
1717 		return (-1);
1718 	}
1719 
1720 	addrlist = (struct nd_addrlist *)NULL;
1721 	if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
1722 		if (strncmp(nconf->nc_netid, "udp", 3) != 0) {
1723 			syslog(LOG_ERR, "Cannot get address for transport "
1724 			    "%s host %s service %s",
1725 			    nconf->nc_netid, hs->h_host, hs->h_serv);
1726 		}
1727 		(void) t_close(fd);
1728 		return (-1);
1729 	}
1730 
1731 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
1732 		/*
1733 		 * If we're running over TCP, then set the
1734 		 * SO_REUSEADDR option so that we can bind
1735 		 * to our preferred address even if previously
1736 		 * left connections exist in FIN_WAIT states.
1737 		 * This is somewhat bogus, but otherwise you have
1738 		 * to wait 2 minutes to restart after killing it.
1739 		 */
1740 		if (reuseaddr(fd) == -1) {
1741 			syslog(LOG_WARNING,
1742 			    "couldn't set SO_REUSEADDR option on transport");
1743 		}
1744 	}
1745 
1746 	if (nconf->nc_semantics == NC_TPI_CLTS)
1747 		tb.qlen = 0;
1748 	else
1749 		tb.qlen = backlog;
1750 
1751 	/* LINTED pointer alignment */
1752 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
1753 	if (ntb == (struct t_bind *)NULL) {
1754 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
1755 		(void) t_close(fd);
1756 		netdir_free((void *)addrlist, ND_ADDRLIST);
1757 		return (-1);
1758 	}
1759 
1760 	tb.addr = *(addrlist->n_addrs);		/* structure copy */
1761 
1762 	if (t_bind(fd, &tb, ntb) == -1) {
1763 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
1764 		(void) t_free((char *)ntb, T_BIND);
1765 		netdir_free((void *)addrlist, ND_ADDRLIST);
1766 		(void) t_close(fd);
1767 		return (-1);
1768 	}
1769 
1770 	/* make sure we bound to the right address */
1771 	if (tb.addr.len != ntb->addr.len ||
1772 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0) {
1773 		syslog(LOG_ERR, "t_bind to wrong address");
1774 		(void) t_free((char *)ntb, T_BIND);
1775 		netdir_free((void *)addrlist, ND_ADDRLIST);
1776 		(void) t_close(fd);
1777 		return (-1);
1778 	}
1779 
1780 	*addr = &ntb->addr;
1781 	netdir_free((void *)addrlist, ND_ADDRLIST);
1782 
1783 	if (strcmp(nconf->nc_proto, "tcp") == 0 ||
1784 	    strcmp(nconf->nc_proto, "tcp6") == 0) {
1785 		/*
1786 		 * Disable the Nagle algorithm on TCP connections.
1787 		 * Connections accepted from this listener will
1788 		 * inherit the listener options.
1789 		 */
1790 
1791 		/* LINTED pointer alignment */
1792 		opt = (struct opthdr *)reqbuf;
1793 		opt->level = IPPROTO_TCP;
1794 		opt->name = TCP_NODELAY;
1795 		opt->len = sizeof (int);
1796 
1797 		/* LINTED pointer alignment */
1798 		*(int *)((char *)opt + sizeof (*opt)) = 1;
1799 
1800 		req.flags = T_NEGOTIATE;
1801 		req.opt.len = sizeof (*opt) + opt->len;
1802 		req.opt.buf = (char *)opt;
1803 		resp.flags = 0;
1804 		resp.opt.buf = reqbuf;
1805 		resp.opt.maxlen = sizeof (reqbuf);
1806 
1807 		if (t_optmgmt(fd, &req, &resp) < 0 ||
1808 		    resp.flags != T_SUCCESS) {
1809 			syslog(LOG_ERR,
1810 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
1811 				nconf->nc_proto, t_errno);
1812 		}
1813 	}
1814 
1815 	return (fd);
1816 }
1817 
1818 
1819 /* ARGSUSED */
1820 static int
1821 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1822 		struct netconfig **retnconf)
1823 {
1824 	struct netconfig *nconf;
1825 	NCONF_HANDLE *nc;
1826 	struct nd_hostserv hs;
1827 
1828 	hs.h_host = HOST_SELF;
1829 	hs.h_serv = RDC_SERVICE;	/* serv_name_to_port_name(serv); */
1830 
1831 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1832 		syslog(LOG_ERR, "setnetconfig failed: %m");
1833 		return (-1);
1834 	}
1835 	while (nconf = getnetconfig(nc)) {
1836 		if (OK_TPI_TYPE(nconf) &&
1837 		    strcmp(nconf->nc_device, provider) == 0) {
1838 			*retnconf = nconf;
1839 			return (rdcd_bindit(nconf, addr, &hs, listen_backlog));
1840 		}
1841 	}
1842 	(void) endnetconfig(nc);
1843 	if ((Is_ipv6present() && (strcmp(provider, "/dev/tcp6") == 0)) ||
1844 	    (!Is_ipv6present() && (strcmp(provider, "/dev/tcp") == 0)))
1845 		syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1846 		    provider);
1847 	return (-1);
1848 }
1849 
1850 
1851 /*
1852  * For listen fd's index is always less than end_listen_fds.
1853  * It's value is equal to the number of open file descriptors after the
1854  * last listen end point was opened but before any connection was accepted.
1855  */
1856 static int
1857 is_listen_fd_index(int index)
1858 {
1859 	return (index < end_listen_fds);
1860 }
1861 
1862 
1863 /*
1864  * Create an address mask appropriate for the transport.
1865  * The mask is used to obtain the host-specific part of
1866  * a network address when comparing addresses.
1867  * For an internet address the host-specific part is just
1868  * the 32 bit IP address and this part of the mask is set
1869  * to all-ones. The port number part of the mask is zeroes.
1870  */
1871 static int
1872 set_addrmask(int fd, struct netconfig *nconf, struct netbuf *mask)
1873 {
1874 	struct t_info info;
1875 
1876 	/*
1877 	 * Find the size of the address we need to mask.
1878 	 */
1879 	if (t_getinfo(fd, &info) < 0) {
1880 		t_error("t_getinfo");
1881 		return (-1);
1882 	}
1883 	mask->len = mask->maxlen = info.addr;
1884 	if (info.addr <= 0) {
1885 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1886 			info.addr);
1887 		return (-1);
1888 	}
1889 
1890 	mask->buf = (char *)malloc(mask->len);
1891 	if (mask->buf == NULL) {
1892 		syslog(LOG_ERR, "set_addrmask: no memory");
1893 		return (-1);
1894 	}
1895 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1896 
1897 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1898 		/*
1899 		 * Set the mask so that the port is ignored.
1900 		 */
1901 		/* LINTED pointer alignment */
1902 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1903 		    (in_addr_t)~0;
1904 		/* LINTED pointer alignment */
1905 		((struct sockaddr_in *)mask->buf)->sin_family = (sa_family_t)~0;
1906 	}
1907 #ifdef NC_INET6
1908 	else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1909 		/* LINTED pointer alignment */
1910 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1911 		    (uchar_t)~0, sizeof (struct in6_addr));
1912 		/* LINTED pointer alignment */
1913 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1914 		    (sa_family_t)~0;
1915 	}
1916 #endif
1917 	else {
1918 		/*
1919 		 * Set all mask bits.
1920 		 */
1921 		(void) memset(mask->buf, (uchar_t)~0, mask->len);
1922 	}
1923 	return (0);
1924 }
1925 
1926 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
1927 
1928 static int
1929 sndrsvcpool(int maxservers)
1930 {
1931 	struct svcpool_args npa;
1932 
1933 	npa.id = RDC_SVCPOOL_ID;
1934 	npa.maxthreads = maxservers;
1935 	npa.redline = 0;
1936 	npa.qsize = 0;
1937 	npa.timeout = 0;
1938 	npa.stksize = 0;
1939 	npa.max_same_xprt = 0;
1940 	return (sndrsys(RDC_POOL_CREATE, &npa));
1941 }
1942 
1943 
1944 /*
1945  * The following stolen from cmd/fs.d/nfs/lib/thrpool.c
1946  */
1947 
1948 #include <thread.h>
1949 
1950 /*
1951  * Thread to call into the kernel and do work on behalf of SNDR/ncall-ip.
1952  */
1953 static void *
1954 svcstart(void *arg)
1955 {
1956 	int id = (int)arg;
1957 	int err;
1958 
1959 	while ((err = sndrsys(RDC_POOL_RUN, &id)) != 0) {
1960 		/*
1961 		 * Interrupted by a signal while in the kernel.
1962 		 * this process is still alive, try again.
1963 		 */
1964 		if (err == EINTR)
1965 			continue;
1966 		else
1967 			break;
1968 	}
1969 
1970 	/*
1971 	 * If we weren't interrupted by a signal, but did
1972 	 * return from the kernel, this thread's work is done,
1973 	 * and it should exit.
1974 	 */
1975 	thr_exit(NULL);
1976 	return (NULL);
1977 }
1978 
1979 /*
1980  * User-space "creator" thread. This thread blocks in the kernel
1981  * until new worker threads need to be created for the service
1982  * pool. On return to userspace, if there is no error, create a
1983  * new thread for the service pool.
1984  */
1985 static void *
1986 svcblock(void *arg)
1987 {
1988 	int id = (int)arg;
1989 
1990 	/* CONSTCOND */
1991 	while (1) {
1992 		thread_t tid;
1993 		int err;
1994 
1995 		/*
1996 		 * Call into the kernel, and hang out there
1997 		 * until a thread needs to be created.
1998 		 */
1999 		if (err = sndrsys(RDC_POOL_WAIT, &id)) {
2000 			if (err == ECANCELED || err == EBUSY)
2001 				/*
2002 				 * If we get back ECANCELED, the service
2003 				 * pool is exiting, and we may as well
2004 				 * clean up this thread. If EBUSY is
2005 				 * returned, there's already a thread
2006 				 * looping on this pool, so we should
2007 				 * give up.
2008 				 */
2009 				break;
2010 			else
2011 				continue;
2012 		}
2013 
2014 		(void) thr_create(NULL, NULL, svcstart, (void *)id,
2015 		    THR_BOUND | THR_DETACHED, &tid);
2016 	}
2017 
2018 	thr_exit(NULL);
2019 	return (NULL);
2020 }
2021 
2022 static int
2023 svcwait(int id)
2024 {
2025 	thread_t tid;
2026 
2027 	/*
2028 	 * Create a bound thread to wait for kernel LWPs that
2029 	 * need to be created.
2030 	 */
2031 	if (thr_create(NULL, NULL, svcblock, (void *)id,
2032 	    THR_BOUND | THR_DETACHED, &tid))
2033 		return (1);
2034 
2035 	return (0);
2036 }
2037 #endif /* Solaris 9+ */
2038