xref: /titanic_44/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision 8eea8e29cc4374d1ee24c25a07f45af132db3499)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * nfs_tbind.c, common part for nfsd and lockd.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <tiuser.h>
34 #include <fcntl.h>
35 #include <netconfig.h>
36 #include <stropts.h>
37 #include <errno.h>
38 #include <syslog.h>
39 #include <rpc/rpc.h>
40 #include <sys/time.h>
41 #include <sys/resource.h>
42 #include <signal.h>
43 #include <netdir.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <netinet/tcp.h>
47 #include <malloc.h>
48 #include <stdlib.h>
49 #include "nfs_tbind.h"
50 #include <nfs/nfs.h>
51 #include <nfs/nfs_acl.h>
52 #include <nfs/nfssys.h>
53 #include <nfs/nfs4.h>
54 
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to non-zero when the netconfig entry pointed to by _nconf
 * has nc_semantics equal to NC_TPI_CLTS, NC_TPI_COTS or
 * NC_TPI_COTS_ORD.  The argument is parenthesized so that any pointer
 * expression may be passed; it is evaluated up to three times, so it
 * must not have side effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
62 
/*
 * Assemble an unsigned 32-bit quantity from the four bytes starting at
 * address a, treating the first byte as the most significant one.
 * The argument is parenthesized so that the byte-pointer cast applies
 * to the whole expression; it is evaluated four times, so it must not
 * have side effects.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
68 
/*
 * Number of elements to add to the poll array on each allocation.
 * add_to_poll_list() grows poll_array and conn_polled by this many
 * entries each time they become full.
 */
#define	POLL_ARRAY_INC_SIZE	64

/*
 * Number of file descriptors by which the process soft limit is
 * increased on each call to nofile_increase(0).
 */
#define	NOFILE_INC_SIZE	64
79 
/*
 * One pending connect indication received on a listening endpoint.
 * conn_get() links these into a circular, doubly-linked list (a lone
 * entry points at itself in both directions); discon_get() unlinks the
 * entry whose call sequence number matches a disconnect.
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* next indication in the ring */
	struct conn_ind *conn_prev;	/* previous indication in the ring */
	struct t_call   *conn_call;	/* t_alloc'ed, filled by t_listen */
};
85 
/*
 * Per-descriptor bookkeeping kept in the conn_polled array, which is
 * indexed in parallel with poll_array.
 */
struct conn_entry {
	/* set by conn_close_oldest() once t_sndrel() has been sent */
	bool_t			closing;
	/* private copy of the transport's netconfig entry */
	struct netconfig	nc;
};
90 
/*
 * This file contains transport routines common to nfsd and lockd.
 * Forward declarations for the file-local helpers follow.
 */
static	int	nofile_increase(int);
static	int	reuseaddr(int);
static	void	add_to_poll_list(int, struct netconfig *);
static	char	*serv_name_to_port_name(char *);
static	int	bind_to_proto(char *, char *, struct netbuf **,
				struct netconfig **);
static	int	bind_to_provider(char *, char *, struct netbuf **,
					struct netconfig **);
static	void	conn_close_oldest(void);
static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static	void	cots_listen_event(int, int);
static	int	discon_get(int, struct netconfig *, struct conn_ind **);
static	int	do_poll_clts_action(int, int);
static	int	do_poll_cots_action(int, int);
static	void	remove_from_poll_list(int);
static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
static	int	is_listen_fd_index(int);

/*
 * poll_array and conn_polled are parallel arrays: entry i of each
 * describes the same descriptor.  Both are allocated and grown by
 * add_to_poll_list() and compacted by remove_from_poll_list().
 */
static	struct pollfd *poll_array;
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional hook; when non-NULL it is invoked by nfslib_bindit() with
 * NFS4_SETPORT for connection-oriented transports and by do_one() in
 * place of a NULL svc callback.
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);
117 
118 /*
119  * Called to create and prepare a transport descriptor for in-kernel
120  * RPC service.
121  * Returns -1 on failure and a valid descriptor on success.
122  */
123 int
124 nfslib_transport_open(struct netconfig *nconf)
125 {
126 	int fd;
127 	struct strioctl	strioc;
128 
129 	if ((nconf == (struct netconfig *)NULL) ||
130 	    (nconf->nc_device == (char *)NULL)) {
131 		syslog(LOG_ERR, "no netconfig device");
132 		return (-1);
133 	}
134 
135 	/*
136 	 * Open the transport device.
137 	 */
138 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
139 	if (fd == -1) {
140 		if (t_errno == TSYSERR && errno == EMFILE &&
141 		    (nofile_increase(0) == 0)) {
142 			/* Try again with a higher NOFILE limit. */
143 			fd = t_open(nconf->nc_device, O_RDWR,
144 					(struct t_info *)NULL);
145 		}
146 		if (fd == -1) {
147 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
148 			    nconf->nc_device, t_errno);
149 			return (-1);
150 		}
151 	}
152 
153 	/*
154 	 * Pop timod because the RPC module must be as close as possible
155 	 * to the transport.
156 	 */
157 	if (ioctl(fd, I_POP, 0) < 0) {
158 		syslog(LOG_ERR, "I_POP of timod failed: %m");
159 		(void) t_close(fd);
160 		return (-1);
161 	}
162 
163 	/*
164 	 * Common code for CLTS and COTS transports
165 	 */
166 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
167 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
168 		(void) t_close(fd);
169 		return (-1);
170 	}
171 
172 	strioc.ic_cmd = RPC_SERVER;
173 	strioc.ic_dp = (char *)0;
174 	strioc.ic_len = 0;
175 	strioc.ic_timout = -1;
176 
177 	/* Tell rpcmod to act like a server stream. */
178 	if (ioctl(fd, I_STR, &strioc) < 0) {
179 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
180 		(void) t_close(fd);
181 		return (-1);
182 	}
183 
184 	/*
185 	 * Re-push timod so that we will still be doing TLI
186 	 * operations on the descriptor.
187 	 */
188 	if (ioctl(fd, I_PUSH, "timod") < 0) {
189 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
190 		(void) t_close(fd);
191 		return (-1);
192 	}
193 
194 	return (fd);
195 }
196 
197 static int
198 nofile_increase(int limit)
199 {
200 	struct rlimit rl;
201 
202 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
203 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
204 		return (-1);
205 	}
206 
207 	if (limit > 0)
208 		rl.rlim_cur = limit;
209 	else
210 		rl.rlim_cur += NOFILE_INC_SIZE;
211 
212 	if (rl.rlim_cur > rl.rlim_max &&
213 	    rl.rlim_max != RLIM_INFINITY)
214 		rl.rlim_max = rl.rlim_cur;
215 
216 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
217 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
218 			rl.rlim_cur);
219 		return (-1);
220 	}
221 
222 	return (0);
223 }
224 
225 int
226 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
227 	struct nd_hostserv *hs, int backlog)
228 {
229 	int fd;
230 	struct t_bind  *ntb;
231 	struct t_bind tb;
232 	struct nd_addrlist *addrlist;
233 	struct t_optmgmt req, resp;
234 	struct opthdr *opt;
235 	char reqbuf[128];
236 	bool_t use_any = FALSE;
237 
238 	if ((fd = nfslib_transport_open(nconf)) == -1) {
239 		syslog(LOG_ERR, "cannot establish transport service over %s",
240 			nconf->nc_device);
241 		return (-1);
242 	}
243 
244 	addrlist = (struct nd_addrlist *)NULL;
245 
246 	/* nfs4_callback service does not used a fieed port number */
247 
248 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
249 		tb.addr.maxlen = 0;
250 		tb.addr.len = 0;
251 		tb.addr.buf = 0;
252 		use_any = TRUE;
253 
254 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
255 
256 		syslog(LOG_ERR,
257 		"Cannot get address for transport %s host %s service %s",
258 			nconf->nc_netid, hs->h_host, hs->h_serv);
259 		(void) t_close(fd);
260 		return (-1);
261 	}
262 
263 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
264 		/*
265 		 * If we're running over TCP, then set the
266 		 * SO_REUSEADDR option so that we can bind
267 		 * to our preferred address even if previously
268 		 * left connections exist in FIN_WAIT states.
269 		 * This is somewhat bogus, but otherwise you have
270 		 * to wait 2 minutes to restart after killing it.
271 		 */
272 		if (reuseaddr(fd) == -1) {
273 			syslog(LOG_WARNING,
274 			"couldn't set SO_REUSEADDR option on transport");
275 		}
276 	}
277 
278 	if (nconf->nc_semantics == NC_TPI_CLTS)
279 		tb.qlen = 0;
280 	else
281 		tb.qlen = backlog;
282 
283 	/* LINTED pointer alignment */
284 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
285 	if (ntb == (struct t_bind *)NULL) {
286 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
287 		(void) t_close(fd);
288 		netdir_free((void *)addrlist, ND_ADDRLIST);
289 		return (-1);
290 	}
291 
292 	/*
293 	 * XXX - what about the space tb->addr.buf points to? This should
294 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
295 	 * should't be called with T_ALL.
296 	 */
297 	if (addrlist)
298 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
299 
300 	if (t_bind(fd, &tb, ntb) == -1) {
301 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
302 		(void) t_free((char *)ntb, T_BIND);
303 		netdir_free((void *)addrlist, ND_ADDRLIST);
304 		(void) t_close(fd);
305 		return (-1);
306 	}
307 
308 	/* make sure we bound to the right address */
309 	if (use_any == FALSE &&
310 	    (tb.addr.len != ntb->addr.len ||
311 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
312 		syslog(LOG_ERR, "t_bind to wrong address");
313 		(void) t_free((char *)ntb, T_BIND);
314 		netdir_free((void *)addrlist, ND_ADDRLIST);
315 		(void) t_close(fd);
316 		return (-1);
317 	}
318 
319 	/*
320 	 * Call nfs4svc_setport so that the kernel can be
321 	 * informed what port number the daemon is listing
322 	 * for incoming connection requests.
323 	 */
324 
325 	if ((nconf->nc_semantics == NC_TPI_COTS ||
326 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
327 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
328 
329 	*addr = &ntb->addr;
330 	netdir_free((void *)addrlist, ND_ADDRLIST);
331 
332 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
333 		/*
334 		 * Disable the Nagle algorithm on TCP connections.
335 		 * Connections accepted from this listener will
336 		 * inherit the listener options.
337 		 */
338 
339 		/* LINTED pointer alignment */
340 		opt = (struct opthdr *)reqbuf;
341 		opt->level = IPPROTO_TCP;
342 		opt->name = TCP_NODELAY;
343 		opt->len = sizeof (int);
344 
345 		/* LINTED pointer alignment */
346 		*(int *)((char *)opt + sizeof (*opt)) = 1;
347 
348 		req.flags = T_NEGOTIATE;
349 		req.opt.len = sizeof (*opt) + opt->len;
350 		req.opt.buf = (char *)opt;
351 		resp.flags = 0;
352 		resp.opt.buf = reqbuf;
353 		resp.opt.maxlen = sizeof (reqbuf);
354 
355 		if (t_optmgmt(fd, &req, &resp) < 0 ||
356 				resp.flags != T_SUCCESS) {
357 			syslog(LOG_ERR,
358 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
359 				nconf->nc_proto, t_errno);
360 		}
361 	}
362 
363 	return (fd);
364 }
365 
366 static int
367 reuseaddr(int fd)
368 {
369 	struct t_optmgmt req, resp;
370 	struct opthdr *opt;
371 	char reqbuf[128];
372 	int *ip;
373 
374 	/* LINTED pointer alignment */
375 	opt = (struct opthdr *)reqbuf;
376 	opt->level = SOL_SOCKET;
377 	opt->name = SO_REUSEADDR;
378 	opt->len = sizeof (int);
379 
380 	/* LINTED pointer alignment */
381 	ip = (int *)&reqbuf[sizeof (struct opthdr)];
382 	*ip = 1;
383 
384 	req.flags = T_NEGOTIATE;
385 	req.opt.len = sizeof (struct opthdr) + opt->len;
386 	req.opt.buf = (char *)opt;
387 
388 	resp.flags = 0;
389 	resp.opt.buf = reqbuf;
390 	resp.opt.maxlen = sizeof (reqbuf);
391 
392 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
393 		t_error("t_optmgmt");
394 		return (-1);
395 	}
396 	return (0);
397 }
398 
399 void
400 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
401 {
402 	int error;
403 
404 	/*
405 	 * Save the error code across syslog(), just in case syslog()
406 	 * gets its own error and, therefore, overwrites errno.
407 	 */
408 	error = errno;
409 	if (t_errno == TSYSERR) {
410 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
411 			tli_name, fd, nconf->nc_proto);
412 	} else {
413 		syslog(LOG_ERR,
414 			"%s(file descriptor %d/transport %s) TLI error %d",
415 			tli_name, fd, nconf->nc_proto, t_errno);
416 	}
417 	errno = error;
418 }
419 
420 /*
421  * Called to set up service over a particular transport.
422  */
423 void
424 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
425 	int (*svc)(int, struct netbuf, struct netconfig *))
426 {
427 	register int sock;
428 	struct protob *protobp;
429 	struct netbuf *retaddr;
430 	struct netconfig *retnconf;
431 	struct netbuf addrmask;
432 	int vers;
433 	int err;
434 	int l;
435 
436 	if (provider)
437 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
438 					&retnconf);
439 	else
440 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
441 					&retnconf);
442 
443 	if (sock == -1) {
444 		(void) syslog(LOG_ERR,
445 	"Cannot establish %s service over %s: transport setup problem.",
446 			protobp0->serv, provider ? provider : proto);
447 		return;
448 	}
449 
450 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
451 		(void) syslog(LOG_ERR,
452 		    "Cannot set address mask for %s", retnconf->nc_netid);
453 		return;
454 	}
455 
456 	/*
457 	 * Register all versions of the programs in the protocol block list.
458 	 */
459 	l = strlen(NC_UDP);
460 	for (protobp = protobp0; protobp; protobp = protobp->next) {
461 		for (vers = protobp->versmin; vers <= protobp->versmax;
462 			vers++) {
463 			if ((protobp->program == NFS_PROGRAM ||
464 				protobp->program == NFS_ACL_PROGRAM) &&
465 				vers == NFS_V4 &&
466 				strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
467 				continue;
468 
469 			(void) rpcb_unset(protobp->program, vers, retnconf);
470 			(void) rpcb_set(protobp->program, vers, retnconf,
471 					retaddr);
472 		}
473 	}
474 
475 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
476 		/* Don't drop core if supporting module(s) aren't loaded. */
477 		(void) signal(SIGSYS, SIG_IGN);
478 
479 		/*
480 		 * svc() doesn't block, it returns success or failure.
481 		 */
482 
483 		if (svc == NULL && Mysvc4 != NULL)
484 			err = (*Mysvc4)(sock, &addrmask, retnconf,
485 					NFS4_SETPORT|NFS4_KRPC_START, retaddr);
486 		else
487 			err = (*svc)(sock, addrmask, retnconf);
488 
489 		if (err < 0) {
490 			(void) syslog(LOG_ERR,
491 				"Cannot establish %s service over <file desc."
492 				" %d, protocol %s> : %m. Exiting",
493 				protobp0->serv, sock, retnconf->nc_proto);
494 			exit(1);
495 		}
496 	}
497 
498 	/*
499 	 * We successfully set up the server over this transport.
500 	 * Add this descriptor to the one being polled on.
501 	 */
502 	add_to_poll_list(sock, retnconf);
503 }
504 /*
505  * Set up the NFS service over all the available transports.
506  * Returns -1 for failure, 0 for success.
507  */
508 int
509 do_all(struct protob *protobp,
510 	int (*svc)(int, struct netbuf, struct netconfig *))
511 {
512 	struct netconfig *nconf;
513 	NCONF_HANDLE *nc;
514 	int l;
515 
516 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
517 		syslog(LOG_ERR, "setnetconfig failed: %m");
518 		return (-1);
519 	}
520 	l = strlen(NC_UDP);
521 	while (nconf = getnetconfig(nc)) {
522 		if ((nconf->nc_flag & NC_VISIBLE) &&
523 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
524 		    OK_TPI_TYPE(nconf) &&
525 		    (protobp->program != NFS4_CALLBACK ||
526 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
527 			do_one(nconf->nc_device, nconf->nc_proto,
528 				protobp, svc);
529 	}
530 	(void) endnetconfig(nc);
531 	return (0);
532 }
533 
534 /*
535  * poll on the open transport descriptors for events and errors.
536  */
537 void
538 poll_for_action(void)
539 {
540 	int nfds;
541 	int i;
542 
543 	/*
544 	 * Keep polling until all transports have been closed. When this
545 	 * happens, we return.
546 	 */
547 	while ((int)num_fds > 0) {
548 		nfds = poll(poll_array, num_fds, INFTIM);
549 		switch (nfds) {
550 		case 0:
551 			continue;
552 
553 		case -1:
554 			/*
555 			 * Some errors from poll could be
556 			 * due to temporary conditions, and we try to
557 			 * be robust in the face of them. Other
558 			 * errors (should never happen in theory)
559 			 * are fatal (eg. EINVAL, EFAULT).
560 			 */
561 			switch (errno) {
562 			case EINTR:
563 			    continue;
564 
565 			case EAGAIN:
566 			case ENOMEM:
567 				(void) sleep(10);
568 				continue;
569 
570 			default:
571 				(void) syslog(LOG_ERR,
572 						"poll failed: %m. Exiting");
573 				exit(1);
574 			}
575 		default:
576 			break;
577 		}
578 
579 		/*
580 		 * Go through the poll list looking for events.
581 		 */
582 		for (i = 0; i < num_fds && nfds > 0; i++) {
583 			if (poll_array[i].revents) {
584 				nfds--;
585 				/*
586 				 * We have a message, so try to read it.
587 				 * Record the error return in errno,
588 				 * so that syslog(LOG_ERR, "...%m")
589 				 * dumps the corresponding error string.
590 				 */
591 				if (conn_polled[i].nc.nc_semantics ==
592 				    NC_TPI_CLTS) {
593 					errno = do_poll_clts_action(
594 							poll_array[i].fd, i);
595 				} else {
596 					errno = do_poll_cots_action(
597 							poll_array[i].fd, i);
598 				}
599 
600 				if (errno == 0)
601 					continue;
602 				/*
603 				 * Most returned error codes mean that there is
604 				 * fatal condition which we can only deal with
605 				 * by closing the transport.
606 				 */
607 				if (errno != EAGAIN && errno != ENOMEM) {
608 					(void) syslog(LOG_ERR,
609 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
610 						poll_array[i].fd,
611 						conn_polled[i].nc.nc_proto);
612 					(void) t_close(poll_array[i].fd);
613 					remove_from_poll_list(poll_array[i].fd);
614 
615 				} else if (errno == ENOMEM)
616 					(void) sleep(5);
617 			}
618 		}
619 	}
620 
621 	(void) syslog(LOG_ERR,
622 		"All transports have been closed with errors. Exiting.");
623 }
624 
625 /*
626  * Allocate poll/transport array entries for this descriptor.
627  */
628 static void
629 add_to_poll_list(int fd, struct netconfig *nconf)
630 {
631 	static int poll_array_size = 0;
632 
633 	/*
634 	 * If the arrays are full, allocate new ones.
635 	 */
636 	if (num_fds == poll_array_size) {
637 		struct pollfd *tpa;
638 		struct conn_entry *tnp;
639 
640 		if (poll_array_size != 0) {
641 			tpa = poll_array;
642 			tnp = conn_polled;
643 		} else
644 			tpa = (struct pollfd *)0;
645 
646 		poll_array_size += POLL_ARRAY_INC_SIZE;
647 		/*
648 		 * Allocate new arrays.
649 		 */
650 		poll_array = (struct pollfd *)
651 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
652 		conn_polled = (struct conn_entry *)
653 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
654 		if (poll_array == (struct pollfd *)NULL ||
655 		    conn_polled == (struct conn_entry *)NULL) {
656 			syslog(LOG_ERR, "malloc failed for poll array");
657 			exit(1);
658 		}
659 
660 		/*
661 		 * Copy the data of the old ones into new arrays, and
662 		 * free the old ones.
663 		 */
664 		if (tpa) {
665 			(void) memcpy((void *)poll_array, (void *)tpa,
666 				num_fds * sizeof (struct pollfd));
667 			(void) memcpy((void *)conn_polled, (void *)tnp,
668 				num_fds * sizeof (struct conn_entry));
669 			free((void *)tpa);
670 			free((void *)tnp);
671 		}
672 	}
673 
674 	/*
675 	 * Set the descriptor and event list. All possible events are
676 	 * polled for.
677 	 */
678 	poll_array[num_fds].fd = fd;
679 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
680 
681 	/*
682 	 * Copy the transport data over too.
683 	 */
684 	conn_polled[num_fds].nc = *nconf;
685 	conn_polled[num_fds].closing = 0;
686 
687 	/*
688 	 * Set the descriptor to non-blocking. Avoids a race
689 	 * between data arriving on the stream and then having it
690 	 * flushed before we can read it.
691 	 */
692 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
693 		(void) syslog(LOG_ERR,
694 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
695 			num_fds, nconf->nc_proto);
696 		exit(1);
697 	}
698 
699 	/*
700 	 * Count this descriptor.
701 	 */
702 	++num_fds;
703 }
704 
705 static void
706 remove_from_poll_list(int fd)
707 {
708 	int i;
709 	int num_to_copy;
710 
711 	for (i = 0; i < num_fds; i++) {
712 		if (poll_array[i].fd == fd) {
713 			--num_fds;
714 			num_to_copy = num_fds - i;
715 			(void) memcpy((void *)&poll_array[i],
716 				(void *)&poll_array[i+1],
717 				num_to_copy * sizeof (struct pollfd));
718 			(void) memset((void *)&poll_array[num_fds], 0,
719 				sizeof (struct pollfd));
720 			(void) memcpy((void *)&conn_polled[i],
721 				(void *)&conn_polled[i+1],
722 				num_to_copy * sizeof (struct conn_entry));
723 			(void) memset((void *)&conn_polled[num_fds], 0,
724 				sizeof (struct conn_entry));
725 			return;
726 		}
727 	}
728 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
729 
730 }
731 
/*
 * Called to read and interpret the event on a connectionless descriptor.
 * Returns 0 if successful, or a UNIX error code if failure.
 *
 * fd is the descriptor that reported an event and conn_index is its
 * slot in conn_polled.  The routine calls t_rcvudata(), expecting it to
 * fail with TLOOK, then t_look(), expecting T_UDERR, then t_rcvuderr()
 * to fetch the datagram error, which is logged together with the
 * client's host name (or, if that cannot be resolved, its transport
 * address in hex).  System errors (TSYSERR) are returned to the caller
 * as errno values; other TLI failures cause one message to be consumed
 * with getmsg() and discarded.
 *
 * The t_unitdata and t_uderr buffers are static and are kept across
 * calls for as long as the same descriptor is being serviced.
 */
static int
do_poll_clts_action(int fd, int conn_index)
{
	int error;
	int ret;
	int flags;
	int pri;
	struct netconfig *nconf = &conn_polled[conn_index].nc;
	static struct t_unitdata *unitdata = NULL;
	static struct t_uderr *uderr = NULL;
	static int oldfd = -1;
	struct nd_hostservlist *host = NULL;
	struct strbuf ctl[1], data[1];
	/*
	 * We just need to have some space to consume the
	 * message in the event we can't use the TLI interface to do the
	 * job.
	 *
	 * We flush the message using getmsg(). For the control part
	 * we allocate enough for any TPI header plus 32 bytes for address
	 * and options. For the data part, there is nothing magic about
	 * the size of the array, but 256 bytes is probably better than
	 * 1 byte, and we don't expect any data portion anyway.
	 *
	 * If the array sizes are too small, we handle this because getmsg()
	 * (called to consume the message) will return MOREDATA|MORECTL.
	 * Thus we just call getmsg() until it's read the message.
	 */
	char ctlbuf[sizeof (union T_primitives) + 32];
	char databuf[256];

	/*
	 * If this is the same descriptor as the last time
	 * do_poll_clts_action was called, we can save some
	 * de-allocation and allocation.
	 */
	if (oldfd != fd) {
		oldfd = fd;

		if (unitdata) {
			(void) t_free((char *)unitdata, T_UNITDATA);
			unitdata = NULL;
		}
		if (uderr) {
			(void) t_free((char *)uderr, T_UDERROR);
			uderr = NULL;
		}
	}

	/*
	 * Allocate a unitdata structure for receiving the event.
	 */
	if (unitdata == NULL) {
		/* LINTED pointer alignment */
		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
		if (unitdata == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
					fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
					fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

try_again:
	flags = 0;

	/*
	 * The idea is we wait for T_UNITDATA_IND's. Of course,
	 * we don't get any, because rpcmod filters them out.
	 * However, we need to call t_rcvudata() to let TLI
	 * tell us we have a T_UDERROR_IND.
	 *
	 * algorithm is:
	 * 	t_rcvudata(), expecting TLOOK.
	 * 	t_look(), expecting T_UDERR.
	 * 	t_rcvuderr(), expecting success (0).
	 * 	expand destination address into ASCII,
	 *	and dump it.
	 */

	ret = t_rcvudata(fd, unitdata, &flags);
	if (ret == 0 || t_errno == TBUFOVFLW) {
		(void) syslog(LOG_WARNING,
"t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
			fd, nconf->nc_proto, unitdata->udata.len);

		/*
		 * Even though we don't expect any data, in case we do,
		 * keep reading until there is no more.
		 */
		if (flags & T_MORE)
			goto try_again;

		return (0);
	}

	switch (t_errno) {
	case TNODATA:
		return (0);
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
			"t_rcvudata(file descriptor %d/transport %s) %m",
			fd, nconf->nc_proto);
		return (error);
	case TLOOK:
		break;
	default:
		(void) syslog(LOG_ERR,
		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

	ret = t_look(fd);
	switch (ret) {
	case 0:
		return (0);
	case -1:
		/*
		 * System errors are returned to caller.
		 */
		if (t_errno == TSYSERR) {
			/*
			 * Save the error code across
			 * syslog(), just in case
			 * syslog() gets its own error
			 * and therefore overwrites errno.
			 */
			error = errno;
			(void) syslog(LOG_ERR,
				"t_look(file descriptor %d/transport %s) %m",
				fd, nconf->nc_proto);
			return (error);
		}
		(void) syslog(LOG_ERR,
			"t_look(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	case T_UDERR:
		break;
	default:
		/* Unexpected event: warn, then try t_rcvuderr() anyway. */
		(void) syslog(LOG_WARNING,
	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
			fd, nconf->nc_proto, ret, T_UDERR);
	}

	if (uderr == NULL) {
		/* LINTED pointer alignment */
		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
		if (uderr == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
					fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
				fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

	ret = t_rcvuderr(fd, uderr);
	if (ret == 0) {
		/*
		 * Decide the log priority once, from the datagram error
		 * itself.  errno cannot be trusted for this later on:
		 * syslog(), netdir_getbyaddr() and malloc() may all
		 * overwrite it.
		 */
		pri = (uderr->error == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING;

		/*
		 * Save the datagram error in errno, so that the
		 * %m argument to syslog picks up the error string.
		 */
		errno = uderr->error;

		/*
		 * Log the datagram error, then log the host that
		 * probably triggered it. Cannot log both in the
		 * same transaction because of packet size limitations
		 * in /dev/log.
		 */
		(void) syslog(pri,
"NFS response over <file descriptor %d/transport %s> generated error: %m",
			fd, nconf->nc_proto);

		/*
		 * Try to map the client's address back to a
		 * name.
		 */
		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
		if (ret != -1 && host && host->h_cnt > 0 &&
		    host->h_hostservs) {
			(void) syslog(pri,
"Bad NFS response was sent to client with host name: %s; service port: %s",
				host->h_hostservs->h_host,
				host->h_hostservs->h_serv);
		} else {
			int i, j;
			char *buf;
			char *hex = "0123456789abcdef";

			/*
			 * Mapping failed, print the whole thing
			 * in ASCII hex.
			 */
			buf = (char *)malloc(uderr->addr.len * 2 + 1);
			if (buf == NULL) {
				/* Out of memory: skip the address dump. */
				(void) syslog(LOG_ERR,
				    "malloc failed for transport address");
			} else {
				for (i = 0, j = 0; i < uderr->addr.len;
				    i++, j += 2) {
					buf[j] = hex[((uderr->addr.buf[i])
					    >> 4) & 0xf];
					buf[j+1] = hex[uderr->addr.buf[i]
					    & 0xf];
				}
				buf[j] = '\0';
				(void) syslog(pri,
	"Bad NFS response was sent to client with transport address: 0x%s",
					buf);
				free((void *)buf);
			}
		}

		if (ret == 0 && host != NULL)
			netdir_free((void *)host, ND_HOSTSERVLIST);
		return (0);
	}

	switch (t_errno) {
	case TNOUDERR:
		goto flush_it;
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
			"t_rcvuderr(file descriptor %d/transport %s) %m",
			fd, nconf->nc_proto);
		return (error);
	default:
		(void) syslog(LOG_ERR,
		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

flush_it:
	/*
	 * If we get here, then we could not cope with whatever message
	 * we attempted to read, so flush it. If we did read a message,
	 * and one isn't present, that is all right, because fd is in
	 * nonblocking mode.
	 */
	(void) syslog(LOG_ERR,
	"Flushing one input message from <file descriptor %d/transport %s>",
		fd, nconf->nc_proto);

	/*
	 * Read and discard the message. Do this until there is
	 * no more control/data in the message or until we get an error.
	 */
	do {
		ctl->maxlen = sizeof (ctlbuf);
		ctl->buf = ctlbuf;
		data->maxlen = sizeof (databuf);
		data->buf = databuf;
		flags = 0;
		ret = getmsg(fd, ctl, data, &flags);
		if (ret == -1)
			return (errno);
	} while (ret != 0);

	return (0);
}
1033 
1034 static void
1035 conn_close_oldest(void)
1036 {
1037 	int fd;
1038 	int i1;
1039 
1040 	/*
1041 	 * Find the oldest connection that is not already in the
1042 	 * process of shutting down.
1043 	 */
1044 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1045 		if (i1 >= num_fds)
1046 			return;
1047 		if (conn_polled[i1].closing == 0)
1048 			break;
1049 	}
1050 #ifdef DEBUG
1051 	printf("too many connections (%d), releasing oldest (%d)\n",
1052 		num_conns, poll_array[i1].fd);
1053 #else
1054 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1055 		num_conns, poll_array[i1].fd);
1056 #endif
1057 	fd = poll_array[i1].fd;
1058 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1059 		/*
1060 		 * For politeness, send a T_DISCON_REQ to the transport
1061 		 * provider.  We close the stream anyway.
1062 		 */
1063 		(void) t_snddis(fd, (struct t_call *)0);
1064 		num_conns--;
1065 		remove_from_poll_list(fd);
1066 		(void) t_close(fd);
1067 	} else {
1068 		/*
1069 		 * For orderly release, we do not close the stream
1070 		 * until the T_ORDREL_IND arrives to complete
1071 		 * the handshake.
1072 		 */
1073 		if (t_sndrel(fd) == 0)
1074 			conn_polled[i1].closing = 1;
1075 	}
1076 }
1077 
1078 static boolean_t
1079 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1080 {
1081 	struct conn_ind	*conn;
1082 	struct conn_ind	*next_conn;
1083 
1084 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1085 	if (conn == NULL) {
1086 		syslog(LOG_ERR, "malloc for listen indication failed");
1087 		return (FALSE);
1088 	}
1089 
1090 	/* LINTED pointer alignment */
1091 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1092 	if (conn->conn_call == NULL) {
1093 		free((char *)conn);
1094 		nfslib_log_tli_error("t_alloc", fd, nconf);
1095 		return (FALSE);
1096 	}
1097 
1098 	if (t_listen(fd, conn->conn_call) == -1) {
1099 		nfslib_log_tli_error("t_listen", fd, nconf);
1100 		(void) t_free((char *)conn->conn_call, T_CALL);
1101 		free((char *)conn);
1102 		return (FALSE);
1103 	}
1104 
1105 	if (conn->conn_call->udata.len > 0) {
1106 		syslog(LOG_WARNING,
1107 	"rejecting inbound connection(%s) with %d bytes of connect data",
1108 			nconf->nc_proto, conn->conn_call->udata.len);
1109 
1110 		conn->conn_call->udata.len = 0;
1111 		(void) t_snddis(fd, conn->conn_call);
1112 		(void) t_free((char *)conn->conn_call, T_CALL);
1113 		free((char *)conn);
1114 		return (FALSE);
1115 	}
1116 
1117 	if ((next_conn = *connp) != NULL) {
1118 		next_conn->conn_prev->conn_next = conn;
1119 		conn->conn_next = next_conn;
1120 		conn->conn_prev = next_conn->conn_prev;
1121 		next_conn->conn_prev = conn;
1122 	} else {
1123 		conn->conn_next = conn;
1124 		conn->conn_prev = conn;
1125 		*connp = conn;
1126 	}
1127 	return (TRUE);
1128 }
1129 
1130 static int
1131 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1132 {
1133 	struct conn_ind	*conn;
1134 	struct t_discon	discon;
1135 
1136 	discon.udata.buf = (char *)0;
1137 	discon.udata.maxlen = 0;
1138 	if (t_rcvdis(fd, &discon) == -1) {
1139 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1140 		return (-1);
1141 	}
1142 
1143 	conn = *connp;
1144 	if (conn == NULL)
1145 		return (0);
1146 
1147 	do {
1148 		if (conn->conn_call->sequence == discon.sequence) {
1149 			if (conn->conn_next == conn)
1150 				*connp = (struct conn_ind *)0;
1151 			else {
1152 				if (conn == *connp) {
1153 					*connp = conn->conn_next;
1154 				}
1155 				conn->conn_next->conn_prev = conn->conn_prev;
1156 				conn->conn_prev->conn_next = conn->conn_next;
1157 			}
1158 			free((char *)conn);
1159 			break;
1160 		}
1161 		conn = conn->conn_next;
1162 	} while (conn != *connp);
1163 
1164 	return (0);
1165 }
1166 
1167 static void
1168 cots_listen_event(int fd, int conn_index)
1169 {
1170 	struct t_call *call;
1171 	struct conn_ind	*conn;
1172 	struct conn_ind	*conn_head;
1173 	int event;
1174 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1175 	int new_fd;
1176 	struct netbuf addrmask;
1177 	int ret = 0;
1178 	char *clnt;
1179 	char *clnt_uaddr = NULL;
1180 	struct nd_hostservlist *clnt_serv = NULL;
1181 
1182 	conn_head = (struct conn_ind *)0;
1183 	(void) conn_get(fd, nconf, &conn_head);
1184 
1185 	while ((conn = conn_head) != NULL) {
1186 		conn_head = conn->conn_next;
1187 		if (conn_head == conn)
1188 			conn_head = (struct conn_ind *)0;
1189 		else {
1190 			conn_head->conn_prev = conn->conn_prev;
1191 			conn->conn_prev->conn_next = conn_head;
1192 		}
1193 		call = conn->conn_call;
1194 		free((char *)conn);
1195 
1196 		/*
1197 		 * If we have already accepted the maximum number of
1198 		 * connections allowed on the command line, then drop
1199 		 * the oldest connection (for any protocol) before
1200 		 * accepting the new connection.  Unless explicitly
1201 		 * set on the command line, max_conns_allowed is -1.
1202 		 */
1203 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1204 			conn_close_oldest();
1205 
1206 		/*
1207 		 * Create a new transport endpoint for the same proto as
1208 		 * the listener.
1209 		 */
1210 		new_fd = nfslib_transport_open(nconf);
1211 		if (new_fd == -1) {
1212 			call->udata.len = 0;
1213 			(void) t_snddis(fd, call);
1214 			(void) t_free((char *)call, T_CALL);
1215 			syslog(LOG_ERR, "Cannot establish transport over %s",
1216 				nconf->nc_device);
1217 			continue;
1218 		}
1219 
1220 		/* Bind to a generic address/port for the accepting stream. */
1221 		if (t_bind(new_fd, (struct t_bind *)NULL,
1222 		    (struct t_bind *)NULL) == -1) {
1223 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1224 			call->udata.len = 0;
1225 			(void) t_snddis(fd, call);
1226 			(void) t_free((char *)call, T_CALL);
1227 			(void) t_close(new_fd);
1228 			continue;
1229 		}
1230 
1231 		while (t_accept(fd, new_fd, call) == -1) {
1232 			if (t_errno != TLOOK) {
1233 #ifdef DEBUG
1234 				nfslib_log_tli_error("t_accept", fd, nconf);
1235 #endif
1236 				call->udata.len = 0;
1237 				(void) t_snddis(fd, call);
1238 				(void) t_free((char *)call, T_CALL);
1239 				(void) t_close(new_fd);
1240 				goto do_next_conn;
1241 			}
1242 			while (event = t_look(fd)) {
1243 				switch (event) {
1244 				case T_LISTEN:
1245 #ifdef DEBUG
1246 					printf(
1247 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1248 #endif
1249 					(void) conn_get(fd, nconf, &conn_head);
1250 					continue;
1251 				case T_DISCONNECT:
1252 #ifdef DEBUG
1253 					printf(
1254 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1255 						nconf->nc_proto);
1256 #endif
1257 					(void) discon_get(fd, nconf,
1258 								&conn_head);
1259 					continue;
1260 				default:
1261 					syslog(LOG_ERR,
1262 			"unexpected event 0x%x during accept processing (%s)",
1263 						event, nconf->nc_proto);
1264 					call->udata.len = 0;
1265 					(void) t_snddis(fd, call);
1266 					(void) t_free((char *)call, T_CALL);
1267 					(void) t_close(new_fd);
1268 					goto do_next_conn;
1269 				}
1270 			}
1271 		}
1272 
1273 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1274 			(void) syslog(LOG_ERR,
1275 			    "Cannot set address mask for %s",
1276 				nconf->nc_netid);
1277 			return;
1278 		}
1279 
1280 		/* Tell KRPC about the new stream. */
1281 		if (Mysvc4 != NULL)
1282 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1283 				NFS4_KRPC_START, &call->addr);
1284 		else
1285 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1286 
1287 		if (ret < 0) {
1288 			if (errno != ENOTCONN) {
1289 				syslog(LOG_ERR,
1290 				    "unable to register new connection: %m");
1291 			} else {
1292 				/*
1293 				 * This is the only error that could be
1294 				 * caused by the client, so who was it?
1295 				 */
1296 				if (netdir_getbyaddr(nconf, &clnt_serv,
1297 				    &(call->addr)) == ND_OK &&
1298 				    clnt_serv->h_cnt > 0)
1299 					clnt = clnt_serv->h_hostservs->h_host;
1300 				else
1301 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1302 					    &(call->addr));
1303 				/*
1304 				 * If we don't know who the client was,
1305 				 * remain silent.
1306 				 */
1307 				if (clnt)
1308 					syslog(LOG_ERR,
1309 "unable to register new connection: client %s has dropped connection", clnt);
1310 				if (clnt_serv)
1311 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1312 				if (clnt_uaddr)
1313 					free(clnt_uaddr);
1314 			}
1315 			free(addrmask.buf);
1316 			(void) t_snddis(new_fd, (struct t_call *)0);
1317 			(void) t_free((char *)call, T_CALL);
1318 			(void) t_close(new_fd);
1319 			goto do_next_conn;
1320 		}
1321 
1322 		free(addrmask.buf);
1323 		(void) t_free((char *)call, T_CALL);
1324 
1325 		/*
1326 		 * Poll on the new descriptor so that we get disconnect
1327 		 * and orderly release indications.
1328 		 */
1329 		num_conns++;
1330 		add_to_poll_list(new_fd, nconf);
1331 
1332 		/* Reset nconf in case it has been moved. */
1333 		nconf = &conn_polled[conn_index].nc;
1334 do_next_conn:;
1335 	}
1336 }
1337 
1338 static int
1339 do_poll_cots_action(int fd, int conn_index)
1340 {
1341 	char buf[256];
1342 	int event;
1343 	int i1;
1344 	int flags;
1345 	struct conn_entry *connent = &conn_polled[conn_index];
1346 	struct netconfig *nconf = &(connent->nc);
1347 	const char *errorstr;
1348 
1349 	while (event = t_look(fd)) {
1350 		switch (event) {
1351 		case T_LISTEN:
1352 #ifdef DEBUG
1353 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1354 #endif
1355 			cots_listen_event(fd, conn_index);
1356 			break;
1357 
1358 		case T_DATA:
1359 #ifdef DEBUG
1360 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1361 #endif
1362 			/*
1363 			 * Receive a private notification from CONS rpcmod.
1364 			 */
1365 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1366 			if (i1 == -1) {
1367 				syslog(LOG_ERR, "t_rcv failed");
1368 				break;
1369 			}
1370 			if (i1 < sizeof (int))
1371 				break;
1372 			i1 = BE32_TO_U32(buf);
1373 			if (i1 == 1 || i1 == 2) {
1374 				/*
1375 				 * This connection has been idle for too long,
1376 				 * so release it as politely as we can.  If we
1377 				 * have already initiated an orderly release
1378 				 * and we get notified that the stream is
1379 				 * still idle, pull the plug.  This prevents
1380 				 * hung connections from continuing to consume
1381 				 * resources.
1382 				 */
1383 #ifdef DEBUG
1384 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1385 printf("initiating orderly release of idle connection\n");
1386 #endif
1387 				if (nconf->nc_semantics == NC_TPI_COTS ||
1388 					connent->closing != 0) {
1389 					(void) t_snddis(fd, (struct t_call *)0);
1390 					goto fdclose;
1391 				}
1392 				/*
1393 				 * For NC_TPI_COTS_ORD, the stream is closed
1394 				 * and removed from the poll list when the
1395 				 * T_ORDREL is received from the provider.  We
1396 				 * don't wait for it here because it may take
1397 				 * a while for the transport to shut down.
1398 				 */
1399 				if (t_sndrel(fd) == -1) {
1400 					syslog(LOG_ERR,
1401 					"unable to send orderly release %m");
1402 				}
1403 				connent->closing = 1;
1404 			} else
1405 				syslog(LOG_ERR,
1406 				"unexpected event from CONS rpcmod %d", i1);
1407 			break;
1408 
1409 		case T_ORDREL:
1410 #ifdef DEBUG
1411 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1412 #endif
1413 			/* Perform an orderly release. */
1414 			if (t_rcvrel(fd) == 0) {
1415 				/* T_ORDREL on listen fd's should be ignored */
1416 				if (!is_listen_fd_index(conn_index)) {
1417 					(void) t_sndrel(fd);
1418 					goto fdclose;
1419 				}
1420 				break;
1421 
1422 			} else if (t_errno == TLOOK) {
1423 				break;
1424 			} else {
1425 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1426 
1427 				/*
1428 				 * check to make sure we do not close
1429 				 * listen fd
1430 				 */
1431 				if (is_listen_fd_index(conn_index))
1432 					break;
1433 				else
1434 					goto fdclose;
1435 			}
1436 
1437 		case T_DISCONNECT:
1438 #ifdef DEBUG
1439 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1440 #endif
1441 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1442 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1443 
1444 			/*
1445 			 * T_DISCONNECT on listen fd's should be ignored.
1446 			 */
1447 			if (is_listen_fd_index(conn_index))
1448 				break;
1449 			else
1450 				goto fdclose;
1451 
1452 		case T_ERROR:
1453 		default:
1454 			if (event == T_ERROR || t_errno == TSYSERR) {
1455 			    if ((errorstr = strerror(errno)) == NULL) {
1456 				(void) sprintf(buf, "Unknown error num %d",
1457 									errno);
1458 				errorstr = (const char *) buf;
1459 			    }
1460 			} else if (event == -1)
1461 				errorstr = t_strerror(t_errno);
1462 			else
1463 				errorstr = "";
1464 			syslog(LOG_ERR,
1465 			    "unexpected TLI event (0x%x) on "
1466 			    "connection-oriented transport(%s,%d):%s",
1467 			    event, nconf->nc_proto, fd, errorstr);
1468 fdclose:
1469 			num_conns--;
1470 			remove_from_poll_list(fd);
1471 			(void) t_close(fd);
1472 			return (0);
1473 		}
1474 	}
1475 
1476 	return (0);
1477 }
1478 
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name used by the netdir_*() routines.
 *
 * The comparison is exact and case-sensitive.  A name that is not in
 * the table yields the string "unrecognized".  The returned pointer
 * refers to a string literal.
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*service;
		char		*port;
	} name_map[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t slot;

	for (slot = 0; slot < sizeof (name_map) / sizeof (name_map[0]);
	    slot++) {
		if (strcmp(name, name_map[slot].service) == 0)
			return (name_map[slot].port);
	}

	return ("unrecognized");
}
1496 
1497 static int
1498 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1499 		struct netconfig **retnconf)
1500 {
1501 	struct netconfig *nconf;
1502 	NCONF_HANDLE *nc;
1503 	struct nd_hostserv hs;
1504 
1505 	hs.h_host = HOST_SELF;
1506 	hs.h_serv = serv_name_to_port_name(serv);
1507 
1508 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1509 		syslog(LOG_ERR, "setnetconfig failed: %m");
1510 		return (-1);
1511 	}
1512 	while (nconf = getnetconfig(nc)) {
1513 		if (OK_TPI_TYPE(nconf) &&
1514 		    strcmp(nconf->nc_device, provider) == 0) {
1515 			*retnconf = nconf;
1516 			return (nfslib_bindit(nconf, addr, &hs,
1517 					listen_backlog));
1518 		}
1519 	}
1520 	(void) endnetconfig(nc);
1521 
1522 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1523 	    provider);
1524 	return (-1);
1525 }
1526 
1527 static int
1528 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1529 		struct netconfig **retnconf)
1530 {
1531 	struct netconfig *nconf;
1532 	NCONF_HANDLE *nc = NULL;
1533 	struct nd_hostserv hs;
1534 
1535 	hs.h_host = HOST_SELF;
1536 	hs.h_serv = serv_name_to_port_name(serv);
1537 
1538 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1539 		syslog(LOG_ERR, "setnetconfig failed: %m");
1540 		return (-1);
1541 	}
1542 	while (nconf = getnetconfig(nc)) {
1543 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1544 			*retnconf = nconf;
1545 			return (nfslib_bindit(nconf, addr, &hs,
1546 					listen_backlog));
1547 		}
1548 	}
1549 	(void) endnetconfig(nc);
1550 
1551 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1552 	    proto);
1553 	return (-1);
1554 }
1555 
1556 #include <netinet/in.h>
1557 
1558 /*
1559  * Create an address mask appropriate for the transport.
1560  * The mask is used to obtain the host-specific part of
1561  * a network address when comparing addresses.
1562  * For an internet address the host-specific part is just
1563  * the 32 bit IP address and this part of the mask is set
1564  * to all-ones. The port number part of the mask is zeroes.
1565  */
1566 static int
1567 set_addrmask(fd, nconf, mask)
1568 	struct netconfig *nconf;
1569 	struct netbuf *mask;
1570 {
1571 	struct t_info info;
1572 
1573 	/*
1574 	 * Find the size of the address we need to mask.
1575 	 */
1576 	if (t_getinfo(fd, &info) < 0) {
1577 		t_error("t_getinfo");
1578 		return (-1);
1579 	}
1580 	mask->len = mask->maxlen = info.addr;
1581 	if (info.addr <= 0) {
1582 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1583 			info.addr);
1584 		return (-1);
1585 	}
1586 
1587 	mask->buf = (char *)malloc(mask->len);
1588 	if (mask->buf == NULL) {
1589 		syslog(LOG_ERR, "set_addrmask: no memory");
1590 		return (-1);
1591 	}
1592 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1593 
1594 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1595 		/*
1596 		 * Set the mask so that the port is ignored.
1597 		 */
1598 		/* LINTED pointer alignment */
1599 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1600 								(ulong_t)~0;
1601 		/* LINTED pointer alignment */
1602 		((struct sockaddr_in *)mask->buf)->sin_family =
1603 								(ushort_t)~0;
1604 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1605 		/* LINTED pointer alignment */
1606 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1607 			(uchar_t)~0, sizeof (struct in6_addr));
1608 		/* LINTED pointer alignment */
1609 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1610 								(ushort_t)~0;
1611 	} else {
1612 
1613 		/*
1614 		 * Set all mask bits.
1615 		 */
1616 		(void) memset(mask->buf, 0xFF, mask->len);
1617 	}
1618 	return (0);
1619 }
1620 
1621 /*
1622  * For listen fd's index is always less than end_listen_fds.
1623  * end_listen_fds is defined externally in the daemon that uses this library.
1624  * It's value is equal to the number of open file descriptors after the
1625  * last listen end point was opened but before any connection was accepted.
1626  */
1627 static int
1628 is_listen_fd_index(int index)
1629 {
1630 	return (index < end_listen_fds);
1631 }
1632