xref: /illumos-gate/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision 92a0208178405fef708b0283ffcaa02fbc3468ff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * nfs_tbind.c, common part for nfsd and lockd.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #define	PORTMAP
33 
34 #include <tiuser.h>
35 #include <fcntl.h>
36 #include <netconfig.h>
37 #include <stropts.h>
38 #include <errno.h>
39 #include <syslog.h>
40 #include <rpc/rpc.h>
41 #include <rpc/pmap_prot.h>
42 #include <sys/time.h>
43 #include <sys/resource.h>
44 #include <signal.h>
45 #include <netdir.h>
46 #include <unistd.h>
47 #include <string.h>
48 #include <netinet/tcp.h>
49 #include <malloc.h>
50 #include <stdlib.h>
51 #include "nfs_tbind.h"
52 #include <nfs/nfs.h>
53 #include <nfs/nfs_acl.h>
54 #include <nfs/nfssys.h>
55 #include <nfs/nfs4.h>
56 #include <zone.h>
57 #include <sys/socket.h>
58 #include <tsol/label.h>
59 
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to non-zero when the netconfig entry pointed to by _nconf
 * uses connectionless (NC_TPI_CLTS), connection-oriented (NC_TPI_COTS)
 * or connection-oriented with orderly release (NC_TPI_COTS_ORD)
 * semantics.  The argument is parenthesized so that any pointer
 * expression may be passed; note that it may be evaluated more than once.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
67 
/*
 * Assemble a 32-bit value from the four bytes at address a, taking the
 * first byte as the most significant (big-endian order).  The argument
 * is parenthesized so that the byte-pointer cast applies to the whole
 * expression passed in; note that it is evaluated four times.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
73 
74 /*
75  * Number of elements to add to the poll array on each allocation.
76  */
77 #define	POLL_ARRAY_INC_SIZE	64
78 
79 /*
80  * Number of file descriptors by which the process soft limit may be
81  * increased on each call to nofile_increase(0).
82  */
83 #define	NOFILE_INC_SIZE	64
84 
/*
 * One connection indication, carrying the t_call that describes it.
 * NOTE(review): the next/prev members suggest these are kept on a doubly
 * linked list; the code that manages them (conn_get/discon_get) is not
 * in this part of the file -- confirm there.
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* following entry */
	struct conn_ind *conn_prev;	/* preceding entry */
	struct t_call   *conn_call;	/* TLI call data for this indication */
};
90 
/*
 * Per-descriptor bookkeeping kept in conn_polled[], at the same index
 * as the descriptor's slot in poll_array[].
 */
struct conn_entry {
	/*
	 * Cleared when the entry is added to the poll list; set once an
	 * orderly release has been sent and its completion is awaited.
	 */
	bool_t			closing;
	/* Private copy of the transport's netconfig entry. */
	struct netconfig	nc;
};
95 
96 /*
97  * this file contains transport routines common to nfsd and lockd
98  */
/* Descriptor-limit and transport-option helpers. */
static	int	nofile_increase(int);
static	int	reuseaddr(int);
static	int	recvucred(int);
static  int	anonmlp(int);
/* Poll-list maintenance and binding helpers. */
static	void	add_to_poll_list(int, struct netconfig *);
static	char	*serv_name_to_port_name(char *);
static	int	bind_to_proto(char *, char *, struct netbuf **,
				struct netconfig **);
static	int	bind_to_provider(char *, char *, struct netbuf **,
					struct netconfig **);
/* Connection management and poll event handlers. */
static	void	conn_close_oldest(void);
static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static	void	cots_listen_event(int, int);
static	int	discon_get(int, struct netconfig *, struct conn_ind **);
static	int	do_poll_clts_action(int, int);
static	int	do_poll_cots_action(int, int);
static	void	remove_from_poll_list(int);
static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
static	int	is_listen_fd_index(int);

/*
 * poll_array is the descriptor set handed to poll(); conn_polled holds the
 * matching per-descriptor state at the same index.  Both are grown by
 * add_to_poll_list() and compacted by remove_from_poll_list().
 */
static	struct pollfd *poll_array;
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional hook, checked against NULL before every use in this file.
 * It is invoked with NFS4_SETPORT (optionally or'ed with NFS4_KRPC_START)
 * as its fourth argument.
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);

/* Registration routine used by do_one() when a portmapper is in use. */
extern bool_t __pmap_set(const rpcprog_t program, const rpcvers_t version,
    const struct netconfig *nconf, const struct netbuf *address);
127 
128 /*
129  * Called to create and prepare a transport descriptor for in-kernel
130  * RPC service.
131  * Returns -1 on failure and a valid descriptor on success.
132  */
133 int
134 nfslib_transport_open(struct netconfig *nconf)
135 {
136 	int fd;
137 	struct strioctl	strioc;
138 
139 	if ((nconf == (struct netconfig *)NULL) ||
140 	    (nconf->nc_device == (char *)NULL)) {
141 		syslog(LOG_ERR, "no netconfig device");
142 		return (-1);
143 	}
144 
145 	/*
146 	 * Open the transport device.
147 	 */
148 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
149 	if (fd == -1) {
150 		if (t_errno == TSYSERR && errno == EMFILE &&
151 		    (nofile_increase(0) == 0)) {
152 			/* Try again with a higher NOFILE limit. */
153 			fd = t_open(nconf->nc_device, O_RDWR,
154 			    (struct t_info *)NULL);
155 		}
156 		if (fd == -1) {
157 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
158 			    nconf->nc_device, t_errno);
159 			return (-1);
160 		}
161 	}
162 
163 	/*
164 	 * Pop timod because the RPC module must be as close as possible
165 	 * to the transport.
166 	 */
167 	if (ioctl(fd, I_POP, 0) < 0) {
168 		syslog(LOG_ERR, "I_POP of timod failed: %m");
169 		(void) t_close(fd);
170 		return (-1);
171 	}
172 
173 	/*
174 	 * Common code for CLTS and COTS transports
175 	 */
176 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
177 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
178 		(void) t_close(fd);
179 		return (-1);
180 	}
181 
182 	strioc.ic_cmd = RPC_SERVER;
183 	strioc.ic_dp = (char *)0;
184 	strioc.ic_len = 0;
185 	strioc.ic_timout = -1;
186 
187 	/* Tell rpcmod to act like a server stream. */
188 	if (ioctl(fd, I_STR, &strioc) < 0) {
189 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
190 		(void) t_close(fd);
191 		return (-1);
192 	}
193 
194 	/*
195 	 * Re-push timod so that we will still be doing TLI
196 	 * operations on the descriptor.
197 	 */
198 	if (ioctl(fd, I_PUSH, "timod") < 0) {
199 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
200 		(void) t_close(fd);
201 		return (-1);
202 	}
203 
204 	/*
205 	 * Enable options of returning the ip's for udp.
206 	 */
207 	if (strcmp(nconf->nc_netid, "udp6") == 0)
208 		__rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
209 	else if (strcmp(nconf->nc_netid, "udp") == 0)
210 		__rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
211 
212 	return (fd);
213 }
214 
215 static int
216 nofile_increase(int limit)
217 {
218 	struct rlimit rl;
219 
220 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
221 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
222 		return (-1);
223 	}
224 
225 	if (limit > 0)
226 		rl.rlim_cur = limit;
227 	else
228 		rl.rlim_cur += NOFILE_INC_SIZE;
229 
230 	if (rl.rlim_cur > rl.rlim_max &&
231 	    rl.rlim_max != RLIM_INFINITY)
232 		rl.rlim_max = rl.rlim_cur;
233 
234 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
235 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
236 		    rl.rlim_cur);
237 		return (-1);
238 	}
239 
240 	return (0);
241 }
242 
243 int
244 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
245 	struct nd_hostserv *hs, int backlog)
246 {
247 	int fd;
248 	struct t_bind  *ntb;
249 	struct t_bind tb;
250 	struct nd_addrlist *addrlist;
251 	struct t_optmgmt req, resp;
252 	struct opthdr *opt;
253 	char reqbuf[128];
254 	bool_t use_any = FALSE;
255 	bool_t gzone = TRUE;
256 
257 	if ((fd = nfslib_transport_open(nconf)) == -1) {
258 		syslog(LOG_ERR, "cannot establish transport service over %s",
259 		    nconf->nc_device);
260 		return (-1);
261 	}
262 
263 	addrlist = (struct nd_addrlist *)NULL;
264 
265 	/* nfs4_callback service does not used a fieed port number */
266 
267 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
268 		tb.addr.maxlen = 0;
269 		tb.addr.len = 0;
270 		tb.addr.buf = 0;
271 		use_any = TRUE;
272 		gzone = (getzoneid() == GLOBAL_ZONEID);
273 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
274 
275 		syslog(LOG_ERR,
276 		"Cannot get address for transport %s host %s service %s",
277 		    nconf->nc_netid, hs->h_host, hs->h_serv);
278 		(void) t_close(fd);
279 		return (-1);
280 	}
281 
282 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
283 		/*
284 		 * If we're running over TCP, then set the
285 		 * SO_REUSEADDR option so that we can bind
286 		 * to our preferred address even if previously
287 		 * left connections exist in FIN_WAIT states.
288 		 * This is somewhat bogus, but otherwise you have
289 		 * to wait 2 minutes to restart after killing it.
290 		 */
291 		if (reuseaddr(fd) == -1) {
292 			syslog(LOG_WARNING,
293 			"couldn't set SO_REUSEADDR option on transport");
294 		}
295 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
296 		/*
297 		 * In order to run MLP on UDP, we need to handle creds.
298 		 */
299 		if (recvucred(fd) == -1) {
300 			syslog(LOG_WARNING,
301 			    "couldn't set SO_RECVUCRED option on transport");
302 		}
303 	}
304 
305 	/*
306 	 * Make non global zone nfs4_callback port MLP
307 	 */
308 	if (use_any && is_system_labeled() && !gzone) {
309 		if (anonmlp(fd) == -1) {
310 			/*
311 			 * failing to set this option means nfs4_callback
312 			 * could fail silently later. So fail it with
313 			 * with an error message now.
314 			 */
315 			syslog(LOG_ERR,
316 			    "couldn't set SO_ANON_MLP option on transport");
317 			(void) t_close(fd);
318 			return (-1);
319 		}
320 	}
321 
322 	if (nconf->nc_semantics == NC_TPI_CLTS)
323 		tb.qlen = 0;
324 	else
325 		tb.qlen = backlog;
326 
327 	/* LINTED pointer alignment */
328 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
329 	if (ntb == (struct t_bind *)NULL) {
330 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
331 		(void) t_close(fd);
332 		netdir_free((void *)addrlist, ND_ADDRLIST);
333 		return (-1);
334 	}
335 
336 	/*
337 	 * XXX - what about the space tb->addr.buf points to? This should
338 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
339 	 * should't be called with T_ALL.
340 	 */
341 	if (addrlist)
342 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
343 
344 	if (t_bind(fd, &tb, ntb) == -1) {
345 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
346 		(void) t_free((char *)ntb, T_BIND);
347 		netdir_free((void *)addrlist, ND_ADDRLIST);
348 		(void) t_close(fd);
349 		return (-1);
350 	}
351 
352 	/* make sure we bound to the right address */
353 	if (use_any == FALSE &&
354 	    (tb.addr.len != ntb->addr.len ||
355 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
356 		syslog(LOG_ERR, "t_bind to wrong address");
357 		(void) t_free((char *)ntb, T_BIND);
358 		netdir_free((void *)addrlist, ND_ADDRLIST);
359 		(void) t_close(fd);
360 		return (-1);
361 	}
362 
363 	/*
364 	 * Call nfs4svc_setport so that the kernel can be
365 	 * informed what port number the daemon is listing
366 	 * for incoming connection requests.
367 	 */
368 
369 	if ((nconf->nc_semantics == NC_TPI_COTS ||
370 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
371 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
372 
373 	*addr = &ntb->addr;
374 	netdir_free((void *)addrlist, ND_ADDRLIST);
375 
376 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
377 		/*
378 		 * Disable the Nagle algorithm on TCP connections.
379 		 * Connections accepted from this listener will
380 		 * inherit the listener options.
381 		 */
382 
383 		/* LINTED pointer alignment */
384 		opt = (struct opthdr *)reqbuf;
385 		opt->level = IPPROTO_TCP;
386 		opt->name = TCP_NODELAY;
387 		opt->len = sizeof (int);
388 
389 		/* LINTED pointer alignment */
390 		*(int *)((char *)opt + sizeof (*opt)) = 1;
391 
392 		req.flags = T_NEGOTIATE;
393 		req.opt.len = sizeof (*opt) + opt->len;
394 		req.opt.buf = (char *)opt;
395 		resp.flags = 0;
396 		resp.opt.buf = reqbuf;
397 		resp.opt.maxlen = sizeof (reqbuf);
398 
399 		if (t_optmgmt(fd, &req, &resp) < 0 ||
400 		    resp.flags != T_SUCCESS) {
401 			syslog(LOG_ERR,
402 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
403 			    nconf->nc_proto, t_errno);
404 		}
405 	}
406 
407 	return (fd);
408 }
409 
410 static int
411 setopt(int fd, int level, int name, int value)
412 {
413 	struct t_optmgmt req, resp;
414 	struct {
415 		struct opthdr opt;
416 		int value;
417 	} reqbuf;
418 
419 	reqbuf.opt.level = level;
420 	reqbuf.opt.name = name;
421 	reqbuf.opt.len = sizeof (int);
422 
423 	reqbuf.value = value;
424 
425 	req.flags = T_NEGOTIATE;
426 	req.opt.len = sizeof (reqbuf);
427 	req.opt.buf = (char *)&reqbuf;
428 
429 	resp.flags = 0;
430 	resp.opt.buf = (char *)&reqbuf;
431 	resp.opt.maxlen = sizeof (reqbuf);
432 
433 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
434 		t_error("t_optmgmt");
435 		return (-1);
436 	}
437 	return (0);
438 }
439 
/*
 * Turn on SO_REUSEADDR for the transport behind fd.
 * Returns whatever setopt() returns: 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	const int enable = 1;

	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, enable));
}
445 
446 static int
447 recvucred(int fd)
448 {
449 	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
450 }
451 
452 static int
453 anonmlp(int fd)
454 {
455 	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
456 }
457 
458 void
459 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
460 {
461 	int error;
462 
463 	/*
464 	 * Save the error code across syslog(), just in case syslog()
465 	 * gets its own error and, therefore, overwrites errno.
466 	 */
467 	error = errno;
468 	if (t_errno == TSYSERR) {
469 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
470 		    tli_name, fd, nconf->nc_proto);
471 	} else {
472 		syslog(LOG_ERR,
473 		    "%s(file descriptor %d/transport %s) TLI error %d",
474 		    tli_name, fd, nconf->nc_proto, t_errno);
475 	}
476 	errno = error;
477 }
478 
479 /*
480  * Called to set up service over a particular transport.
481  */
482 void
483 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
484 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
485 {
486 	register int sock;
487 	struct protob *protobp;
488 	struct netbuf *retaddr;
489 	struct netconfig *retnconf;
490 	struct netbuf addrmask;
491 	int vers;
492 	int err;
493 	int l;
494 
495 	if (provider)
496 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
497 		    &retnconf);
498 	else
499 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
500 		    &retnconf);
501 
502 	if (sock == -1) {
503 		(void) syslog(LOG_ERR,
504 	"Cannot establish %s service over %s: transport setup problem.",
505 		    protobp0->serv, provider ? provider : proto);
506 		return;
507 	}
508 
509 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
510 		(void) syslog(LOG_ERR,
511 		    "Cannot set address mask for %s", retnconf->nc_netid);
512 		return;
513 	}
514 
515 	/*
516 	 * Register all versions of the programs in the protocol block list.
517 	 */
518 	l = strlen(NC_UDP);
519 	for (protobp = protobp0; protobp; protobp = protobp->next) {
520 		for (vers = protobp->versmin; vers <= protobp->versmax;
521 		    vers++) {
522 			if ((protobp->program == NFS_PROGRAM ||
523 			    protobp->program == NFS_ACL_PROGRAM) &&
524 			    vers == NFS_V4 &&
525 			    strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
526 				continue;
527 
528 			if (use_pmap) {
529 				/*
530 				 * Note that if we're using a portmapper
531 				 * instead of rpcbind then we can't do an
532 				 * unregister operation here.
533 				 *
534 				 * The reason is that the portmapper unset
535 				 * operation removes all the entries for a
536 				 * given program/version regardelss of
537 				 * transport protocol.
538 				 *
539 				 * The caller of this routine needs to ensure
540 				 * that __pmap_unset() has been called for all
541 				 * program/version service pairs they plan
542 				 * to support before they start registering
543 				 * each program/version/protocol triplet.
544 				 */
545 				(void) __pmap_set(protobp->program, vers,
546 				    retnconf, retaddr);
547 			} else {
548 				(void) rpcb_unset(protobp->program, vers,
549 				    retnconf);
550 				(void) rpcb_set(protobp->program, vers,
551 				    retnconf, retaddr);
552 			}
553 		}
554 	}
555 
556 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
557 		/* Don't drop core if supporting module(s) aren't loaded. */
558 		(void) signal(SIGSYS, SIG_IGN);
559 
560 		/*
561 		 * svc() doesn't block, it returns success or failure.
562 		 */
563 
564 		if (svc == NULL && Mysvc4 != NULL)
565 			err = (*Mysvc4)(sock, &addrmask, retnconf,
566 			    NFS4_SETPORT|NFS4_KRPC_START, retaddr);
567 		else
568 			err = (*svc)(sock, addrmask, retnconf);
569 
570 		if (err < 0) {
571 			(void) syslog(LOG_ERR,
572 			    "Cannot establish %s service over <file desc."
573 			    " %d, protocol %s> : %m. Exiting",
574 			    protobp0->serv, sock, retnconf->nc_proto);
575 			exit(1);
576 		}
577 	}
578 
579 	/*
580 	 * We successfully set up the server over this transport.
581 	 * Add this descriptor to the one being polled on.
582 	 */
583 	add_to_poll_list(sock, retnconf);
584 }
585 /*
586  * Set up the NFS service over all the available transports.
587  * Returns -1 for failure, 0 for success.
588  */
589 int
590 do_all(struct protob *protobp,
591 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
592 {
593 	struct netconfig *nconf;
594 	NCONF_HANDLE *nc;
595 	int l;
596 
597 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
598 		syslog(LOG_ERR, "setnetconfig failed: %m");
599 		return (-1);
600 	}
601 	l = strlen(NC_UDP);
602 	while (nconf = getnetconfig(nc)) {
603 		if ((nconf->nc_flag & NC_VISIBLE) &&
604 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
605 		    OK_TPI_TYPE(nconf) &&
606 		    (protobp->program != NFS4_CALLBACK ||
607 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
608 			do_one(nconf->nc_device, nconf->nc_proto,
609 			    protobp, svc, use_pmap);
610 	}
611 	(void) endnetconfig(nc);
612 	return (0);
613 }
614 
615 /*
616  * poll on the open transport descriptors for events and errors.
617  */
618 void
619 poll_for_action(void)
620 {
621 	int nfds;
622 	int i;
623 
624 	/*
625 	 * Keep polling until all transports have been closed. When this
626 	 * happens, we return.
627 	 */
628 	while ((int)num_fds > 0) {
629 		nfds = poll(poll_array, num_fds, INFTIM);
630 		switch (nfds) {
631 		case 0:
632 			continue;
633 
634 		case -1:
635 			/*
636 			 * Some errors from poll could be
637 			 * due to temporary conditions, and we try to
638 			 * be robust in the face of them. Other
639 			 * errors (should never happen in theory)
640 			 * are fatal (eg. EINVAL, EFAULT).
641 			 */
642 			switch (errno) {
643 			case EINTR:
644 				continue;
645 
646 			case EAGAIN:
647 			case ENOMEM:
648 				(void) sleep(10);
649 				continue;
650 
651 			default:
652 				(void) syslog(LOG_ERR,
653 				    "poll failed: %m. Exiting");
654 				exit(1);
655 			}
656 		default:
657 			break;
658 		}
659 
660 		/*
661 		 * Go through the poll list looking for events.
662 		 */
663 		for (i = 0; i < num_fds && nfds > 0; i++) {
664 			if (poll_array[i].revents) {
665 				nfds--;
666 				/*
667 				 * We have a message, so try to read it.
668 				 * Record the error return in errno,
669 				 * so that syslog(LOG_ERR, "...%m")
670 				 * dumps the corresponding error string.
671 				 */
672 				if (conn_polled[i].nc.nc_semantics ==
673 				    NC_TPI_CLTS) {
674 					errno = do_poll_clts_action(
675 					    poll_array[i].fd, i);
676 				} else {
677 					errno = do_poll_cots_action(
678 					    poll_array[i].fd, i);
679 				}
680 
681 				if (errno == 0)
682 					continue;
683 				/*
684 				 * Most returned error codes mean that there is
685 				 * fatal condition which we can only deal with
686 				 * by closing the transport.
687 				 */
688 				if (errno != EAGAIN && errno != ENOMEM) {
689 					(void) syslog(LOG_ERR,
690 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
691 					    poll_array[i].fd,
692 					    conn_polled[i].nc.nc_proto);
693 					(void) t_close(poll_array[i].fd);
694 					remove_from_poll_list(poll_array[i].fd);
695 
696 				} else if (errno == ENOMEM)
697 					(void) sleep(5);
698 			}
699 		}
700 	}
701 
702 	(void) syslog(LOG_ERR,
703 	    "All transports have been closed with errors. Exiting.");
704 }
705 
706 /*
707  * Allocate poll/transport array entries for this descriptor.
708  */
709 static void
710 add_to_poll_list(int fd, struct netconfig *nconf)
711 {
712 	static int poll_array_size = 0;
713 
714 	/*
715 	 * If the arrays are full, allocate new ones.
716 	 */
717 	if (num_fds == poll_array_size) {
718 		struct pollfd *tpa;
719 		struct conn_entry *tnp;
720 
721 		if (poll_array_size != 0) {
722 			tpa = poll_array;
723 			tnp = conn_polled;
724 		} else
725 			tpa = (struct pollfd *)0;
726 
727 		poll_array_size += POLL_ARRAY_INC_SIZE;
728 		/*
729 		 * Allocate new arrays.
730 		 */
731 		poll_array = (struct pollfd *)
732 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
733 		conn_polled = (struct conn_entry *)
734 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
735 		if (poll_array == (struct pollfd *)NULL ||
736 		    conn_polled == (struct conn_entry *)NULL) {
737 			syslog(LOG_ERR, "malloc failed for poll array");
738 			exit(1);
739 		}
740 
741 		/*
742 		 * Copy the data of the old ones into new arrays, and
743 		 * free the old ones.
744 		 */
745 		if (tpa) {
746 			(void) memcpy((void *)poll_array, (void *)tpa,
747 			    num_fds * sizeof (struct pollfd));
748 			(void) memcpy((void *)conn_polled, (void *)tnp,
749 			    num_fds * sizeof (struct conn_entry));
750 			free((void *)tpa);
751 			free((void *)tnp);
752 		}
753 	}
754 
755 	/*
756 	 * Set the descriptor and event list. All possible events are
757 	 * polled for.
758 	 */
759 	poll_array[num_fds].fd = fd;
760 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
761 
762 	/*
763 	 * Copy the transport data over too.
764 	 */
765 	conn_polled[num_fds].nc = *nconf;
766 	conn_polled[num_fds].closing = 0;
767 
768 	/*
769 	 * Set the descriptor to non-blocking. Avoids a race
770 	 * between data arriving on the stream and then having it
771 	 * flushed before we can read it.
772 	 */
773 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
774 		(void) syslog(LOG_ERR,
775 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
776 		    num_fds, nconf->nc_proto);
777 		exit(1);
778 	}
779 
780 	/*
781 	 * Count this descriptor.
782 	 */
783 	++num_fds;
784 }
785 
786 static void
787 remove_from_poll_list(int fd)
788 {
789 	int i;
790 	int num_to_copy;
791 
792 	for (i = 0; i < num_fds; i++) {
793 		if (poll_array[i].fd == fd) {
794 			--num_fds;
795 			num_to_copy = num_fds - i;
796 			(void) memcpy((void *)&poll_array[i],
797 			    (void *)&poll_array[i+1],
798 			    num_to_copy * sizeof (struct pollfd));
799 			(void) memset((void *)&poll_array[num_fds], 0,
800 			    sizeof (struct pollfd));
801 			(void) memcpy((void *)&conn_polled[i],
802 			    (void *)&conn_polled[i+1],
803 			    num_to_copy * sizeof (struct conn_entry));
804 			(void) memset((void *)&conn_polled[num_fds], 0,
805 			    sizeof (struct conn_entry));
806 			return;
807 		}
808 	}
809 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
810 
811 }
812 
/*
 * Called to read and interpret the event on a connectionless descriptor.
 *
 * fd is the descriptor that poll() reported and conn_index is its slot
 * in conn_polled[].  The expected event is a datagram error indication:
 * it is fetched with t_rcvuderr() and logged together with the address
 * (or host name) of the client it refers to.  Anything that cannot be
 * interpreted is flushed from the stream with getmsg().
 *
 * The t_unitdata and t_uderr buffers are kept in static storage and are
 * reused for as long as consecutive calls name the same descriptor.
 *
 * Returns 0 if successful, or a UNIX error code if failure.
 */
static int
do_poll_clts_action(int fd, int conn_index)
{
	int error;
	int ret;
	int flags;
	int pri;
	struct netconfig *nconf = &conn_polled[conn_index].nc;
	static struct t_unitdata *unitdata = NULL;
	static struct t_uderr *uderr = NULL;
	static int oldfd = -1;
	struct nd_hostservlist *host = NULL;
	struct strbuf ctl[1], data[1];
	/*
	 * We just need to have some space to consume the
	 * message in the event we can't use the TLI interface to do the
	 * job.
	 *
	 * We flush the message using getmsg(). For the control part
	 * we allocate enough for any TPI header plus 32 bytes for address
	 * and options. For the data part, there is nothing magic about
	 * the size of the array, but 256 bytes is probably better than
	 * 1 byte, and we don't expect any data portion anyway.
	 *
	 * If the array sizes are too small, we handle this because getmsg()
	 * (called to consume the message) will return MOREDATA|MORECTL.
	 * Thus we just call getmsg() until it's read the message.
	 */
	char ctlbuf[sizeof (union T_primitives) + 32];
	char databuf[256];

	/*
	 * If this is the same descriptor as the last time
	 * do_poll_clts_action was called, we can save some
	 * de-allocation and allocation.
	 */
	if (oldfd != fd) {
		oldfd = fd;

		if (unitdata) {
			(void) t_free((char *)unitdata, T_UNITDATA);
			unitdata = NULL;
		}
		if (uderr) {
			(void) t_free((char *)uderr, T_UDERROR);
			uderr = NULL;
		}
	}

	/*
	 * Allocate a unitdata structure for receiving the event.
	 */
	if (unitdata == NULL) {
		/* LINTED pointer alignment */
		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
		if (unitdata == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
				    fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
			    fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

try_again:
	flags = 0;

	/*
	 * The idea is we wait for T_UNITDATA_IND's. Of course,
	 * we don't get any, because rpcmod filters them out.
	 * However, we need to call t_rcvudata() to let TLI
	 * tell us we have a T_UDERROR_IND.
	 *
	 * algorithm is:
	 * 	t_rcvudata(), expecting TLOOK.
	 * 	t_look(), expecting T_UDERR.
	 * 	t_rcvuderr(), expecting success (0).
	 * 	expand destination address into ASCII,
	 *	and dump it.
	 */

	ret = t_rcvudata(fd, unitdata, &flags);
	if (ret == 0 || t_errno == TBUFOVFLW) {
		(void) syslog(LOG_WARNING,
"t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
		    fd, nconf->nc_proto, unitdata->udata.len);

		/*
		 * Even though we don't expect any data, in case we do,
		 * keep reading until there is no more.
		 */
		if (flags & T_MORE)
			goto try_again;

		return (0);
	}

	switch (t_errno) {
	case TNODATA:
		return (0);
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
		    "t_rcvudata(file descriptor %d/transport %s) %m",
		    fd, nconf->nc_proto);
		return (error);
	case TLOOK:
		break;
	default:
		(void) syslog(LOG_ERR,
		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
		    fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

	ret = t_look(fd);
	switch (ret) {
	case 0:
		return (0);
	case -1:
		/*
		 * System errors are returned to caller.
		 */
		if (t_errno == TSYSERR) {
			/*
			 * Save the error code across
			 * syslog(), just in case
			 * syslog() gets its own error
			 * and therefore overwrites errno.
			 */
			error = errno;
			(void) syslog(LOG_ERR,
			    "t_look(file descriptor %d/transport %s) %m",
			    fd, nconf->nc_proto);
			return (error);
		}
		(void) syslog(LOG_ERR,
		    "t_look(file descriptor %d/transport %s) TLI error %d",
		    fd, nconf->nc_proto, t_errno);
		goto flush_it;
	case T_UDERR:
		break;
	default:
		/* Unexpected event: warn, then still try t_rcvuderr(). */
		(void) syslog(LOG_WARNING,
	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
		    fd, nconf->nc_proto, ret, T_UDERR);
	}

	if (uderr == NULL) {
		/* LINTED pointer alignment */
		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
		if (uderr == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
				    fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
			    fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

	ret = t_rcvuderr(fd, uderr);
	if (ret == 0) {

		/*
		 * Save the datagram error in errno, so that the
		 * %m argument to syslog picks up the error string.
		 */
		errno = uderr->error;

		/*
		 * Decide the log priority once, from the datagram error
		 * itself: the calls below may overwrite errno.
		 */
		pri = (uderr->error == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING;

		/*
		 * Log the datagram error, then log the host that
		 * probably triggered it. Cannot log both in the
		 * same transaction because of packet size limitations
		 * in /dev/log.
		 */
		(void) syslog(pri,
"NFS response over <file descriptor %d/transport %s> generated error: %m",
		    fd, nconf->nc_proto);

		/*
		 * Try to map the client's address back to a
		 * name.
		 */
		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
		if (ret != -1 && host && host->h_cnt > 0 &&
		    host->h_hostservs) {
			(void) syslog(pri,
"Bad NFS response was sent to client with host name: %s; service port: %s",
			    host->h_hostservs->h_host,
			    host->h_hostservs->h_serv);
		} else {
			unsigned int i, j;
			char *buf;
			char *hex = "0123456789abcdef";

			/*
			 * Mapping failed, print the whole thing
			 * in ASCII hex.
			 */
			buf = (char *)malloc(uderr->addr.len * 2 + 1);
			if (buf == NULL) {
				(void) syslog(LOG_ERR,
				    "malloc failed while formatting the "
				    "client transport address");
			} else {
				for (i = 0, j = 0; i < uderr->addr.len;
				    i++, j += 2) {
					buf[j] = hex[((uderr->addr.buf[i])
					    >> 4) & 0xf];
					buf[j+1] = hex[uderr->addr.buf[i]
					    & 0xf];
				}
				buf[j] = '\0';
				(void) syslog(pri,
	"Bad NFS response was sent to client with transport address: 0x%s",
				    buf);
				free((void *)buf);
			}
		}

		if (ret == 0 && host != NULL)
			netdir_free((void *)host, ND_HOSTSERVLIST);
		return (0);
	}

	switch (t_errno) {
	case TNOUDERR:
		goto flush_it;
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
		    "t_rcvuderr(file descriptor %d/transport %s) %m",
		    fd, nconf->nc_proto);
		return (error);
	default:
		(void) syslog(LOG_ERR,
		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
		    fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

flush_it:
	/*
	 * If we get here, then we could not cope with whatever message
	 * we attempted to read, so flush it. If we did read a message,
	 * and one isn't present, that is all right, because fd is in
	 * nonblocking mode.
	 */
	(void) syslog(LOG_ERR,
	"Flushing one input message from <file descriptor %d/transport %s>",
	    fd, nconf->nc_proto);

	/*
	 * Read and discard the message. Do this until there is
	 * no more control/data in the message or until we get an error.
	 */
	do {
		ctl->maxlen = sizeof (ctlbuf);
		ctl->buf = ctlbuf;
		data->maxlen = sizeof (databuf);
		data->buf = databuf;
		flags = 0;
		ret = getmsg(fd, ctl, data, &flags);
		if (ret == -1)
			return (errno);
	} while (ret != 0);

	return (0);
}
1114 
1115 static void
1116 conn_close_oldest(void)
1117 {
1118 	int fd;
1119 	int i1;
1120 
1121 	/*
1122 	 * Find the oldest connection that is not already in the
1123 	 * process of shutting down.
1124 	 */
1125 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1126 		if (i1 >= num_fds)
1127 			return;
1128 		if (conn_polled[i1].closing == 0)
1129 			break;
1130 	}
1131 #ifdef DEBUG
1132 	printf("too many connections (%d), releasing oldest (%d)\n",
1133 	    num_conns, poll_array[i1].fd);
1134 #else
1135 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1136 	    num_conns, poll_array[i1].fd);
1137 #endif
1138 	fd = poll_array[i1].fd;
1139 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1140 		/*
1141 		 * For politeness, send a T_DISCON_REQ to the transport
1142 		 * provider.  We close the stream anyway.
1143 		 */
1144 		(void) t_snddis(fd, (struct t_call *)0);
1145 		num_conns--;
1146 		remove_from_poll_list(fd);
1147 		(void) t_close(fd);
1148 	} else {
1149 		/*
1150 		 * For orderly release, we do not close the stream
1151 		 * until the T_ORDREL_IND arrives to complete
1152 		 * the handshake.
1153 		 */
1154 		if (t_sndrel(fd) == 0)
1155 			conn_polled[i1].closing = 1;
1156 	}
1157 }
1158 
1159 static boolean_t
1160 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1161 {
1162 	struct conn_ind	*conn;
1163 	struct conn_ind	*next_conn;
1164 
1165 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1166 	if (conn == NULL) {
1167 		syslog(LOG_ERR, "malloc for listen indication failed");
1168 		return (FALSE);
1169 	}
1170 
1171 	/* LINTED pointer alignment */
1172 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1173 	if (conn->conn_call == NULL) {
1174 		free((char *)conn);
1175 		nfslib_log_tli_error("t_alloc", fd, nconf);
1176 		return (FALSE);
1177 	}
1178 
1179 	if (t_listen(fd, conn->conn_call) == -1) {
1180 		nfslib_log_tli_error("t_listen", fd, nconf);
1181 		(void) t_free((char *)conn->conn_call, T_CALL);
1182 		free((char *)conn);
1183 		return (FALSE);
1184 	}
1185 
1186 	if (conn->conn_call->udata.len > 0) {
1187 		syslog(LOG_WARNING,
1188 	"rejecting inbound connection(%s) with %d bytes of connect data",
1189 		    nconf->nc_proto, conn->conn_call->udata.len);
1190 
1191 		conn->conn_call->udata.len = 0;
1192 		(void) t_snddis(fd, conn->conn_call);
1193 		(void) t_free((char *)conn->conn_call, T_CALL);
1194 		free((char *)conn);
1195 		return (FALSE);
1196 	}
1197 
1198 	if ((next_conn = *connp) != NULL) {
1199 		next_conn->conn_prev->conn_next = conn;
1200 		conn->conn_next = next_conn;
1201 		conn->conn_prev = next_conn->conn_prev;
1202 		next_conn->conn_prev = conn;
1203 	} else {
1204 		conn->conn_next = conn;
1205 		conn->conn_prev = conn;
1206 		*connp = conn;
1207 	}
1208 	return (TRUE);
1209 }
1210 
1211 static int
1212 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1213 {
1214 	struct conn_ind	*conn;
1215 	struct t_discon	discon;
1216 
1217 	discon.udata.buf = (char *)0;
1218 	discon.udata.maxlen = 0;
1219 	if (t_rcvdis(fd, &discon) == -1) {
1220 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1221 		return (-1);
1222 	}
1223 
1224 	conn = *connp;
1225 	if (conn == NULL)
1226 		return (0);
1227 
1228 	do {
1229 		if (conn->conn_call->sequence == discon.sequence) {
1230 			if (conn->conn_next == conn)
1231 				*connp = (struct conn_ind *)0;
1232 			else {
1233 				if (conn == *connp) {
1234 					*connp = conn->conn_next;
1235 				}
1236 				conn->conn_next->conn_prev = conn->conn_prev;
1237 				conn->conn_prev->conn_next = conn->conn_next;
1238 			}
1239 			free((char *)conn);
1240 			break;
1241 		}
1242 		conn = conn->conn_next;
1243 	} while (conn != *connp);
1244 
1245 	return (0);
1246 }
1247 
1248 static void
1249 cots_listen_event(int fd, int conn_index)
1250 {
1251 	struct t_call *call;
1252 	struct conn_ind	*conn;
1253 	struct conn_ind	*conn_head;
1254 	int event;
1255 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1256 	int new_fd;
1257 	struct netbuf addrmask;
1258 	int ret = 0;
1259 	char *clnt;
1260 	char *clnt_uaddr = NULL;
1261 	struct nd_hostservlist *clnt_serv = NULL;
1262 
1263 	conn_head = (struct conn_ind *)0;
1264 	(void) conn_get(fd, nconf, &conn_head);
1265 
1266 	while ((conn = conn_head) != NULL) {
1267 		conn_head = conn->conn_next;
1268 		if (conn_head == conn)
1269 			conn_head = (struct conn_ind *)0;
1270 		else {
1271 			conn_head->conn_prev = conn->conn_prev;
1272 			conn->conn_prev->conn_next = conn_head;
1273 		}
1274 		call = conn->conn_call;
1275 		free((char *)conn);
1276 
1277 		/*
1278 		 * If we have already accepted the maximum number of
1279 		 * connections allowed on the command line, then drop
1280 		 * the oldest connection (for any protocol) before
1281 		 * accepting the new connection.  Unless explicitly
1282 		 * set on the command line, max_conns_allowed is -1.
1283 		 */
1284 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1285 			conn_close_oldest();
1286 
1287 		/*
1288 		 * Create a new transport endpoint for the same proto as
1289 		 * the listener.
1290 		 */
1291 		new_fd = nfslib_transport_open(nconf);
1292 		if (new_fd == -1) {
1293 			call->udata.len = 0;
1294 			(void) t_snddis(fd, call);
1295 			(void) t_free((char *)call, T_CALL);
1296 			syslog(LOG_ERR, "Cannot establish transport over %s",
1297 			    nconf->nc_device);
1298 			continue;
1299 		}
1300 
1301 		/* Bind to a generic address/port for the accepting stream. */
1302 		if (t_bind(new_fd, (struct t_bind *)NULL,
1303 		    (struct t_bind *)NULL) == -1) {
1304 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1305 			call->udata.len = 0;
1306 			(void) t_snddis(fd, call);
1307 			(void) t_free((char *)call, T_CALL);
1308 			(void) t_close(new_fd);
1309 			continue;
1310 		}
1311 
1312 		while (t_accept(fd, new_fd, call) == -1) {
1313 			if (t_errno != TLOOK) {
1314 #ifdef DEBUG
1315 				nfslib_log_tli_error("t_accept", fd, nconf);
1316 #endif
1317 				call->udata.len = 0;
1318 				(void) t_snddis(fd, call);
1319 				(void) t_free((char *)call, T_CALL);
1320 				(void) t_close(new_fd);
1321 				goto do_next_conn;
1322 			}
1323 			while (event = t_look(fd)) {
1324 				switch (event) {
1325 				case T_LISTEN:
1326 #ifdef DEBUG
1327 					printf(
1328 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1329 #endif
1330 					(void) conn_get(fd, nconf, &conn_head);
1331 					continue;
1332 				case T_DISCONNECT:
1333 #ifdef DEBUG
1334 					printf(
1335 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1336 					    nconf->nc_proto);
1337 #endif
1338 					(void) discon_get(fd, nconf,
1339 					    &conn_head);
1340 					continue;
1341 				default:
1342 					syslog(LOG_ERR,
1343 			"unexpected event 0x%x during accept processing (%s)",
1344 					    event, nconf->nc_proto);
1345 					call->udata.len = 0;
1346 					(void) t_snddis(fd, call);
1347 					(void) t_free((char *)call, T_CALL);
1348 					(void) t_close(new_fd);
1349 					goto do_next_conn;
1350 				}
1351 			}
1352 		}
1353 
1354 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1355 			(void) syslog(LOG_ERR,
1356 			    "Cannot set address mask for %s",
1357 			    nconf->nc_netid);
1358 			return;
1359 		}
1360 
1361 		/* Tell KRPC about the new stream. */
1362 		if (Mysvc4 != NULL)
1363 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1364 			    NFS4_KRPC_START, &call->addr);
1365 		else
1366 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1367 
1368 		if (ret < 0) {
1369 			if (errno != ENOTCONN) {
1370 				syslog(LOG_ERR,
1371 				    "unable to register new connection: %m");
1372 			} else {
1373 				/*
1374 				 * This is the only error that could be
1375 				 * caused by the client, so who was it?
1376 				 */
1377 				if (netdir_getbyaddr(nconf, &clnt_serv,
1378 				    &(call->addr)) == ND_OK &&
1379 				    clnt_serv->h_cnt > 0)
1380 					clnt = clnt_serv->h_hostservs->h_host;
1381 				else
1382 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1383 					    &(call->addr));
1384 				/*
1385 				 * If we don't know who the client was,
1386 				 * remain silent.
1387 				 */
1388 				if (clnt)
1389 					syslog(LOG_ERR,
1390 "unable to register new connection: client %s has dropped connection", clnt);
1391 				if (clnt_serv)
1392 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1393 				if (clnt_uaddr)
1394 					free(clnt_uaddr);
1395 			}
1396 			free(addrmask.buf);
1397 			(void) t_snddis(new_fd, (struct t_call *)0);
1398 			(void) t_free((char *)call, T_CALL);
1399 			(void) t_close(new_fd);
1400 			goto do_next_conn;
1401 		}
1402 
1403 		free(addrmask.buf);
1404 		(void) t_free((char *)call, T_CALL);
1405 
1406 		/*
1407 		 * Poll on the new descriptor so that we get disconnect
1408 		 * and orderly release indications.
1409 		 */
1410 		num_conns++;
1411 		add_to_poll_list(new_fd, nconf);
1412 
1413 		/* Reset nconf in case it has been moved. */
1414 		nconf = &conn_polled[conn_index].nc;
1415 do_next_conn:;
1416 	}
1417 }
1418 
1419 static int
1420 do_poll_cots_action(int fd, int conn_index)
1421 {
1422 	char buf[256];
1423 	int event;
1424 	int i1;
1425 	int flags;
1426 	struct conn_entry *connent = &conn_polled[conn_index];
1427 	struct netconfig *nconf = &(connent->nc);
1428 	const char *errorstr;
1429 
1430 	while (event = t_look(fd)) {
1431 		switch (event) {
1432 		case T_LISTEN:
1433 #ifdef DEBUG
1434 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1435 #endif
1436 			cots_listen_event(fd, conn_index);
1437 			break;
1438 
1439 		case T_DATA:
1440 #ifdef DEBUG
1441 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1442 #endif
1443 			/*
1444 			 * Receive a private notification from CONS rpcmod.
1445 			 */
1446 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1447 			if (i1 == -1) {
1448 				syslog(LOG_ERR, "t_rcv failed");
1449 				break;
1450 			}
1451 			if (i1 < sizeof (int))
1452 				break;
1453 			i1 = BE32_TO_U32(buf);
1454 			if (i1 == 1 || i1 == 2) {
1455 				/*
1456 				 * This connection has been idle for too long,
1457 				 * so release it as politely as we can.  If we
1458 				 * have already initiated an orderly release
1459 				 * and we get notified that the stream is
1460 				 * still idle, pull the plug.  This prevents
1461 				 * hung connections from continuing to consume
1462 				 * resources.
1463 				 */
1464 #ifdef DEBUG
1465 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1466 printf("initiating orderly release of idle connection\n");
1467 #endif
1468 				if (nconf->nc_semantics == NC_TPI_COTS ||
1469 				    connent->closing != 0) {
1470 					(void) t_snddis(fd, (struct t_call *)0);
1471 					goto fdclose;
1472 				}
1473 				/*
1474 				 * For NC_TPI_COTS_ORD, the stream is closed
1475 				 * and removed from the poll list when the
1476 				 * T_ORDREL is received from the provider.  We
1477 				 * don't wait for it here because it may take
1478 				 * a while for the transport to shut down.
1479 				 */
1480 				if (t_sndrel(fd) == -1) {
1481 					syslog(LOG_ERR,
1482 					"unable to send orderly release %m");
1483 				}
1484 				connent->closing = 1;
1485 			} else
1486 				syslog(LOG_ERR,
1487 				"unexpected event from CONS rpcmod %d", i1);
1488 			break;
1489 
1490 		case T_ORDREL:
1491 #ifdef DEBUG
1492 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1493 #endif
1494 			/* Perform an orderly release. */
1495 			if (t_rcvrel(fd) == 0) {
1496 				/* T_ORDREL on listen fd's should be ignored */
1497 				if (!is_listen_fd_index(conn_index)) {
1498 					(void) t_sndrel(fd);
1499 					goto fdclose;
1500 				}
1501 				break;
1502 
1503 			} else if (t_errno == TLOOK) {
1504 				break;
1505 			} else {
1506 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1507 
1508 				/*
1509 				 * check to make sure we do not close
1510 				 * listen fd
1511 				 */
1512 				if (is_listen_fd_index(conn_index))
1513 					break;
1514 				else
1515 					goto fdclose;
1516 			}
1517 
1518 		case T_DISCONNECT:
1519 #ifdef DEBUG
1520 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1521 #endif
1522 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1523 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1524 
1525 			/*
1526 			 * T_DISCONNECT on listen fd's should be ignored.
1527 			 */
1528 			if (is_listen_fd_index(conn_index))
1529 				break;
1530 			else
1531 				goto fdclose;
1532 
1533 		case T_ERROR:
1534 		default:
1535 			if (event == T_ERROR || t_errno == TSYSERR) {
1536 				if ((errorstr = strerror(errno)) == NULL) {
1537 					(void) sprintf(buf,
1538 					    "Unknown error num %d", errno);
1539 					errorstr = (const char *) buf;
1540 				}
1541 			} else if (event == -1)
1542 				errorstr = t_strerror(t_errno);
1543 			else
1544 				errorstr = "";
1545 			syslog(LOG_ERR,
1546 			    "unexpected TLI event (0x%x) on "
1547 			    "connection-oriented transport(%s,%d):%s",
1548 			    event, nconf->nc_proto, fd, errorstr);
1549 fdclose:
1550 			num_conns--;
1551 			remove_from_poll_list(fd);
1552 			(void) t_close(fd);
1553 			return (0);
1554 		}
1555 	}
1556 
1557 	return (0);
1558 }
1559 
/*
 * Map a service name (used primarily in logging) to the RPC port name
 * expected by the netdir_*() routines.
 *
 * name - service name; the comparison is case-sensitive
 *
 * Returns a pointer to a constant string that the caller must neither
 * modify nor free: "nfs", "lockd" or "nfs4_callback" for "NFS", "NLM"
 * and "NFS4_CALLBACK" respectively, and "unrecognized" for any other
 * name, including a NULL pointer.
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char *svc;
		char *port;
	} svc_map[] = {
		{ "NFS", "nfs" },
		{ "NLM", "lockd" },
		{ "NFS4_CALLBACK", "nfs4_callback" }
	};
	size_t i;

	/* strcmp() on a NULL pointer is undefined; treat it as unknown. */
	if (name == NULL)
		return ("unrecognized");

	for (i = 0; i < sizeof (svc_map) / sizeof (svc_map[0]); i++) {
		if (strcmp(name, svc_map[i].svc) == 0)
			return (svc_map[i].port);
	}

	return ("unrecognized");
}
1577 
1578 static int
1579 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1580 		struct netconfig **retnconf)
1581 {
1582 	struct netconfig *nconf;
1583 	NCONF_HANDLE *nc;
1584 	struct nd_hostserv hs;
1585 
1586 	hs.h_host = HOST_SELF;
1587 	hs.h_serv = serv_name_to_port_name(serv);
1588 
1589 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1590 		syslog(LOG_ERR, "setnetconfig failed: %m");
1591 		return (-1);
1592 	}
1593 	while (nconf = getnetconfig(nc)) {
1594 		if (OK_TPI_TYPE(nconf) &&
1595 		    strcmp(nconf->nc_device, provider) == 0) {
1596 			*retnconf = nconf;
1597 			return (nfslib_bindit(nconf, addr, &hs,
1598 			    listen_backlog));
1599 		}
1600 	}
1601 	(void) endnetconfig(nc);
1602 
1603 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1604 	    provider);
1605 	return (-1);
1606 }
1607 
1608 static int
1609 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1610 		struct netconfig **retnconf)
1611 {
1612 	struct netconfig *nconf;
1613 	NCONF_HANDLE *nc = NULL;
1614 	struct nd_hostserv hs;
1615 
1616 	hs.h_host = HOST_SELF;
1617 	hs.h_serv = serv_name_to_port_name(serv);
1618 
1619 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1620 		syslog(LOG_ERR, "setnetconfig failed: %m");
1621 		return (-1);
1622 	}
1623 	while (nconf = getnetconfig(nc)) {
1624 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1625 			*retnconf = nconf;
1626 			return (nfslib_bindit(nconf, addr, &hs,
1627 			    listen_backlog));
1628 		}
1629 	}
1630 	(void) endnetconfig(nc);
1631 
1632 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1633 	    proto);
1634 	return (-1);
1635 }
1636 
1637 #include <netinet/in.h>
1638 
1639 /*
1640  * Create an address mask appropriate for the transport.
1641  * The mask is used to obtain the host-specific part of
1642  * a network address when comparing addresses.
1643  * For an internet address the host-specific part is just
1644  * the 32 bit IP address and this part of the mask is set
1645  * to all-ones. The port number part of the mask is zeroes.
1646  */
1647 static int
1648 set_addrmask(fd, nconf, mask)
1649 	struct netconfig *nconf;
1650 	struct netbuf *mask;
1651 {
1652 	struct t_info info;
1653 
1654 	/*
1655 	 * Find the size of the address we need to mask.
1656 	 */
1657 	if (t_getinfo(fd, &info) < 0) {
1658 		t_error("t_getinfo");
1659 		return (-1);
1660 	}
1661 	mask->len = mask->maxlen = info.addr;
1662 	if (info.addr <= 0) {
1663 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1664 			info.addr);
1665 		return (-1);
1666 	}
1667 
1668 	mask->buf = (char *)malloc(mask->len);
1669 	if (mask->buf == NULL) {
1670 		syslog(LOG_ERR, "set_addrmask: no memory");
1671 		return (-1);
1672 	}
1673 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1674 
1675 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1676 		/*
1677 		 * Set the mask so that the port is ignored.
1678 		 */
1679 		/* LINTED pointer alignment */
1680 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1681 								(ulong_t)~0;
1682 		/* LINTED pointer alignment */
1683 		((struct sockaddr_in *)mask->buf)->sin_family =
1684 								(ushort_t)~0;
1685 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1686 		/* LINTED pointer alignment */
1687 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1688 			(uchar_t)~0, sizeof (struct in6_addr));
1689 		/* LINTED pointer alignment */
1690 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1691 								(ushort_t)~0;
1692 	} else {
1693 
1694 		/*
1695 		 * Set all mask bits.
1696 		 */
1697 		(void) memset(mask->buf, 0xFF, mask->len);
1698 	}
1699 	return (0);
1700 }
1701 
1702 /*
1703  * For listen fd's index is always less than end_listen_fds.
1704  * end_listen_fds is defined externally in the daemon that uses this library.
1705  * It's value is equal to the number of open file descriptors after the
1706  * last listen end point was opened but before any connection was accepted.
1707  */
1708 static int
1709 is_listen_fd_index(int index)
1710 {
1711 	return (index < end_listen_fds);
1712 }
1713