xref: /illumos-gate/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision d0698e0d179f97729cacdbc2f13446a6b0a3f22a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * nfs_tbind.c, common part for nfsd and lockd.
27  */
28 
29 #include <tiuser.h>
30 #include <fcntl.h>
31 #include <netconfig.h>
32 #include <stropts.h>
33 #include <errno.h>
34 #include <syslog.h>
35 #include <rpc/rpc.h>
36 #include <sys/time.h>
37 #include <sys/resource.h>
38 #include <signal.h>
39 #include <netdir.h>
40 #include <unistd.h>
41 #include <string.h>
42 #include <netinet/tcp.h>
43 #include <malloc.h>
44 #include <stdlib.h>
45 #include "nfs_tbind.h"
46 #include <nfs/nfs.h>
47 #include <nfs/nfs_acl.h>
48 #include <nfs/nfssys.h>
49 #include <nfs/nfs4.h>
50 #include <zone.h>
51 #include <sys/socket.h>
52 #include <tsol/label.h>
53 
54 /*
55  * Determine valid semantics for most applications.
56  */
/*
 * True when the netconfig entry uses one of the three TPI semantics
 * accepted here (CLTS, COTS or COTS_ORD).  The argument is parenthesized
 * so that pointer expressions may be passed; note that it is evaluated
 * up to three times, so it must be free of side effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
61 
/*
 * Assemble the four bytes starting at address a, most significant byte
 * first, into one unsigned value.  The argument is parenthesized so the
 * byte-pointer cast applies to the whole expression that was passed in;
 * it is evaluated four times, so it must be free of side effects.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
67 
68 /*
69  * Number of elements to add to the poll array on each allocation.
70  */
71 #define	POLL_ARRAY_INC_SIZE	64
72 
73 /*
74  * Number of file descriptors by which the process soft limit may be
75  * increased on each call to nofile_increase(0).
76  */
77 #define	NOFILE_INC_SIZE	64
78 
79 /*
80  * Default TCP send and receive buffer size of NFS server.
81  */
82 #define	NFSD_TCP_BUFSZ	(1024*1024)
83 
/*
 * List node that carries one TLI call structure.
 * NOTE(review): the next/prev pair suggests a doubly linked list of
 * pending connection indications -- confirm against conn_get() and
 * discon_get(), which are the routines that take a struct conn_ind **.
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* link to the following node */
	struct conn_ind *conn_prev;	/* link to the preceding node */
	struct t_call   *conn_call;	/* call structure held by this node */
};
89 
90 struct conn_entry {
91 	bool_t			closing;
92 	struct netconfig	nc;
93 };
94 
95 /*
96  * this file contains transport routines common to nfsd and lockd
97  */
98 static	int	nofile_increase(int);
99 static	int	reuseaddr(int);
100 static	int	recvucred(int);
101 static  int	anonmlp(int);
102 static	void	add_to_poll_list(int, struct netconfig *);
103 static	char	*serv_name_to_port_name(char *);
104 static	int	bind_to_proto(char *, char *, struct netbuf **,
105 				struct netconfig **);
106 static	int	bind_to_provider(char *, char *, struct netbuf **,
107 					struct netconfig **);
108 static	void	conn_close_oldest(void);
109 static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
110 static	void	cots_listen_event(int, int);
111 static	int	discon_get(int, struct netconfig *, struct conn_ind **);
112 static	int	do_poll_clts_action(int, int);
113 static	int	do_poll_cots_action(int, int);
114 static	void	remove_from_poll_list(int);
115 static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
116 static	int	is_listen_fd_index(int);
117 
118 static	struct pollfd *poll_array;
119 static	struct conn_entry *conn_polled;
120 static	int	num_conns;		/* Current number of connections */
121 int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
122 		struct netbuf *);
123 static int	setopt(int fd, int level, int name, int value);
124 static int	get_opt(int fd, int level, int name);
125 static void	nfslib_set_sockbuf(int fd);
126 
127 /*
128  * Called to create and prepare a transport descriptor for in-kernel
129  * RPC service.
130  * Returns -1 on failure and a valid descriptor on success.
131  */
132 int
133 nfslib_transport_open(struct netconfig *nconf)
134 {
135 	int fd;
136 	struct strioctl	strioc;
137 
138 	if ((nconf == (struct netconfig *)NULL) ||
139 	    (nconf->nc_device == (char *)NULL)) {
140 		syslog(LOG_ERR, "no netconfig device");
141 		return (-1);
142 	}
143 
144 	/*
145 	 * Open the transport device.
146 	 */
147 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
148 	if (fd == -1) {
149 		if (t_errno == TSYSERR && errno == EMFILE &&
150 		    (nofile_increase(0) == 0)) {
151 			/* Try again with a higher NOFILE limit. */
152 			fd = t_open(nconf->nc_device, O_RDWR,
153 			    (struct t_info *)NULL);
154 		}
155 		if (fd == -1) {
156 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
157 			    nconf->nc_device, t_errno);
158 			return (-1);
159 		}
160 	}
161 
162 	/*
163 	 * Pop timod because the RPC module must be as close as possible
164 	 * to the transport.
165 	 */
166 	if (ioctl(fd, I_POP, 0) < 0) {
167 		syslog(LOG_ERR, "I_POP of timod failed: %m");
168 		(void) t_close(fd);
169 		return (-1);
170 	}
171 
172 	/*
173 	 * Common code for CLTS and COTS transports
174 	 */
175 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
176 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
177 		(void) t_close(fd);
178 		return (-1);
179 	}
180 
181 	strioc.ic_cmd = RPC_SERVER;
182 	strioc.ic_dp = (char *)0;
183 	strioc.ic_len = 0;
184 	strioc.ic_timout = -1;
185 
186 	/* Tell rpcmod to act like a server stream. */
187 	if (ioctl(fd, I_STR, &strioc) < 0) {
188 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
189 		(void) t_close(fd);
190 		return (-1);
191 	}
192 
193 	/*
194 	 * Re-push timod so that we will still be doing TLI
195 	 * operations on the descriptor.
196 	 */
197 	if (ioctl(fd, I_PUSH, "timod") < 0) {
198 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
199 		(void) t_close(fd);
200 		return (-1);
201 	}
202 
203 	/*
204 	 * Enable options of returning the ip's for udp.
205 	 */
206 	if (strcmp(nconf->nc_netid, "udp6") == 0)
207 		__rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
208 	else if (strcmp(nconf->nc_netid, "udp") == 0)
209 		__rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
210 
211 	return (fd);
212 }
213 
214 static int
215 nofile_increase(int limit)
216 {
217 	struct rlimit rl;
218 
219 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
220 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
221 		return (-1);
222 	}
223 
224 	if (limit > 0)
225 		rl.rlim_cur = limit;
226 	else
227 		rl.rlim_cur += NOFILE_INC_SIZE;
228 
229 	if (rl.rlim_cur > rl.rlim_max &&
230 	    rl.rlim_max != RLIM_INFINITY)
231 		rl.rlim_max = rl.rlim_cur;
232 
233 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
234 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
235 		    rl.rlim_cur);
236 		return (-1);
237 	}
238 
239 	return (0);
240 }
241 
242 static void
243 nfslib_set_sockbuf(int fd)
244 {
245 	int curval, val;
246 
247 	val = NFSD_TCP_BUFSZ;
248 
249 	curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
250 	syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
251 	if ((curval != -1) && (curval < val)) {
252 		syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
253 		if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
254 			syslog(LOG_ERR,
255 			    "couldn't set SO_SNDBUF to %d - t_errno = %d",
256 			    val, t_errno);
257 			syslog(LOG_ERR,
258 			    "Check and increase system-wide tcp_max_buf");
259 		}
260 	}
261 
262 	curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
263 	syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
264 	if ((curval != -1) && (curval < val)) {
265 		syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
266 		if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
267 			syslog(LOG_ERR,
268 			    "couldn't set SO_RCVBUF to %d - t_errno = %d",
269 			    val, t_errno);
270 			syslog(LOG_ERR,
271 			    "Check and increase system-wide tcp_max_buf");
272 		}
273 	}
274 }
275 
276 int
277 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
278 	struct nd_hostserv *hs, int backlog)
279 {
280 	int fd;
281 	struct t_bind  *ntb;
282 	struct t_bind tb;
283 	struct nd_addrlist *addrlist;
284 	struct t_optmgmt req, resp;
285 	struct opthdr *opt;
286 	char reqbuf[128];
287 	bool_t use_any = FALSE;
288 	bool_t gzone = TRUE;
289 
290 	if ((fd = nfslib_transport_open(nconf)) == -1) {
291 		syslog(LOG_ERR, "cannot establish transport service over %s",
292 		    nconf->nc_device);
293 		return (-1);
294 	}
295 
296 	addrlist = (struct nd_addrlist *)NULL;
297 
298 	/* nfs4_callback service does not used a fieed port number */
299 
300 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
301 		tb.addr.maxlen = 0;
302 		tb.addr.len = 0;
303 		tb.addr.buf = 0;
304 		use_any = TRUE;
305 		gzone = (getzoneid() == GLOBAL_ZONEID);
306 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
307 
308 		syslog(LOG_ERR,
309 		"Cannot get address for transport %s host %s service %s",
310 		    nconf->nc_netid, hs->h_host, hs->h_serv);
311 		(void) t_close(fd);
312 		return (-1);
313 	}
314 
315 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
316 		/*
317 		 * If we're running over TCP, then set the
318 		 * SO_REUSEADDR option so that we can bind
319 		 * to our preferred address even if previously
320 		 * left connections exist in FIN_WAIT states.
321 		 * This is somewhat bogus, but otherwise you have
322 		 * to wait 2 minutes to restart after killing it.
323 		 */
324 		if (reuseaddr(fd) == -1) {
325 			syslog(LOG_WARNING,
326 			"couldn't set SO_REUSEADDR option on transport");
327 		}
328 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
329 		/*
330 		 * In order to run MLP on UDP, we need to handle creds.
331 		 */
332 		if (recvucred(fd) == -1) {
333 			syslog(LOG_WARNING,
334 			    "couldn't set SO_RECVUCRED option on transport");
335 		}
336 	}
337 
338 	/*
339 	 * Make non global zone nfs4_callback port MLP
340 	 */
341 	if (use_any && is_system_labeled() && !gzone) {
342 		if (anonmlp(fd) == -1) {
343 			/*
344 			 * failing to set this option means nfs4_callback
345 			 * could fail silently later. So fail it with
346 			 * with an error message now.
347 			 */
348 			syslog(LOG_ERR,
349 			    "couldn't set SO_ANON_MLP option on transport");
350 			(void) t_close(fd);
351 			return (-1);
352 		}
353 	}
354 
355 	if (nconf->nc_semantics == NC_TPI_CLTS)
356 		tb.qlen = 0;
357 	else
358 		tb.qlen = backlog;
359 
360 	/* LINTED pointer alignment */
361 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
362 	if (ntb == (struct t_bind *)NULL) {
363 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
364 		(void) t_close(fd);
365 		netdir_free((void *)addrlist, ND_ADDRLIST);
366 		return (-1);
367 	}
368 
369 	/*
370 	 * XXX - what about the space tb->addr.buf points to? This should
371 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
372 	 * should't be called with T_ALL.
373 	 */
374 	if (addrlist)
375 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
376 
377 	if (t_bind(fd, &tb, ntb) == -1) {
378 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
379 		(void) t_free((char *)ntb, T_BIND);
380 		netdir_free((void *)addrlist, ND_ADDRLIST);
381 		(void) t_close(fd);
382 		return (-1);
383 	}
384 
385 	/* make sure we bound to the right address */
386 	if (use_any == FALSE &&
387 	    (tb.addr.len != ntb->addr.len ||
388 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
389 		syslog(LOG_ERR, "t_bind to wrong address");
390 		(void) t_free((char *)ntb, T_BIND);
391 		netdir_free((void *)addrlist, ND_ADDRLIST);
392 		(void) t_close(fd);
393 		return (-1);
394 	}
395 
396 	/*
397 	 * Call nfs4svc_setport so that the kernel can be
398 	 * informed what port number the daemon is listing
399 	 * for incoming connection requests.
400 	 */
401 
402 	if ((nconf->nc_semantics == NC_TPI_COTS ||
403 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
404 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
405 
406 	*addr = &ntb->addr;
407 	netdir_free((void *)addrlist, ND_ADDRLIST);
408 
409 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
410 		/*
411 		 * Disable the Nagle algorithm on TCP connections.
412 		 * Connections accepted from this listener will
413 		 * inherit the listener options.
414 		 */
415 
416 		/* LINTED pointer alignment */
417 		opt = (struct opthdr *)reqbuf;
418 		opt->level = IPPROTO_TCP;
419 		opt->name = TCP_NODELAY;
420 		opt->len = sizeof (int);
421 
422 		/* LINTED pointer alignment */
423 		*(int *)((char *)opt + sizeof (*opt)) = 1;
424 
425 		req.flags = T_NEGOTIATE;
426 		req.opt.len = sizeof (*opt) + opt->len;
427 		req.opt.buf = (char *)opt;
428 		resp.flags = 0;
429 		resp.opt.buf = reqbuf;
430 		resp.opt.maxlen = sizeof (reqbuf);
431 
432 		if (t_optmgmt(fd, &req, &resp) < 0 ||
433 		    resp.flags != T_SUCCESS) {
434 			syslog(LOG_ERR,
435 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
436 			    nconf->nc_proto, t_errno);
437 		}
438 
439 		nfslib_set_sockbuf(fd);
440 	}
441 
442 	return (fd);
443 }
444 
445 static int
446 get_opt(int fd, int level, int name)
447 {
448 	struct t_optmgmt req, res;
449 	struct {
450 		struct opthdr opt;
451 		int value;
452 	} reqbuf;
453 
454 	reqbuf.opt.level = level;
455 	reqbuf.opt.name = name;
456 	reqbuf.opt.len = sizeof (int);
457 	reqbuf.value = 0;
458 
459 	req.flags = T_CURRENT;
460 	req.opt.len = sizeof (reqbuf);
461 	req.opt.buf = (char *)&reqbuf;
462 
463 	res.flags = 0;
464 	res.opt.buf = (char *)&reqbuf;
465 	res.opt.maxlen = sizeof (reqbuf);
466 
467 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
468 		t_error("t_optmgmt");
469 		return (-1);
470 	}
471 	return (reqbuf.value);
472 }
473 
474 static int
475 setopt(int fd, int level, int name, int value)
476 {
477 	struct t_optmgmt req, resp;
478 	struct {
479 		struct opthdr opt;
480 		int value;
481 	} reqbuf;
482 
483 	reqbuf.opt.level = level;
484 	reqbuf.opt.name = name;
485 	reqbuf.opt.len = sizeof (int);
486 
487 	reqbuf.value = value;
488 
489 	req.flags = T_NEGOTIATE;
490 	req.opt.len = sizeof (reqbuf);
491 	req.opt.buf = (char *)&reqbuf;
492 
493 	resp.flags = 0;
494 	resp.opt.buf = (char *)&reqbuf;
495 	resp.opt.maxlen = sizeof (reqbuf);
496 
497 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
498 		t_error("t_optmgmt");
499 		return (-1);
500 	}
501 	return (0);
502 }
503 
/* Turn SO_REUSEADDR on for fd; returns what setopt() returns. */
static int
reuseaddr(int fd)
{
	const int enable = 1;

	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, enable));
}
509 
510 static int
511 recvucred(int fd)
512 {
513 	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
514 }
515 
516 static int
517 anonmlp(int fd)
518 {
519 	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
520 }
521 
522 void
523 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
524 {
525 	int error;
526 
527 	/*
528 	 * Save the error code across syslog(), just in case syslog()
529 	 * gets its own error and, therefore, overwrites errno.
530 	 */
531 	error = errno;
532 	if (t_errno == TSYSERR) {
533 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
534 		    tli_name, fd, nconf->nc_proto);
535 	} else {
536 		syslog(LOG_ERR,
537 		    "%s(file descriptor %d/transport %s) TLI error %d",
538 		    tli_name, fd, nconf->nc_proto, t_errno);
539 	}
540 	errno = error;
541 }
542 
543 /*
544  * Called to set up service over a particular transport.
545  */
546 void
547 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
548 	int (*svc)(int, struct netbuf, struct netconfig *))
549 {
550 	register int sock;
551 	struct protob *protobp;
552 	struct netbuf *retaddr;
553 	struct netconfig *retnconf;
554 	struct netbuf addrmask;
555 	int vers;
556 	int err;
557 	int l;
558 
559 	if (provider)
560 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
561 		    &retnconf);
562 	else
563 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
564 		    &retnconf);
565 
566 	if (sock == -1) {
567 		(void) syslog(LOG_ERR,
568 	"Cannot establish %s service over %s: transport setup problem.",
569 		    protobp0->serv, provider ? provider : proto);
570 		return;
571 	}
572 
573 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
574 		(void) syslog(LOG_ERR,
575 		    "Cannot set address mask for %s", retnconf->nc_netid);
576 		return;
577 	}
578 
579 	/*
580 	 * Register all versions of the programs in the protocol block list.
581 	 */
582 	l = strlen(NC_UDP);
583 	for (protobp = protobp0; protobp; protobp = protobp->next) {
584 		for (vers = protobp->versmin; vers <= protobp->versmax;
585 		    vers++) {
586 			if ((protobp->program == NFS_PROGRAM ||
587 			    protobp->program == NFS_ACL_PROGRAM) &&
588 			    vers == NFS_V4 &&
589 			    strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
590 				continue;
591 
592 			(void) rpcb_unset(protobp->program, vers, retnconf);
593 			(void) rpcb_set(protobp->program, vers, retnconf,
594 			    retaddr);
595 		}
596 	}
597 
598 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
599 		/* Don't drop core if supporting module(s) aren't loaded. */
600 		(void) signal(SIGSYS, SIG_IGN);
601 
602 		/*
603 		 * svc() doesn't block, it returns success or failure.
604 		 */
605 
606 		if (svc == NULL && Mysvc4 != NULL)
607 			err = (*Mysvc4)(sock, &addrmask, retnconf,
608 			    NFS4_SETPORT|NFS4_KRPC_START, retaddr);
609 		else
610 			err = (*svc)(sock, addrmask, retnconf);
611 
612 		if (err < 0) {
613 			(void) syslog(LOG_ERR,
614 			    "Cannot establish %s service over <file desc."
615 			    " %d, protocol %s> : %m. Exiting",
616 			    protobp0->serv, sock, retnconf->nc_proto);
617 			exit(1);
618 		}
619 	}
620 
621 	/*
622 	 * We successfully set up the server over this transport.
623 	 * Add this descriptor to the one being polled on.
624 	 */
625 	add_to_poll_list(sock, retnconf);
626 }
627 
628 /*
629  * Set up the NFS service over all the available transports.
630  * Returns -1 for failure, 0 for success.
631  */
632 int
633 do_all(struct protob *protobp,
634 	int (*svc)(int, struct netbuf, struct netconfig *))
635 {
636 	struct netconfig *nconf;
637 	NCONF_HANDLE *nc;
638 	int l;
639 
640 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
641 		syslog(LOG_ERR, "setnetconfig failed: %m");
642 		return (-1);
643 	}
644 	l = strlen(NC_UDP);
645 	while (nconf = getnetconfig(nc)) {
646 		if ((nconf->nc_flag & NC_VISIBLE) &&
647 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
648 		    OK_TPI_TYPE(nconf) &&
649 		    (protobp->program != NFS4_CALLBACK ||
650 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
651 			do_one(nconf->nc_device, nconf->nc_proto,
652 			    protobp, svc);
653 	}
654 	(void) endnetconfig(nc);
655 	return (0);
656 }
657 
658 /*
659  * poll on the open transport descriptors for events and errors.
660  */
661 void
662 poll_for_action(void)
663 {
664 	int nfds;
665 	int i;
666 
667 	/*
668 	 * Keep polling until all transports have been closed. When this
669 	 * happens, we return.
670 	 */
671 	while ((int)num_fds > 0) {
672 		nfds = poll(poll_array, num_fds, INFTIM);
673 		switch (nfds) {
674 		case 0:
675 			continue;
676 
677 		case -1:
678 			/*
679 			 * Some errors from poll could be
680 			 * due to temporary conditions, and we try to
681 			 * be robust in the face of them. Other
682 			 * errors (should never happen in theory)
683 			 * are fatal (eg. EINVAL, EFAULT).
684 			 */
685 			switch (errno) {
686 			case EINTR:
687 				continue;
688 
689 			case EAGAIN:
690 			case ENOMEM:
691 				(void) sleep(10);
692 				continue;
693 
694 			default:
695 				(void) syslog(LOG_ERR,
696 				    "poll failed: %m. Exiting");
697 				exit(1);
698 			}
699 		default:
700 			break;
701 		}
702 
703 		/*
704 		 * Go through the poll list looking for events.
705 		 */
706 		for (i = 0; i < num_fds && nfds > 0; i++) {
707 			if (poll_array[i].revents) {
708 				nfds--;
709 				/*
710 				 * We have a message, so try to read it.
711 				 * Record the error return in errno,
712 				 * so that syslog(LOG_ERR, "...%m")
713 				 * dumps the corresponding error string.
714 				 */
715 				if (conn_polled[i].nc.nc_semantics ==
716 				    NC_TPI_CLTS) {
717 					errno = do_poll_clts_action(
718 					    poll_array[i].fd, i);
719 				} else {
720 					errno = do_poll_cots_action(
721 					    poll_array[i].fd, i);
722 				}
723 
724 				if (errno == 0)
725 					continue;
726 				/*
727 				 * Most returned error codes mean that there is
728 				 * fatal condition which we can only deal with
729 				 * by closing the transport.
730 				 */
731 				if (errno != EAGAIN && errno != ENOMEM) {
732 					(void) syslog(LOG_ERR,
733 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
734 					    poll_array[i].fd,
735 					    conn_polled[i].nc.nc_proto);
736 					(void) t_close(poll_array[i].fd);
737 					remove_from_poll_list(poll_array[i].fd);
738 
739 				} else if (errno == ENOMEM)
740 					(void) sleep(5);
741 			}
742 		}
743 	}
744 
745 	(void) syslog(LOG_ERR,
746 	    "All transports have been closed with errors. Exiting.");
747 }
748 
749 /*
750  * Allocate poll/transport array entries for this descriptor.
751  */
752 static void
753 add_to_poll_list(int fd, struct netconfig *nconf)
754 {
755 	static int poll_array_size = 0;
756 
757 	/*
758 	 * If the arrays are full, allocate new ones.
759 	 */
760 	if (num_fds == poll_array_size) {
761 		struct pollfd *tpa;
762 		struct conn_entry *tnp;
763 
764 		if (poll_array_size != 0) {
765 			tpa = poll_array;
766 			tnp = conn_polled;
767 		} else
768 			tpa = (struct pollfd *)0;
769 
770 		poll_array_size += POLL_ARRAY_INC_SIZE;
771 		/*
772 		 * Allocate new arrays.
773 		 */
774 		poll_array = (struct pollfd *)
775 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
776 		conn_polled = (struct conn_entry *)
777 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
778 		if (poll_array == (struct pollfd *)NULL ||
779 		    conn_polled == (struct conn_entry *)NULL) {
780 			syslog(LOG_ERR, "malloc failed for poll array");
781 			exit(1);
782 		}
783 
784 		/*
785 		 * Copy the data of the old ones into new arrays, and
786 		 * free the old ones.
787 		 */
788 		if (tpa) {
789 			(void) memcpy((void *)poll_array, (void *)tpa,
790 			    num_fds * sizeof (struct pollfd));
791 			(void) memcpy((void *)conn_polled, (void *)tnp,
792 			    num_fds * sizeof (struct conn_entry));
793 			free((void *)tpa);
794 			free((void *)tnp);
795 		}
796 	}
797 
798 	/*
799 	 * Set the descriptor and event list. All possible events are
800 	 * polled for.
801 	 */
802 	poll_array[num_fds].fd = fd;
803 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
804 
805 	/*
806 	 * Copy the transport data over too.
807 	 */
808 	conn_polled[num_fds].nc = *nconf;
809 	conn_polled[num_fds].closing = 0;
810 
811 	/*
812 	 * Set the descriptor to non-blocking. Avoids a race
813 	 * between data arriving on the stream and then having it
814 	 * flushed before we can read it.
815 	 */
816 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
817 		(void) syslog(LOG_ERR,
818 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
819 		    num_fds, nconf->nc_proto);
820 		exit(1);
821 	}
822 
823 	/*
824 	 * Count this descriptor.
825 	 */
826 	++num_fds;
827 }
828 
829 static void
830 remove_from_poll_list(int fd)
831 {
832 	int i;
833 	int num_to_copy;
834 
835 	for (i = 0; i < num_fds; i++) {
836 		if (poll_array[i].fd == fd) {
837 			--num_fds;
838 			num_to_copy = num_fds - i;
839 			(void) memcpy((void *)&poll_array[i],
840 			    (void *)&poll_array[i+1],
841 			    num_to_copy * sizeof (struct pollfd));
842 			(void) memset((void *)&poll_array[num_fds], 0,
843 			    sizeof (struct pollfd));
844 			(void) memcpy((void *)&conn_polled[i],
845 			    (void *)&conn_polled[i+1],
846 			    num_to_copy * sizeof (struct conn_entry));
847 			(void) memset((void *)&conn_polled[num_fds], 0,
848 			    sizeof (struct conn_entry));
849 			return;
850 		}
851 	}
852 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
853 
854 }
855 
856 /*
857  * Called to read and interpret the event on a connectionless descriptor.
858  * Returns 0 if successful, or a UNIX error code if failure.
859  */
860 static int
861 do_poll_clts_action(int fd, int conn_index)
862 {
863 	int error;
864 	int ret;
865 	int flags;
866 	struct netconfig *nconf = &conn_polled[conn_index].nc;
867 	static struct t_unitdata *unitdata = NULL;
868 	static struct t_uderr *uderr = NULL;
869 	static int oldfd = -1;
870 	struct nd_hostservlist *host = NULL;
871 	struct strbuf ctl[1], data[1];
872 	/*
873 	 * We just need to have some space to consume the
874 	 * message in the event we can't use the TLI interface to do the
875 	 * job.
876 	 *
877 	 * We flush the message using getmsg(). For the control part
878 	 * we allocate enough for any TPI header plus 32 bytes for address
879 	 * and options. For the data part, there is nothing magic about
880 	 * the size of the array, but 256 bytes is probably better than
881 	 * 1 byte, and we don't expect any data portion anyway.
882 	 *
883 	 * If the array sizes are too small, we handle this because getmsg()
884 	 * (called to consume the message) will return MOREDATA|MORECTL.
885 	 * Thus we just call getmsg() until it's read the message.
886 	 */
887 	char ctlbuf[sizeof (union T_primitives) + 32];
888 	char databuf[256];
889 
890 	/*
891 	 * If this is the same descriptor as the last time
892 	 * do_poll_clts_action was called, we can save some
893 	 * de-allocation and allocation.
894 	 */
895 	if (oldfd != fd) {
896 		oldfd = fd;
897 
898 		if (unitdata) {
899 			(void) t_free((char *)unitdata, T_UNITDATA);
900 			unitdata = NULL;
901 		}
902 		if (uderr) {
903 			(void) t_free((char *)uderr, T_UDERROR);
904 			uderr = NULL;
905 		}
906 	}
907 
908 	/*
909 	 * Allocate a unitdata structure for receiving the event.
910 	 */
911 	if (unitdata == NULL) {
912 		/* LINTED pointer alignment */
913 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
914 		if (unitdata == NULL) {
915 			if (t_errno == TSYSERR) {
916 				/*
917 				 * Save the error code across
918 				 * syslog(), just in case
919 				 * syslog() gets its own error
920 				 * and therefore overwrites errno.
921 				 */
922 				error = errno;
923 				(void) syslog(LOG_ERR,
924 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
925 				    fd, nconf->nc_proto);
926 				return (error);
927 			}
928 			(void) syslog(LOG_ERR,
929 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
930 			    fd, nconf->nc_proto, t_errno);
931 			goto flush_it;
932 		}
933 	}
934 
935 try_again:
936 	flags = 0;
937 
938 	/*
939 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
940 	 * we don't get any, because rpcmod filters them out.
941 	 * However, we need to call t_rcvudata() to let TLI
942 	 * tell us we have a T_UDERROR_IND.
943 	 *
944 	 * algorithm is:
945 	 * 	t_rcvudata(), expecting TLOOK.
946 	 * 	t_look(), expecting T_UDERR.
947 	 * 	t_rcvuderr(), expecting success (0).
948 	 * 	expand destination address into ASCII,
949 	 *	and dump it.
950 	 */
951 
952 	ret = t_rcvudata(fd, unitdata, &flags);
953 	if (ret == 0 || t_errno == TBUFOVFLW) {
954 		(void) syslog(LOG_WARNING,
955 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
956 		    fd, nconf->nc_proto, unitdata->udata.len);
957 
958 		/*
959 		 * Even though we don't expect any data, in case we do,
960 		 * keep reading until there is no more.
961 		 */
962 		if (flags & T_MORE)
963 			goto try_again;
964 
965 		return (0);
966 	}
967 
968 	switch (t_errno) {
969 	case TNODATA:
970 		return (0);
971 	case TSYSERR:
972 		/*
973 		 * System errors are returned to caller.
974 		 * Save the error code across
975 		 * syslog(), just in case
976 		 * syslog() gets its own error
977 		 * and therefore overwrites errno.
978 		 */
979 		error = errno;
980 		(void) syslog(LOG_ERR,
981 		    "t_rcvudata(file descriptor %d/transport %s) %m",
982 		    fd, nconf->nc_proto);
983 		return (error);
984 	case TLOOK:
985 		break;
986 	default:
987 		(void) syslog(LOG_ERR,
988 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
989 		    fd, nconf->nc_proto, t_errno);
990 		goto flush_it;
991 	}
992 
993 	ret = t_look(fd);
994 	switch (ret) {
995 	case 0:
996 		return (0);
997 	case -1:
998 		/*
999 		 * System errors are returned to caller.
1000 		 */
1001 		if (t_errno == TSYSERR) {
1002 			/*
1003 			 * Save the error code across
1004 			 * syslog(), just in case
1005 			 * syslog() gets its own error
1006 			 * and therefore overwrites errno.
1007 			 */
1008 			error = errno;
1009 			(void) syslog(LOG_ERR,
1010 			    "t_look(file descriptor %d/transport %s) %m",
1011 			    fd, nconf->nc_proto);
1012 			return (error);
1013 		}
1014 		(void) syslog(LOG_ERR,
1015 		    "t_look(file descriptor %d/transport %s) TLI error %d",
1016 		    fd, nconf->nc_proto, t_errno);
1017 		goto flush_it;
1018 	case T_UDERR:
1019 		break;
1020 	default:
1021 		(void) syslog(LOG_WARNING,
1022 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1023 		    fd, nconf->nc_proto, ret, T_UDERR);
1024 	}
1025 
1026 	if (uderr == NULL) {
1027 		/* LINTED pointer alignment */
1028 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1029 		if (uderr == NULL) {
1030 			if (t_errno == TSYSERR) {
1031 				/*
1032 				 * Save the error code across
1033 				 * syslog(), just in case
1034 				 * syslog() gets its own error
1035 				 * and therefore overwrites errno.
1036 				 */
1037 				error = errno;
1038 				(void) syslog(LOG_ERR,
1039 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1040 				    fd, nconf->nc_proto);
1041 				return (error);
1042 			}
1043 			(void) syslog(LOG_ERR,
1044 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1045 			    fd, nconf->nc_proto, t_errno);
1046 			goto flush_it;
1047 		}
1048 	}
1049 
1050 	ret = t_rcvuderr(fd, uderr);
1051 	if (ret == 0) {
1052 
1053 		/*
1054 		 * Save the datagram error in errno, so that the
1055 		 * %m argument to syslog picks up the error string.
1056 		 */
1057 		errno = uderr->error;
1058 
1059 		/*
1060 		 * Log the datagram error, then log the host that
1061 		 * probably triggerred. Cannot log both in the
1062 		 * same transaction because of packet size limitations
1063 		 * in /dev/log.
1064 		 */
1065 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1066 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1067 		    fd, nconf->nc_proto);
1068 
1069 		/*
1070 		 * Try to map the client's address back to a
1071 		 * name.
1072 		 */
1073 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1074 		if (ret != -1 && host && host->h_cnt > 0 &&
1075 		    host->h_hostservs) {
1076 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1077 "Bad NFS response was sent to client with host name: %s; service port: %s",
1078 		    host->h_hostservs->h_host,
1079 		    host->h_hostservs->h_serv);
1080 		} else {
1081 			int i, j;
1082 			char *buf;
1083 			char *hex = "0123456789abcdef";
1084 
1085 			/*
1086 			 * Mapping failed, print the whole thing
1087 			 * in ASCII hex.
1088 			 */
1089 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1090 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1091 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1092 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1093 			}
1094 			buf[j] = '\0';
1095 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1096 	"Bad NFS response was sent to client with transport address: 0x%s",
1097 		    buf);
1098 			free((void *)buf);
1099 		}
1100 
1101 		if (ret == 0 && host != NULL)
1102 			netdir_free((void *)host, ND_HOSTSERVLIST);
1103 		return (0);
1104 	}
1105 
1106 	switch (t_errno) {
1107 	case TNOUDERR:
1108 		goto flush_it;
1109 	case TSYSERR:
1110 		/*
1111 		 * System errors are returned to caller.
1112 		 * Save the error code across
1113 		 * syslog(), just in case
1114 		 * syslog() gets its own error
1115 		 * and therefore overwrites errno.
1116 		 */
1117 		error = errno;
1118 		(void) syslog(LOG_ERR,
1119 		    "t_rcvuderr(file descriptor %d/transport %s) %m",
1120 		    fd, nconf->nc_proto);
1121 		return (error);
1122 	default:
1123 		(void) syslog(LOG_ERR,
1124 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1125 		    fd, nconf->nc_proto, t_errno);
1126 		goto flush_it;
1127 	}
1128 
1129 flush_it:
1130 	/*
1131 	 * If we get here, then we could not cope with whatever message
1132 	 * we attempted to read, so flush it. If we did read a message,
1133 	 * and one isn't present, that is all right, because fd is in
1134 	 * nonblocking mode.
1135 	 */
1136 	(void) syslog(LOG_ERR,
1137 	"Flushing one input message from <file descriptor %d/transport %s>",
1138 	    fd, nconf->nc_proto);
1139 
1140 	/*
1141 	 * Read and discard the message. Do this this until there is
1142 	 * no more control/data in the message or until we get an error.
1143 	 */
1144 	do {
1145 		ctl->maxlen = sizeof (ctlbuf);
1146 		ctl->buf = ctlbuf;
1147 		data->maxlen = sizeof (databuf);
1148 		data->buf = databuf;
1149 		flags = 0;
1150 		ret = getmsg(fd, ctl, data, &flags);
1151 		if (ret == -1)
1152 			return (errno);
1153 	} while (ret != 0);
1154 
1155 	return (0);
1156 }
1157 
1158 static void
1159 conn_close_oldest(void)
1160 {
1161 	int fd;
1162 	int i1;
1163 
1164 	/*
1165 	 * Find the oldest connection that is not already in the
1166 	 * process of shutting down.
1167 	 */
1168 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1169 		if (i1 >= num_fds)
1170 			return;
1171 		if (conn_polled[i1].closing == 0)
1172 			break;
1173 	}
1174 #ifdef DEBUG
1175 	printf("too many connections (%d), releasing oldest (%d)\n",
1176 	    num_conns, poll_array[i1].fd);
1177 #else
1178 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1179 	    num_conns, poll_array[i1].fd);
1180 #endif
1181 	fd = poll_array[i1].fd;
1182 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1183 		/*
1184 		 * For politeness, send a T_DISCON_REQ to the transport
1185 		 * provider.  We close the stream anyway.
1186 		 */
1187 		(void) t_snddis(fd, (struct t_call *)0);
1188 		num_conns--;
1189 		remove_from_poll_list(fd);
1190 		(void) t_close(fd);
1191 	} else {
1192 		/*
1193 		 * For orderly release, we do not close the stream
1194 		 * until the T_ORDREL_IND arrives to complete
1195 		 * the handshake.
1196 		 */
1197 		if (t_sndrel(fd) == 0)
1198 			conn_polled[i1].closing = 1;
1199 	}
1200 }
1201 
1202 static boolean_t
1203 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1204 {
1205 	struct conn_ind	*conn;
1206 	struct conn_ind	*next_conn;
1207 
1208 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1209 	if (conn == NULL) {
1210 		syslog(LOG_ERR, "malloc for listen indication failed");
1211 		return (FALSE);
1212 	}
1213 
1214 	/* LINTED pointer alignment */
1215 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1216 	if (conn->conn_call == NULL) {
1217 		free((char *)conn);
1218 		nfslib_log_tli_error("t_alloc", fd, nconf);
1219 		return (FALSE);
1220 	}
1221 
1222 	if (t_listen(fd, conn->conn_call) == -1) {
1223 		nfslib_log_tli_error("t_listen", fd, nconf);
1224 		(void) t_free((char *)conn->conn_call, T_CALL);
1225 		free((char *)conn);
1226 		return (FALSE);
1227 	}
1228 
1229 	if (conn->conn_call->udata.len > 0) {
1230 		syslog(LOG_WARNING,
1231 	"rejecting inbound connection(%s) with %d bytes of connect data",
1232 		    nconf->nc_proto, conn->conn_call->udata.len);
1233 
1234 		conn->conn_call->udata.len = 0;
1235 		(void) t_snddis(fd, conn->conn_call);
1236 		(void) t_free((char *)conn->conn_call, T_CALL);
1237 		free((char *)conn);
1238 		return (FALSE);
1239 	}
1240 
1241 	if ((next_conn = *connp) != NULL) {
1242 		next_conn->conn_prev->conn_next = conn;
1243 		conn->conn_next = next_conn;
1244 		conn->conn_prev = next_conn->conn_prev;
1245 		next_conn->conn_prev = conn;
1246 	} else {
1247 		conn->conn_next = conn;
1248 		conn->conn_prev = conn;
1249 		*connp = conn;
1250 	}
1251 	return (TRUE);
1252 }
1253 
1254 static int
1255 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1256 {
1257 	struct conn_ind	*conn;
1258 	struct t_discon	discon;
1259 
1260 	discon.udata.buf = (char *)0;
1261 	discon.udata.maxlen = 0;
1262 	if (t_rcvdis(fd, &discon) == -1) {
1263 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1264 		return (-1);
1265 	}
1266 
1267 	conn = *connp;
1268 	if (conn == NULL)
1269 		return (0);
1270 
1271 	do {
1272 		if (conn->conn_call->sequence == discon.sequence) {
1273 			if (conn->conn_next == conn)
1274 				*connp = (struct conn_ind *)0;
1275 			else {
1276 				if (conn == *connp) {
1277 					*connp = conn->conn_next;
1278 				}
1279 				conn->conn_next->conn_prev = conn->conn_prev;
1280 				conn->conn_prev->conn_next = conn->conn_next;
1281 			}
1282 			free((char *)conn);
1283 			break;
1284 		}
1285 		conn = conn->conn_next;
1286 	} while (conn != *connp);
1287 
1288 	return (0);
1289 }
1290 
1291 static void
1292 cots_listen_event(int fd, int conn_index)
1293 {
1294 	struct t_call *call;
1295 	struct conn_ind	*conn;
1296 	struct conn_ind	*conn_head;
1297 	int event;
1298 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1299 	int new_fd;
1300 	struct netbuf addrmask;
1301 	int ret = 0;
1302 	char *clnt;
1303 	char *clnt_uaddr = NULL;
1304 	struct nd_hostservlist *clnt_serv = NULL;
1305 
1306 	conn_head = (struct conn_ind *)0;
1307 	(void) conn_get(fd, nconf, &conn_head);
1308 
1309 	while ((conn = conn_head) != NULL) {
1310 		conn_head = conn->conn_next;
1311 		if (conn_head == conn)
1312 			conn_head = (struct conn_ind *)0;
1313 		else {
1314 			conn_head->conn_prev = conn->conn_prev;
1315 			conn->conn_prev->conn_next = conn_head;
1316 		}
1317 		call = conn->conn_call;
1318 		free((char *)conn);
1319 
1320 		/*
1321 		 * If we have already accepted the maximum number of
1322 		 * connections allowed on the command line, then drop
1323 		 * the oldest connection (for any protocol) before
1324 		 * accepting the new connection.  Unless explicitly
1325 		 * set on the command line, max_conns_allowed is -1.
1326 		 */
1327 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1328 			conn_close_oldest();
1329 
1330 		/*
1331 		 * Create a new transport endpoint for the same proto as
1332 		 * the listener.
1333 		 */
1334 		new_fd = nfslib_transport_open(nconf);
1335 		if (new_fd == -1) {
1336 			call->udata.len = 0;
1337 			(void) t_snddis(fd, call);
1338 			(void) t_free((char *)call, T_CALL);
1339 			syslog(LOG_ERR, "Cannot establish transport over %s",
1340 			    nconf->nc_device);
1341 			continue;
1342 		}
1343 
1344 		/* Bind to a generic address/port for the accepting stream. */
1345 		if (t_bind(new_fd, (struct t_bind *)NULL,
1346 		    (struct t_bind *)NULL) == -1) {
1347 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1348 			call->udata.len = 0;
1349 			(void) t_snddis(fd, call);
1350 			(void) t_free((char *)call, T_CALL);
1351 			(void) t_close(new_fd);
1352 			continue;
1353 		}
1354 
1355 		while (t_accept(fd, new_fd, call) == -1) {
1356 			if (t_errno != TLOOK) {
1357 #ifdef DEBUG
1358 				nfslib_log_tli_error("t_accept", fd, nconf);
1359 #endif
1360 				call->udata.len = 0;
1361 				(void) t_snddis(fd, call);
1362 				(void) t_free((char *)call, T_CALL);
1363 				(void) t_close(new_fd);
1364 				goto do_next_conn;
1365 			}
1366 			while (event = t_look(fd)) {
1367 				switch (event) {
1368 				case T_LISTEN:
1369 #ifdef DEBUG
1370 					printf(
1371 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1372 #endif
1373 					(void) conn_get(fd, nconf, &conn_head);
1374 					continue;
1375 				case T_DISCONNECT:
1376 #ifdef DEBUG
1377 					printf(
1378 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1379 					    nconf->nc_proto);
1380 #endif
1381 					(void) discon_get(fd, nconf,
1382 					    &conn_head);
1383 					continue;
1384 				default:
1385 					syslog(LOG_ERR,
1386 			"unexpected event 0x%x during accept processing (%s)",
1387 					    event, nconf->nc_proto);
1388 					call->udata.len = 0;
1389 					(void) t_snddis(fd, call);
1390 					(void) t_free((char *)call, T_CALL);
1391 					(void) t_close(new_fd);
1392 					goto do_next_conn;
1393 				}
1394 			}
1395 		}
1396 
1397 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1398 			(void) syslog(LOG_ERR,
1399 			    "Cannot set address mask for %s",
1400 			    nconf->nc_netid);
1401 			return;
1402 		}
1403 
1404 		/* Tell KRPC about the new stream. */
1405 		if (Mysvc4 != NULL)
1406 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1407 			    NFS4_KRPC_START, &call->addr);
1408 		else
1409 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1410 
1411 		if (ret < 0) {
1412 			if (errno != ENOTCONN) {
1413 				syslog(LOG_ERR,
1414 				    "unable to register new connection: %m");
1415 			} else {
1416 				/*
1417 				 * This is the only error that could be
1418 				 * caused by the client, so who was it?
1419 				 */
1420 				if (netdir_getbyaddr(nconf, &clnt_serv,
1421 				    &(call->addr)) == ND_OK &&
1422 				    clnt_serv->h_cnt > 0)
1423 					clnt = clnt_serv->h_hostservs->h_host;
1424 				else
1425 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1426 					    &(call->addr));
1427 				/*
1428 				 * If we don't know who the client was,
1429 				 * remain silent.
1430 				 */
1431 				if (clnt)
1432 					syslog(LOG_ERR,
1433 "unable to register new connection: client %s has dropped connection", clnt);
1434 				if (clnt_serv)
1435 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1436 				if (clnt_uaddr)
1437 					free(clnt_uaddr);
1438 			}
1439 			free(addrmask.buf);
1440 			(void) t_snddis(new_fd, (struct t_call *)0);
1441 			(void) t_free((char *)call, T_CALL);
1442 			(void) t_close(new_fd);
1443 			goto do_next_conn;
1444 		}
1445 
1446 		free(addrmask.buf);
1447 		(void) t_free((char *)call, T_CALL);
1448 
1449 		/*
1450 		 * Poll on the new descriptor so that we get disconnect
1451 		 * and orderly release indications.
1452 		 */
1453 		num_conns++;
1454 		add_to_poll_list(new_fd, nconf);
1455 
1456 		/* Reset nconf in case it has been moved. */
1457 		nconf = &conn_polled[conn_index].nc;
1458 do_next_conn:;
1459 	}
1460 }
1461 
1462 static int
1463 do_poll_cots_action(int fd, int conn_index)
1464 {
1465 	char buf[256];
1466 	int event;
1467 	int i1;
1468 	int flags;
1469 	struct conn_entry *connent = &conn_polled[conn_index];
1470 	struct netconfig *nconf = &(connent->nc);
1471 	const char *errorstr;
1472 
1473 	while (event = t_look(fd)) {
1474 		switch (event) {
1475 		case T_LISTEN:
1476 #ifdef DEBUG
1477 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1478 #endif
1479 			cots_listen_event(fd, conn_index);
1480 			break;
1481 
1482 		case T_DATA:
1483 #ifdef DEBUG
1484 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1485 #endif
1486 			/*
1487 			 * Receive a private notification from CONS rpcmod.
1488 			 */
1489 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1490 			if (i1 == -1) {
1491 				syslog(LOG_ERR, "t_rcv failed");
1492 				break;
1493 			}
1494 			if (i1 < sizeof (int))
1495 				break;
1496 			i1 = BE32_TO_U32(buf);
1497 			if (i1 == 1 || i1 == 2) {
1498 				/*
1499 				 * This connection has been idle for too long,
1500 				 * so release it as politely as we can.  If we
1501 				 * have already initiated an orderly release
1502 				 * and we get notified that the stream is
1503 				 * still idle, pull the plug.  This prevents
1504 				 * hung connections from continuing to consume
1505 				 * resources.
1506 				 */
1507 #ifdef DEBUG
1508 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1509 printf("initiating orderly release of idle connection\n");
1510 #endif
1511 				if (nconf->nc_semantics == NC_TPI_COTS ||
1512 				    connent->closing != 0) {
1513 					(void) t_snddis(fd, (struct t_call *)0);
1514 					goto fdclose;
1515 				}
1516 				/*
1517 				 * For NC_TPI_COTS_ORD, the stream is closed
1518 				 * and removed from the poll list when the
1519 				 * T_ORDREL is received from the provider.  We
1520 				 * don't wait for it here because it may take
1521 				 * a while for the transport to shut down.
1522 				 */
1523 				if (t_sndrel(fd) == -1) {
1524 					syslog(LOG_ERR,
1525 					"unable to send orderly release %m");
1526 				}
1527 				connent->closing = 1;
1528 			} else
1529 				syslog(LOG_ERR,
1530 				"unexpected event from CONS rpcmod %d", i1);
1531 			break;
1532 
1533 		case T_ORDREL:
1534 #ifdef DEBUG
1535 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1536 #endif
1537 			/* Perform an orderly release. */
1538 			if (t_rcvrel(fd) == 0) {
1539 				/* T_ORDREL on listen fd's should be ignored */
1540 				if (!is_listen_fd_index(conn_index)) {
1541 					(void) t_sndrel(fd);
1542 					goto fdclose;
1543 				}
1544 				break;
1545 
1546 			} else if (t_errno == TLOOK) {
1547 				break;
1548 			} else {
1549 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1550 
1551 				/*
1552 				 * check to make sure we do not close
1553 				 * listen fd
1554 				 */
1555 				if (is_listen_fd_index(conn_index))
1556 					break;
1557 				else
1558 					goto fdclose;
1559 			}
1560 
1561 		case T_DISCONNECT:
1562 #ifdef DEBUG
1563 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1564 #endif
1565 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1566 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1567 
1568 			/*
1569 			 * T_DISCONNECT on listen fd's should be ignored.
1570 			 */
1571 			if (is_listen_fd_index(conn_index))
1572 				break;
1573 			else
1574 				goto fdclose;
1575 
1576 		case T_ERROR:
1577 		default:
1578 			if (event == T_ERROR || t_errno == TSYSERR) {
1579 				if ((errorstr = strerror(errno)) == NULL) {
1580 					(void) sprintf(buf,
1581 					    "Unknown error num %d", errno);
1582 					errorstr = (const char *) buf;
1583 				}
1584 			} else if (event == -1)
1585 				errorstr = t_strerror(t_errno);
1586 			else
1587 				errorstr = "";
1588 			syslog(LOG_ERR,
1589 			    "unexpected TLI event (0x%x) on "
1590 			    "connection-oriented transport(%s,%d):%s",
1591 			    event, nconf->nc_proto, fd, errorstr);
1592 fdclose:
1593 			num_conns--;
1594 			remove_from_poll_list(fd);
1595 			(void) t_close(fd);
1596 			return (0);
1597 		}
1598 	}
1599 
1600 	return (0);
1601 }
1602 
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name expected by the netdir_*() routines.  Names that are not
 * in the table map to "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc_name;
		char		*port_name;
	} port_map[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t slot;

	for (slot = 0; slot < sizeof (port_map) / sizeof (port_map[0]);
	    slot++) {
		if (strcmp(name, port_map[slot].svc_name) == 0)
			return (port_map[slot].port_name);
	}

	return ("unrecognized");
}
1620 
1621 static int
1622 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1623 		struct netconfig **retnconf)
1624 {
1625 	struct netconfig *nconf;
1626 	NCONF_HANDLE *nc;
1627 	struct nd_hostserv hs;
1628 
1629 	hs.h_host = HOST_SELF;
1630 	hs.h_serv = serv_name_to_port_name(serv);
1631 
1632 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1633 		syslog(LOG_ERR, "setnetconfig failed: %m");
1634 		return (-1);
1635 	}
1636 	while (nconf = getnetconfig(nc)) {
1637 		if (OK_TPI_TYPE(nconf) &&
1638 		    strcmp(nconf->nc_device, provider) == 0) {
1639 			*retnconf = nconf;
1640 			return (nfslib_bindit(nconf, addr, &hs,
1641 			    listen_backlog));
1642 		}
1643 	}
1644 	(void) endnetconfig(nc);
1645 
1646 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1647 	    provider);
1648 	return (-1);
1649 }
1650 
1651 static int
1652 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1653 		struct netconfig **retnconf)
1654 {
1655 	struct netconfig *nconf;
1656 	NCONF_HANDLE *nc = NULL;
1657 	struct nd_hostserv hs;
1658 
1659 	hs.h_host = HOST_SELF;
1660 	hs.h_serv = serv_name_to_port_name(serv);
1661 
1662 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1663 		syslog(LOG_ERR, "setnetconfig failed: %m");
1664 		return (-1);
1665 	}
1666 	while (nconf = getnetconfig(nc)) {
1667 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1668 			*retnconf = nconf;
1669 			return (nfslib_bindit(nconf, addr, &hs,
1670 			    listen_backlog));
1671 		}
1672 	}
1673 	(void) endnetconfig(nc);
1674 
1675 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1676 	    proto);
1677 	return (-1);
1678 }
1679 
1680 #include <netinet/in.h>
1681 
1682 /*
1683  * Create an address mask appropriate for the transport.
1684  * The mask is used to obtain the host-specific part of
1685  * a network address when comparing addresses.
1686  * For an internet address the host-specific part is just
1687  * the 32 bit IP address and this part of the mask is set
1688  * to all-ones. The port number part of the mask is zeroes.
1689  */
1690 static int
1691 set_addrmask(fd, nconf, mask)
1692 	struct netconfig *nconf;
1693 	struct netbuf *mask;
1694 {
1695 	struct t_info info;
1696 
1697 	/*
1698 	 * Find the size of the address we need to mask.
1699 	 */
1700 	if (t_getinfo(fd, &info) < 0) {
1701 		t_error("t_getinfo");
1702 		return (-1);
1703 	}
1704 	mask->len = mask->maxlen = info.addr;
1705 	if (info.addr <= 0) {
1706 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1707 			info.addr);
1708 		return (-1);
1709 	}
1710 
1711 	mask->buf = (char *)malloc(mask->len);
1712 	if (mask->buf == NULL) {
1713 		syslog(LOG_ERR, "set_addrmask: no memory");
1714 		return (-1);
1715 	}
1716 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1717 
1718 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1719 		/*
1720 		 * Set the mask so that the port is ignored.
1721 		 */
1722 		/* LINTED pointer alignment */
1723 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1724 								(ulong_t)~0;
1725 		/* LINTED pointer alignment */
1726 		((struct sockaddr_in *)mask->buf)->sin_family =
1727 								(ushort_t)~0;
1728 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1729 		/* LINTED pointer alignment */
1730 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1731 			(uchar_t)~0, sizeof (struct in6_addr));
1732 		/* LINTED pointer alignment */
1733 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1734 								(ushort_t)~0;
1735 	} else {
1736 
1737 		/*
1738 		 * Set all mask bits.
1739 		 */
1740 		(void) memset(mask->buf, 0xFF, mask->len);
1741 	}
1742 	return (0);
1743 }
1744 
1745 /*
1746  * For listen fd's index is always less than end_listen_fds.
1747  * end_listen_fds is defined externally in the daemon that uses this library.
1748  * It's value is equal to the number of open file descriptors after the
1749  * last listen end point was opened but before any connection was accepted.
1750  */
1751 static int
1752 is_listen_fd_index(int index)
1753 {
1754 	return (index < end_listen_fds);
1755 }
1756