xref: /titanic_52/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision 5c46c6a053defc1bf25f2e08df2653bc7717ed0b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * nfs_tbind.c, common part for nfsd and lockd.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <tiuser.h>
33 #include <fcntl.h>
34 #include <netconfig.h>
35 #include <stropts.h>
36 #include <errno.h>
37 #include <syslog.h>
38 #include <rpc/rpc.h>
39 #include <sys/time.h>
40 #include <sys/resource.h>
41 #include <signal.h>
42 #include <netdir.h>
43 #include <unistd.h>
44 #include <string.h>
45 #include <netinet/tcp.h>
46 #include <malloc.h>
47 #include <stdlib.h>
48 #include "nfs_tbind.h"
49 #include <nfs/nfs.h>
50 #include <nfs/nfs_acl.h>
51 #include <nfs/nfssys.h>
52 #include <nfs/nfs4.h>
53 #include <zone.h>
54 #include <sys/socket.h>
55 #include <tsol/label.h>
56 
/*
 * Determine valid semantics for most applications: evaluates to true
 * when the netconfig entry's nc_semantics is one of NC_TPI_CLTS,
 * NC_TPI_COTS or NC_TPI_COTS_ORD.
 *
 * The argument is parenthesized so that any pointer expression may be
 * passed; note that it can be evaluated up to three times.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
64 
/*
 * Assemble an unsigned value from the four bytes starting at `a',
 * treating the first byte as the most significant (big-endian order).
 *
 * The argument is parenthesized before the cast so that an expression
 * such as `p + 1' is converted as a whole rather than having only `p'
 * cast to a byte pointer.  The argument is evaluated four times.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
70 
71 /*
72  * Number of elements to add to the poll array on each allocation.
73  */
74 #define	POLL_ARRAY_INC_SIZE	64
75 
76 /*
77  * Number of file descriptors by which the process soft limit may be
78  * increased on each call to nofile_increase(0).
79  */
80 #define	NOFILE_INC_SIZE	64
81 
/*
 * One pending connection indication on a listening endpoint.
 * Entries are chained through conn_next/conn_prev; conn_call holds the
 * t_call structure that conn_get() allocates with t_alloc(T_CALL) and
 * fills in with t_listen().
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* next indication in the list */
	struct conn_ind *conn_prev;	/* previous indication in the list */
	struct t_call   *conn_call;	/* TLI call data for this indication */
};
87 
/*
 * Per-descriptor bookkeeping kept in conn_polled[], parallel to
 * poll_array[].
 */
struct conn_entry {
	/* set once t_sndrel() has been issued by conn_close_oldest() */
	bool_t			closing;
	/* copy of the netconfig entry the descriptor was opened on */
	struct netconfig	nc;
};
92 
93 /*
94  * this file contains transport routines common to nfsd and lockd
95  */
96 static	int	nofile_increase(int);
97 static	int	reuseaddr(int);
98 static	int	recvucred(int);
99 static  int	anonmlp(int);
100 static	void	add_to_poll_list(int, struct netconfig *);
101 static	char	*serv_name_to_port_name(char *);
102 static	int	bind_to_proto(char *, char *, struct netbuf **,
103 				struct netconfig **);
104 static	int	bind_to_provider(char *, char *, struct netbuf **,
105 					struct netconfig **);
106 static	void	conn_close_oldest(void);
107 static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
108 static	void	cots_listen_event(int, int);
109 static	int	discon_get(int, struct netconfig *, struct conn_ind **);
110 static	int	do_poll_clts_action(int, int);
111 static	int	do_poll_cots_action(int, int);
112 static	void	remove_from_poll_list(int);
113 static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
114 static	int	is_listen_fd_index(int);
115 
/* Descriptors handed to poll(); entry i pairs with conn_polled[i]. */
static	struct pollfd *poll_array;
/* Transport data and closing flag for each entry of poll_array. */
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional service hook.  When non-NULL this file calls it with
 * NFS4_SETPORT (for connection-oriented transports, after binding) or
 * NFS4_SETPORT|NFS4_KRPC_START (for connectionless transports when no
 * svc routine was supplied to do_one()).
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);
121 
122 /*
123  * Called to create and prepare a transport descriptor for in-kernel
124  * RPC service.
125  * Returns -1 on failure and a valid descriptor on success.
126  */
127 int
128 nfslib_transport_open(struct netconfig *nconf)
129 {
130 	int fd;
131 	struct strioctl	strioc;
132 
133 	if ((nconf == (struct netconfig *)NULL) ||
134 	    (nconf->nc_device == (char *)NULL)) {
135 		syslog(LOG_ERR, "no netconfig device");
136 		return (-1);
137 	}
138 
139 	/*
140 	 * Open the transport device.
141 	 */
142 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
143 	if (fd == -1) {
144 		if (t_errno == TSYSERR && errno == EMFILE &&
145 		    (nofile_increase(0) == 0)) {
146 			/* Try again with a higher NOFILE limit. */
147 			fd = t_open(nconf->nc_device, O_RDWR,
148 					(struct t_info *)NULL);
149 		}
150 		if (fd == -1) {
151 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
152 			    nconf->nc_device, t_errno);
153 			return (-1);
154 		}
155 	}
156 
157 	/*
158 	 * Pop timod because the RPC module must be as close as possible
159 	 * to the transport.
160 	 */
161 	if (ioctl(fd, I_POP, 0) < 0) {
162 		syslog(LOG_ERR, "I_POP of timod failed: %m");
163 		(void) t_close(fd);
164 		return (-1);
165 	}
166 
167 	/*
168 	 * Common code for CLTS and COTS transports
169 	 */
170 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
171 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
172 		(void) t_close(fd);
173 		return (-1);
174 	}
175 
176 	strioc.ic_cmd = RPC_SERVER;
177 	strioc.ic_dp = (char *)0;
178 	strioc.ic_len = 0;
179 	strioc.ic_timout = -1;
180 
181 	/* Tell rpcmod to act like a server stream. */
182 	if (ioctl(fd, I_STR, &strioc) < 0) {
183 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
184 		(void) t_close(fd);
185 		return (-1);
186 	}
187 
188 	/*
189 	 * Re-push timod so that we will still be doing TLI
190 	 * operations on the descriptor.
191 	 */
192 	if (ioctl(fd, I_PUSH, "timod") < 0) {
193 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
194 		(void) t_close(fd);
195 		return (-1);
196 	}
197 
198 	return (fd);
199 }
200 
201 static int
202 nofile_increase(int limit)
203 {
204 	struct rlimit rl;
205 
206 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
207 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
208 		return (-1);
209 	}
210 
211 	if (limit > 0)
212 		rl.rlim_cur = limit;
213 	else
214 		rl.rlim_cur += NOFILE_INC_SIZE;
215 
216 	if (rl.rlim_cur > rl.rlim_max &&
217 	    rl.rlim_max != RLIM_INFINITY)
218 		rl.rlim_max = rl.rlim_cur;
219 
220 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
221 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
222 			rl.rlim_cur);
223 		return (-1);
224 	}
225 
226 	return (0);
227 }
228 
229 int
230 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
231 	struct nd_hostserv *hs, int backlog)
232 {
233 	int fd;
234 	struct t_bind  *ntb;
235 	struct t_bind tb;
236 	struct nd_addrlist *addrlist;
237 	struct t_optmgmt req, resp;
238 	struct opthdr *opt;
239 	char reqbuf[128];
240 	bool_t use_any = FALSE;
241 	bool_t gzone = TRUE;
242 
243 	if ((fd = nfslib_transport_open(nconf)) == -1) {
244 		syslog(LOG_ERR, "cannot establish transport service over %s",
245 			nconf->nc_device);
246 		return (-1);
247 	}
248 
249 	addrlist = (struct nd_addrlist *)NULL;
250 
251 	/* nfs4_callback service does not used a fieed port number */
252 
253 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
254 		tb.addr.maxlen = 0;
255 		tb.addr.len = 0;
256 		tb.addr.buf = 0;
257 		use_any = TRUE;
258 		gzone = (getzoneid() == GLOBAL_ZONEID);
259 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
260 
261 		syslog(LOG_ERR,
262 		"Cannot get address for transport %s host %s service %s",
263 			nconf->nc_netid, hs->h_host, hs->h_serv);
264 		(void) t_close(fd);
265 		return (-1);
266 	}
267 
268 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
269 		/*
270 		 * If we're running over TCP, then set the
271 		 * SO_REUSEADDR option so that we can bind
272 		 * to our preferred address even if previously
273 		 * left connections exist in FIN_WAIT states.
274 		 * This is somewhat bogus, but otherwise you have
275 		 * to wait 2 minutes to restart after killing it.
276 		 */
277 		if (reuseaddr(fd) == -1) {
278 			syslog(LOG_WARNING,
279 			"couldn't set SO_REUSEADDR option on transport");
280 		}
281 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
282 		/*
283 		 * In order to run MLP on UDP, we need to handle creds.
284 		 */
285 		if (recvucred(fd) == -1) {
286 			syslog(LOG_WARNING,
287 			    "couldn't set SO_RECVUCRED option on transport");
288 		}
289 	}
290 
291 	/*
292 	 * Make non global zone nfs4_callback port MLP
293 	 */
294 	if (use_any && is_system_labeled() && !gzone) {
295 		if (anonmlp(fd) == -1) {
296 			/*
297 			 * failing to set this option means nfs4_callback
298 			 * could fail silently later. So fail it with
299 			 * with an error message now.
300 			 */
301 			syslog(LOG_ERR,
302 			    "couldn't set SO_ANON_MLP option on transport");
303 			(void) t_close(fd);
304 			return (-1);
305 		}
306 	}
307 
308 	if (nconf->nc_semantics == NC_TPI_CLTS)
309 		tb.qlen = 0;
310 	else
311 		tb.qlen = backlog;
312 
313 	/* LINTED pointer alignment */
314 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
315 	if (ntb == (struct t_bind *)NULL) {
316 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
317 		(void) t_close(fd);
318 		netdir_free((void *)addrlist, ND_ADDRLIST);
319 		return (-1);
320 	}
321 
322 	/*
323 	 * XXX - what about the space tb->addr.buf points to? This should
324 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
325 	 * should't be called with T_ALL.
326 	 */
327 	if (addrlist)
328 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
329 
330 	if (t_bind(fd, &tb, ntb) == -1) {
331 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
332 		(void) t_free((char *)ntb, T_BIND);
333 		netdir_free((void *)addrlist, ND_ADDRLIST);
334 		(void) t_close(fd);
335 		return (-1);
336 	}
337 
338 	/* make sure we bound to the right address */
339 	if (use_any == FALSE &&
340 	    (tb.addr.len != ntb->addr.len ||
341 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
342 		syslog(LOG_ERR, "t_bind to wrong address");
343 		(void) t_free((char *)ntb, T_BIND);
344 		netdir_free((void *)addrlist, ND_ADDRLIST);
345 		(void) t_close(fd);
346 		return (-1);
347 	}
348 
349 	/*
350 	 * Call nfs4svc_setport so that the kernel can be
351 	 * informed what port number the daemon is listing
352 	 * for incoming connection requests.
353 	 */
354 
355 	if ((nconf->nc_semantics == NC_TPI_COTS ||
356 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
357 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
358 
359 	*addr = &ntb->addr;
360 	netdir_free((void *)addrlist, ND_ADDRLIST);
361 
362 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
363 		/*
364 		 * Disable the Nagle algorithm on TCP connections.
365 		 * Connections accepted from this listener will
366 		 * inherit the listener options.
367 		 */
368 
369 		/* LINTED pointer alignment */
370 		opt = (struct opthdr *)reqbuf;
371 		opt->level = IPPROTO_TCP;
372 		opt->name = TCP_NODELAY;
373 		opt->len = sizeof (int);
374 
375 		/* LINTED pointer alignment */
376 		*(int *)((char *)opt + sizeof (*opt)) = 1;
377 
378 		req.flags = T_NEGOTIATE;
379 		req.opt.len = sizeof (*opt) + opt->len;
380 		req.opt.buf = (char *)opt;
381 		resp.flags = 0;
382 		resp.opt.buf = reqbuf;
383 		resp.opt.maxlen = sizeof (reqbuf);
384 
385 		if (t_optmgmt(fd, &req, &resp) < 0 ||
386 				resp.flags != T_SUCCESS) {
387 			syslog(LOG_ERR,
388 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
389 				nconf->nc_proto, t_errno);
390 		}
391 	}
392 
393 	return (fd);
394 }
395 
396 static int
397 setopt(int fd, int level, int name, int value)
398 {
399 	struct t_optmgmt req, resp;
400 	struct {
401 		struct opthdr opt;
402 		int value;
403 	} reqbuf;
404 
405 	reqbuf.opt.level = level;
406 	reqbuf.opt.name = name;
407 	reqbuf.opt.len = sizeof (int);
408 
409 	reqbuf.value = value;
410 
411 	req.flags = T_NEGOTIATE;
412 	req.opt.len = sizeof (reqbuf);
413 	req.opt.buf = (char *)&reqbuf;
414 
415 	resp.flags = 0;
416 	resp.opt.buf = (char *)&reqbuf;
417 	resp.opt.maxlen = sizeof (reqbuf);
418 
419 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
420 		t_error("t_optmgmt");
421 		return (-1);
422 	}
423 	return (0);
424 }
425 
/*
 * Turn on SO_REUSEADDR (SOL_SOCKET level) for the endpoint fd.
 * Returns what setopt() returns: 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1));
}
431 
/*
 * Turn on SO_RECVUCRED (SOL_SOCKET level) for the endpoint fd.
 * Returns what setopt() returns: 0 on success, -1 on failure.
 */
static int
recvucred(int fd)
{
	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
}
437 
/*
 * Turn on SO_ANON_MLP (SOL_SOCKET level) for the endpoint fd.
 * Returns what setopt() returns: 0 on success, -1 on failure.
 */
static int
anonmlp(int fd)
{
	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
}
443 
444 void
445 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
446 {
447 	int error;
448 
449 	/*
450 	 * Save the error code across syslog(), just in case syslog()
451 	 * gets its own error and, therefore, overwrites errno.
452 	 */
453 	error = errno;
454 	if (t_errno == TSYSERR) {
455 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
456 			tli_name, fd, nconf->nc_proto);
457 	} else {
458 		syslog(LOG_ERR,
459 			"%s(file descriptor %d/transport %s) TLI error %d",
460 			tli_name, fd, nconf->nc_proto, t_errno);
461 	}
462 	errno = error;
463 }
464 
465 /*
466  * Called to set up service over a particular transport.
467  */
468 void
469 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
470 	int (*svc)(int, struct netbuf, struct netconfig *))
471 {
472 	register int sock;
473 	struct protob *protobp;
474 	struct netbuf *retaddr;
475 	struct netconfig *retnconf;
476 	struct netbuf addrmask;
477 	int vers;
478 	int err;
479 	int l;
480 
481 	if (provider)
482 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
483 					&retnconf);
484 	else
485 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
486 					&retnconf);
487 
488 	if (sock == -1) {
489 		(void) syslog(LOG_ERR,
490 	"Cannot establish %s service over %s: transport setup problem.",
491 			protobp0->serv, provider ? provider : proto);
492 		return;
493 	}
494 
495 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
496 		(void) syslog(LOG_ERR,
497 		    "Cannot set address mask for %s", retnconf->nc_netid);
498 		return;
499 	}
500 
501 	/*
502 	 * Register all versions of the programs in the protocol block list.
503 	 */
504 	l = strlen(NC_UDP);
505 	for (protobp = protobp0; protobp; protobp = protobp->next) {
506 		for (vers = protobp->versmin; vers <= protobp->versmax;
507 			vers++) {
508 			if ((protobp->program == NFS_PROGRAM ||
509 				protobp->program == NFS_ACL_PROGRAM) &&
510 				vers == NFS_V4 &&
511 				strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
512 				continue;
513 
514 			(void) rpcb_unset(protobp->program, vers, retnconf);
515 			(void) rpcb_set(protobp->program, vers, retnconf,
516 					retaddr);
517 		}
518 	}
519 
520 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
521 		/* Don't drop core if supporting module(s) aren't loaded. */
522 		(void) signal(SIGSYS, SIG_IGN);
523 
524 		/*
525 		 * svc() doesn't block, it returns success or failure.
526 		 */
527 
528 		if (svc == NULL && Mysvc4 != NULL)
529 			err = (*Mysvc4)(sock, &addrmask, retnconf,
530 					NFS4_SETPORT|NFS4_KRPC_START, retaddr);
531 		else
532 			err = (*svc)(sock, addrmask, retnconf);
533 
534 		if (err < 0) {
535 			(void) syslog(LOG_ERR,
536 				"Cannot establish %s service over <file desc."
537 				" %d, protocol %s> : %m. Exiting",
538 				protobp0->serv, sock, retnconf->nc_proto);
539 			exit(1);
540 		}
541 	}
542 
543 	/*
544 	 * We successfully set up the server over this transport.
545 	 * Add this descriptor to the one being polled on.
546 	 */
547 	add_to_poll_list(sock, retnconf);
548 }
549 /*
550  * Set up the NFS service over all the available transports.
551  * Returns -1 for failure, 0 for success.
552  */
553 int
554 do_all(struct protob *protobp,
555 	int (*svc)(int, struct netbuf, struct netconfig *))
556 {
557 	struct netconfig *nconf;
558 	NCONF_HANDLE *nc;
559 	int l;
560 
561 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
562 		syslog(LOG_ERR, "setnetconfig failed: %m");
563 		return (-1);
564 	}
565 	l = strlen(NC_UDP);
566 	while (nconf = getnetconfig(nc)) {
567 		if ((nconf->nc_flag & NC_VISIBLE) &&
568 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
569 		    OK_TPI_TYPE(nconf) &&
570 		    (protobp->program != NFS4_CALLBACK ||
571 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
572 			do_one(nconf->nc_device, nconf->nc_proto,
573 				protobp, svc);
574 	}
575 	(void) endnetconfig(nc);
576 	return (0);
577 }
578 
579 /*
580  * poll on the open transport descriptors for events and errors.
581  */
582 void
583 poll_for_action(void)
584 {
585 	int nfds;
586 	int i;
587 
588 	/*
589 	 * Keep polling until all transports have been closed. When this
590 	 * happens, we return.
591 	 */
592 	while ((int)num_fds > 0) {
593 		nfds = poll(poll_array, num_fds, INFTIM);
594 		switch (nfds) {
595 		case 0:
596 			continue;
597 
598 		case -1:
599 			/*
600 			 * Some errors from poll could be
601 			 * due to temporary conditions, and we try to
602 			 * be robust in the face of them. Other
603 			 * errors (should never happen in theory)
604 			 * are fatal (eg. EINVAL, EFAULT).
605 			 */
606 			switch (errno) {
607 			case EINTR:
608 			    continue;
609 
610 			case EAGAIN:
611 			case ENOMEM:
612 				(void) sleep(10);
613 				continue;
614 
615 			default:
616 				(void) syslog(LOG_ERR,
617 						"poll failed: %m. Exiting");
618 				exit(1);
619 			}
620 		default:
621 			break;
622 		}
623 
624 		/*
625 		 * Go through the poll list looking for events.
626 		 */
627 		for (i = 0; i < num_fds && nfds > 0; i++) {
628 			if (poll_array[i].revents) {
629 				nfds--;
630 				/*
631 				 * We have a message, so try to read it.
632 				 * Record the error return in errno,
633 				 * so that syslog(LOG_ERR, "...%m")
634 				 * dumps the corresponding error string.
635 				 */
636 				if (conn_polled[i].nc.nc_semantics ==
637 				    NC_TPI_CLTS) {
638 					errno = do_poll_clts_action(
639 							poll_array[i].fd, i);
640 				} else {
641 					errno = do_poll_cots_action(
642 							poll_array[i].fd, i);
643 				}
644 
645 				if (errno == 0)
646 					continue;
647 				/*
648 				 * Most returned error codes mean that there is
649 				 * fatal condition which we can only deal with
650 				 * by closing the transport.
651 				 */
652 				if (errno != EAGAIN && errno != ENOMEM) {
653 					(void) syslog(LOG_ERR,
654 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
655 						poll_array[i].fd,
656 						conn_polled[i].nc.nc_proto);
657 					(void) t_close(poll_array[i].fd);
658 					remove_from_poll_list(poll_array[i].fd);
659 
660 				} else if (errno == ENOMEM)
661 					(void) sleep(5);
662 			}
663 		}
664 	}
665 
666 	(void) syslog(LOG_ERR,
667 		"All transports have been closed with errors. Exiting.");
668 }
669 
670 /*
671  * Allocate poll/transport array entries for this descriptor.
672  */
673 static void
674 add_to_poll_list(int fd, struct netconfig *nconf)
675 {
676 	static int poll_array_size = 0;
677 
678 	/*
679 	 * If the arrays are full, allocate new ones.
680 	 */
681 	if (num_fds == poll_array_size) {
682 		struct pollfd *tpa;
683 		struct conn_entry *tnp;
684 
685 		if (poll_array_size != 0) {
686 			tpa = poll_array;
687 			tnp = conn_polled;
688 		} else
689 			tpa = (struct pollfd *)0;
690 
691 		poll_array_size += POLL_ARRAY_INC_SIZE;
692 		/*
693 		 * Allocate new arrays.
694 		 */
695 		poll_array = (struct pollfd *)
696 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
697 		conn_polled = (struct conn_entry *)
698 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
699 		if (poll_array == (struct pollfd *)NULL ||
700 		    conn_polled == (struct conn_entry *)NULL) {
701 			syslog(LOG_ERR, "malloc failed for poll array");
702 			exit(1);
703 		}
704 
705 		/*
706 		 * Copy the data of the old ones into new arrays, and
707 		 * free the old ones.
708 		 */
709 		if (tpa) {
710 			(void) memcpy((void *)poll_array, (void *)tpa,
711 				num_fds * sizeof (struct pollfd));
712 			(void) memcpy((void *)conn_polled, (void *)tnp,
713 				num_fds * sizeof (struct conn_entry));
714 			free((void *)tpa);
715 			free((void *)tnp);
716 		}
717 	}
718 
719 	/*
720 	 * Set the descriptor and event list. All possible events are
721 	 * polled for.
722 	 */
723 	poll_array[num_fds].fd = fd;
724 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
725 
726 	/*
727 	 * Copy the transport data over too.
728 	 */
729 	conn_polled[num_fds].nc = *nconf;
730 	conn_polled[num_fds].closing = 0;
731 
732 	/*
733 	 * Set the descriptor to non-blocking. Avoids a race
734 	 * between data arriving on the stream and then having it
735 	 * flushed before we can read it.
736 	 */
737 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
738 		(void) syslog(LOG_ERR,
739 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
740 			num_fds, nconf->nc_proto);
741 		exit(1);
742 	}
743 
744 	/*
745 	 * Count this descriptor.
746 	 */
747 	++num_fds;
748 }
749 
750 static void
751 remove_from_poll_list(int fd)
752 {
753 	int i;
754 	int num_to_copy;
755 
756 	for (i = 0; i < num_fds; i++) {
757 		if (poll_array[i].fd == fd) {
758 			--num_fds;
759 			num_to_copy = num_fds - i;
760 			(void) memcpy((void *)&poll_array[i],
761 				(void *)&poll_array[i+1],
762 				num_to_copy * sizeof (struct pollfd));
763 			(void) memset((void *)&poll_array[num_fds], 0,
764 				sizeof (struct pollfd));
765 			(void) memcpy((void *)&conn_polled[i],
766 				(void *)&conn_polled[i+1],
767 				num_to_copy * sizeof (struct conn_entry));
768 			(void) memset((void *)&conn_polled[num_fds], 0,
769 				sizeof (struct conn_entry));
770 			return;
771 		}
772 	}
773 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
774 
775 }
776 
777 /*
778  * Called to read and interpret the event on a connectionless descriptor.
779  * Returns 0 if successful, or a UNIX error code if failure.
780  */
781 static int
782 do_poll_clts_action(int fd, int conn_index)
783 {
784 	int error;
785 	int ret;
786 	int flags;
787 	struct netconfig *nconf = &conn_polled[conn_index].nc;
788 	static struct t_unitdata *unitdata = NULL;
789 	static struct t_uderr *uderr = NULL;
790 	static int oldfd = -1;
791 	struct nd_hostservlist *host = NULL;
792 	struct strbuf ctl[1], data[1];
793 	/*
794 	 * We just need to have some space to consume the
795 	 * message in the event we can't use the TLI interface to do the
796 	 * job.
797 	 *
798 	 * We flush the message using getmsg(). For the control part
799 	 * we allocate enough for any TPI header plus 32 bytes for address
800 	 * and options. For the data part, there is nothing magic about
801 	 * the size of the array, but 256 bytes is probably better than
802 	 * 1 byte, and we don't expect any data portion anyway.
803 	 *
804 	 * If the array sizes are too small, we handle this because getmsg()
805 	 * (called to consume the message) will return MOREDATA|MORECTL.
806 	 * Thus we just call getmsg() until it's read the message.
807 	 */
808 	char ctlbuf[sizeof (union T_primitives) + 32];
809 	char databuf[256];
810 
811 	/*
812 	 * If this is the same descriptor as the last time
813 	 * do_poll_clts_action was called, we can save some
814 	 * de-allocation and allocation.
815 	 */
816 	if (oldfd != fd) {
817 		oldfd = fd;
818 
819 		if (unitdata) {
820 			(void) t_free((char *)unitdata, T_UNITDATA);
821 			unitdata = NULL;
822 		}
823 		if (uderr) {
824 			(void) t_free((char *)uderr, T_UDERROR);
825 			uderr = NULL;
826 		}
827 	}
828 
829 	/*
830 	 * Allocate a unitdata structure for receiving the event.
831 	 */
832 	if (unitdata == NULL) {
833 		/* LINTED pointer alignment */
834 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
835 		if (unitdata == NULL) {
836 			if (t_errno == TSYSERR) {
837 				/*
838 				 * Save the error code across
839 				 * syslog(), just in case
840 				 * syslog() gets its own error
841 				 * and therefore overwrites errno.
842 				 */
843 				error = errno;
844 				(void) syslog(LOG_ERR,
845 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
846 					fd, nconf->nc_proto);
847 				return (error);
848 			}
849 			(void) syslog(LOG_ERR,
850 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
851 					fd, nconf->nc_proto, t_errno);
852 			goto flush_it;
853 		}
854 	}
855 
856 try_again:
857 	flags = 0;
858 
859 	/*
860 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
861 	 * we don't get any, because rpcmod filters them out.
862 	 * However, we need to call t_rcvudata() to let TLI
863 	 * tell us we have a T_UDERROR_IND.
864 	 *
865 	 * algorithm is:
866 	 * 	t_rcvudata(), expecting TLOOK.
867 	 * 	t_look(), expecting T_UDERR.
868 	 * 	t_rcvuderr(), expecting success (0).
869 	 * 	expand destination address into ASCII,
870 	 *	and dump it.
871 	 */
872 
873 	ret = t_rcvudata(fd, unitdata, &flags);
874 	if (ret == 0 || t_errno == TBUFOVFLW) {
875 		(void) syslog(LOG_WARNING,
876 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
877 			fd, nconf->nc_proto, unitdata->udata.len);
878 
879 		/*
880 		 * Even though we don't expect any data, in case we do,
881 		 * keep reading until there is no more.
882 		 */
883 		if (flags & T_MORE)
884 			goto try_again;
885 
886 		return (0);
887 	}
888 
889 	switch (t_errno) {
890 	case TNODATA:
891 		return (0);
892 	case TSYSERR:
893 		/*
894 		 * System errors are returned to caller.
895 		 * Save the error code across
896 		 * syslog(), just in case
897 		 * syslog() gets its own error
898 		 * and therefore overwrites errno.
899 		 */
900 		error = errno;
901 		(void) syslog(LOG_ERR,
902 			"t_rcvudata(file descriptor %d/transport %s) %m",
903 			fd, nconf->nc_proto);
904 		return (error);
905 	case TLOOK:
906 		break;
907 	default:
908 		(void) syslog(LOG_ERR,
909 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
910 			fd, nconf->nc_proto, t_errno);
911 		goto flush_it;
912 	}
913 
914 	ret = t_look(fd);
915 	switch (ret) {
916 	case 0:
917 		return (0);
918 	case -1:
919 		/*
920 		 * System errors are returned to caller.
921 		 */
922 		if (t_errno == TSYSERR) {
923 			/*
924 			 * Save the error code across
925 			 * syslog(), just in case
926 			 * syslog() gets its own error
927 			 * and therefore overwrites errno.
928 			 */
929 			error = errno;
930 			(void) syslog(LOG_ERR,
931 				"t_look(file descriptor %d/transport %s) %m",
932 				fd, nconf->nc_proto);
933 			return (error);
934 		}
935 		(void) syslog(LOG_ERR,
936 			"t_look(file descriptor %d/transport %s) TLI error %d",
937 			fd, nconf->nc_proto, t_errno);
938 		goto flush_it;
939 	case T_UDERR:
940 		break;
941 	default:
942 		(void) syslog(LOG_WARNING,
943 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
944 			fd, nconf->nc_proto, ret, T_UDERR);
945 	}
946 
947 	if (uderr == NULL) {
948 		/* LINTED pointer alignment */
949 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
950 		if (uderr == NULL) {
951 			if (t_errno == TSYSERR) {
952 				/*
953 				 * Save the error code across
954 				 * syslog(), just in case
955 				 * syslog() gets its own error
956 				 * and therefore overwrites errno.
957 				 */
958 				error = errno;
959 				(void) syslog(LOG_ERR,
960 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
961 					fd, nconf->nc_proto);
962 				return (error);
963 			}
964 			(void) syslog(LOG_ERR,
965 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
966 				fd, nconf->nc_proto, t_errno);
967 			goto flush_it;
968 		}
969 	}
970 
971 	ret = t_rcvuderr(fd, uderr);
972 	if (ret == 0) {
973 
974 		/*
975 		 * Save the datagram error in errno, so that the
976 		 * %m argument to syslog picks up the error string.
977 		 */
978 		errno = uderr->error;
979 
980 		/*
981 		 * Log the datagram error, then log the host that
982 		 * probably triggerred. Cannot log both in the
983 		 * same transaction because of packet size limitations
984 		 * in /dev/log.
985 		 */
986 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
987 "NFS response over <file descriptor %d/transport %s> generated error: %m",
988 			fd, nconf->nc_proto);
989 
990 		/*
991 		 * Try to map the client's address back to a
992 		 * name.
993 		 */
994 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
995 		if (ret != -1 && host && host->h_cnt > 0 &&
996 		    host->h_hostservs) {
997 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
998 "Bad NFS response was sent to client with host name: %s; service port: %s",
999 				host->h_hostservs->h_host,
1000 				host->h_hostservs->h_serv);
1001 		} else {
1002 			int i, j;
1003 			char *buf;
1004 			char *hex = "0123456789abcdef";
1005 
1006 			/*
1007 			 * Mapping failed, print the whole thing
1008 			 * in ASCII hex.
1009 			 */
1010 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1011 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1012 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1013 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1014 			}
1015 			buf[j] = '\0';
1016 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1017 	"Bad NFS response was sent to client with transport address: 0x%s",
1018 				buf);
1019 			free((void *)buf);
1020 		}
1021 
1022 		if (ret == 0 && host != NULL)
1023 			netdir_free((void *)host, ND_HOSTSERVLIST);
1024 		return (0);
1025 	}
1026 
1027 	switch (t_errno) {
1028 	case TNOUDERR:
1029 		goto flush_it;
1030 	case TSYSERR:
1031 		/*
1032 		 * System errors are returned to caller.
1033 		 * Save the error code across
1034 		 * syslog(), just in case
1035 		 * syslog() gets its own error
1036 		 * and therefore overwrites errno.
1037 		 */
1038 		error = errno;
1039 		(void) syslog(LOG_ERR,
1040 			"t_rcvuderr(file descriptor %d/transport %s) %m",
1041 			fd, nconf->nc_proto);
1042 		return (error);
1043 	default:
1044 		(void) syslog(LOG_ERR,
1045 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1046 			fd, nconf->nc_proto, t_errno);
1047 		goto flush_it;
1048 	}
1049 
1050 flush_it:
1051 	/*
1052 	 * If we get here, then we could not cope with whatever message
1053 	 * we attempted to read, so flush it. If we did read a message,
1054 	 * and one isn't present, that is all right, because fd is in
1055 	 * nonblocking mode.
1056 	 */
1057 	(void) syslog(LOG_ERR,
1058 	"Flushing one input message from <file descriptor %d/transport %s>",
1059 		fd, nconf->nc_proto);
1060 
1061 	/*
1062 	 * Read and discard the message. Do this this until there is
1063 	 * no more control/data in the message or until we get an error.
1064 	 */
1065 	do {
1066 		ctl->maxlen = sizeof (ctlbuf);
1067 		ctl->buf = ctlbuf;
1068 		data->maxlen = sizeof (databuf);
1069 		data->buf = databuf;
1070 		flags = 0;
1071 		ret = getmsg(fd, ctl, data, &flags);
1072 		if (ret == -1)
1073 			return (errno);
1074 	} while (ret != 0);
1075 
1076 	return (0);
1077 }
1078 
1079 static void
1080 conn_close_oldest(void)
1081 {
1082 	int fd;
1083 	int i1;
1084 
1085 	/*
1086 	 * Find the oldest connection that is not already in the
1087 	 * process of shutting down.
1088 	 */
1089 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1090 		if (i1 >= num_fds)
1091 			return;
1092 		if (conn_polled[i1].closing == 0)
1093 			break;
1094 	}
1095 #ifdef DEBUG
1096 	printf("too many connections (%d), releasing oldest (%d)\n",
1097 		num_conns, poll_array[i1].fd);
1098 #else
1099 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1100 		num_conns, poll_array[i1].fd);
1101 #endif
1102 	fd = poll_array[i1].fd;
1103 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1104 		/*
1105 		 * For politeness, send a T_DISCON_REQ to the transport
1106 		 * provider.  We close the stream anyway.
1107 		 */
1108 		(void) t_snddis(fd, (struct t_call *)0);
1109 		num_conns--;
1110 		remove_from_poll_list(fd);
1111 		(void) t_close(fd);
1112 	} else {
1113 		/*
1114 		 * For orderly release, we do not close the stream
1115 		 * until the T_ORDREL_IND arrives to complete
1116 		 * the handshake.
1117 		 */
1118 		if (t_sndrel(fd) == 0)
1119 			conn_polled[i1].closing = 1;
1120 	}
1121 }
1122 
1123 static boolean_t
1124 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1125 {
1126 	struct conn_ind	*conn;
1127 	struct conn_ind	*next_conn;
1128 
1129 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1130 	if (conn == NULL) {
1131 		syslog(LOG_ERR, "malloc for listen indication failed");
1132 		return (FALSE);
1133 	}
1134 
1135 	/* LINTED pointer alignment */
1136 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1137 	if (conn->conn_call == NULL) {
1138 		free((char *)conn);
1139 		nfslib_log_tli_error("t_alloc", fd, nconf);
1140 		return (FALSE);
1141 	}
1142 
1143 	if (t_listen(fd, conn->conn_call) == -1) {
1144 		nfslib_log_tli_error("t_listen", fd, nconf);
1145 		(void) t_free((char *)conn->conn_call, T_CALL);
1146 		free((char *)conn);
1147 		return (FALSE);
1148 	}
1149 
1150 	if (conn->conn_call->udata.len > 0) {
1151 		syslog(LOG_WARNING,
1152 	"rejecting inbound connection(%s) with %d bytes of connect data",
1153 			nconf->nc_proto, conn->conn_call->udata.len);
1154 
1155 		conn->conn_call->udata.len = 0;
1156 		(void) t_snddis(fd, conn->conn_call);
1157 		(void) t_free((char *)conn->conn_call, T_CALL);
1158 		free((char *)conn);
1159 		return (FALSE);
1160 	}
1161 
1162 	if ((next_conn = *connp) != NULL) {
1163 		next_conn->conn_prev->conn_next = conn;
1164 		conn->conn_next = next_conn;
1165 		conn->conn_prev = next_conn->conn_prev;
1166 		next_conn->conn_prev = conn;
1167 	} else {
1168 		conn->conn_next = conn;
1169 		conn->conn_prev = conn;
1170 		*connp = conn;
1171 	}
1172 	return (TRUE);
1173 }
1174 
1175 static int
1176 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1177 {
1178 	struct conn_ind	*conn;
1179 	struct t_discon	discon;
1180 
1181 	discon.udata.buf = (char *)0;
1182 	discon.udata.maxlen = 0;
1183 	if (t_rcvdis(fd, &discon) == -1) {
1184 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1185 		return (-1);
1186 	}
1187 
1188 	conn = *connp;
1189 	if (conn == NULL)
1190 		return (0);
1191 
1192 	do {
1193 		if (conn->conn_call->sequence == discon.sequence) {
1194 			if (conn->conn_next == conn)
1195 				*connp = (struct conn_ind *)0;
1196 			else {
1197 				if (conn == *connp) {
1198 					*connp = conn->conn_next;
1199 				}
1200 				conn->conn_next->conn_prev = conn->conn_prev;
1201 				conn->conn_prev->conn_next = conn->conn_next;
1202 			}
1203 			free((char *)conn);
1204 			break;
1205 		}
1206 		conn = conn->conn_next;
1207 	} while (conn != *connp);
1208 
1209 	return (0);
1210 }
1211 
1212 static void
1213 cots_listen_event(int fd, int conn_index)
1214 {
1215 	struct t_call *call;
1216 	struct conn_ind	*conn;
1217 	struct conn_ind	*conn_head;
1218 	int event;
1219 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1220 	int new_fd;
1221 	struct netbuf addrmask;
1222 	int ret = 0;
1223 	char *clnt;
1224 	char *clnt_uaddr = NULL;
1225 	struct nd_hostservlist *clnt_serv = NULL;
1226 
1227 	conn_head = (struct conn_ind *)0;
1228 	(void) conn_get(fd, nconf, &conn_head);
1229 
1230 	while ((conn = conn_head) != NULL) {
1231 		conn_head = conn->conn_next;
1232 		if (conn_head == conn)
1233 			conn_head = (struct conn_ind *)0;
1234 		else {
1235 			conn_head->conn_prev = conn->conn_prev;
1236 			conn->conn_prev->conn_next = conn_head;
1237 		}
1238 		call = conn->conn_call;
1239 		free((char *)conn);
1240 
1241 		/*
1242 		 * If we have already accepted the maximum number of
1243 		 * connections allowed on the command line, then drop
1244 		 * the oldest connection (for any protocol) before
1245 		 * accepting the new connection.  Unless explicitly
1246 		 * set on the command line, max_conns_allowed is -1.
1247 		 */
1248 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1249 			conn_close_oldest();
1250 
1251 		/*
1252 		 * Create a new transport endpoint for the same proto as
1253 		 * the listener.
1254 		 */
1255 		new_fd = nfslib_transport_open(nconf);
1256 		if (new_fd == -1) {
1257 			call->udata.len = 0;
1258 			(void) t_snddis(fd, call);
1259 			(void) t_free((char *)call, T_CALL);
1260 			syslog(LOG_ERR, "Cannot establish transport over %s",
1261 				nconf->nc_device);
1262 			continue;
1263 		}
1264 
1265 		/* Bind to a generic address/port for the accepting stream. */
1266 		if (t_bind(new_fd, (struct t_bind *)NULL,
1267 		    (struct t_bind *)NULL) == -1) {
1268 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1269 			call->udata.len = 0;
1270 			(void) t_snddis(fd, call);
1271 			(void) t_free((char *)call, T_CALL);
1272 			(void) t_close(new_fd);
1273 			continue;
1274 		}
1275 
1276 		while (t_accept(fd, new_fd, call) == -1) {
1277 			if (t_errno != TLOOK) {
1278 #ifdef DEBUG
1279 				nfslib_log_tli_error("t_accept", fd, nconf);
1280 #endif
1281 				call->udata.len = 0;
1282 				(void) t_snddis(fd, call);
1283 				(void) t_free((char *)call, T_CALL);
1284 				(void) t_close(new_fd);
1285 				goto do_next_conn;
1286 			}
1287 			while (event = t_look(fd)) {
1288 				switch (event) {
1289 				case T_LISTEN:
1290 #ifdef DEBUG
1291 					printf(
1292 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1293 #endif
1294 					(void) conn_get(fd, nconf, &conn_head);
1295 					continue;
1296 				case T_DISCONNECT:
1297 #ifdef DEBUG
1298 					printf(
1299 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1300 						nconf->nc_proto);
1301 #endif
1302 					(void) discon_get(fd, nconf,
1303 								&conn_head);
1304 					continue;
1305 				default:
1306 					syslog(LOG_ERR,
1307 			"unexpected event 0x%x during accept processing (%s)",
1308 						event, nconf->nc_proto);
1309 					call->udata.len = 0;
1310 					(void) t_snddis(fd, call);
1311 					(void) t_free((char *)call, T_CALL);
1312 					(void) t_close(new_fd);
1313 					goto do_next_conn;
1314 				}
1315 			}
1316 		}
1317 
1318 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1319 			(void) syslog(LOG_ERR,
1320 			    "Cannot set address mask for %s",
1321 				nconf->nc_netid);
1322 			return;
1323 		}
1324 
1325 		/* Tell KRPC about the new stream. */
1326 		if (Mysvc4 != NULL)
1327 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1328 				NFS4_KRPC_START, &call->addr);
1329 		else
1330 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1331 
1332 		if (ret < 0) {
1333 			if (errno != ENOTCONN) {
1334 				syslog(LOG_ERR,
1335 				    "unable to register new connection: %m");
1336 			} else {
1337 				/*
1338 				 * This is the only error that could be
1339 				 * caused by the client, so who was it?
1340 				 */
1341 				if (netdir_getbyaddr(nconf, &clnt_serv,
1342 				    &(call->addr)) == ND_OK &&
1343 				    clnt_serv->h_cnt > 0)
1344 					clnt = clnt_serv->h_hostservs->h_host;
1345 				else
1346 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1347 					    &(call->addr));
1348 				/*
1349 				 * If we don't know who the client was,
1350 				 * remain silent.
1351 				 */
1352 				if (clnt)
1353 					syslog(LOG_ERR,
1354 "unable to register new connection: client %s has dropped connection", clnt);
1355 				if (clnt_serv)
1356 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1357 				if (clnt_uaddr)
1358 					free(clnt_uaddr);
1359 			}
1360 			free(addrmask.buf);
1361 			(void) t_snddis(new_fd, (struct t_call *)0);
1362 			(void) t_free((char *)call, T_CALL);
1363 			(void) t_close(new_fd);
1364 			goto do_next_conn;
1365 		}
1366 
1367 		free(addrmask.buf);
1368 		(void) t_free((char *)call, T_CALL);
1369 
1370 		/*
1371 		 * Poll on the new descriptor so that we get disconnect
1372 		 * and orderly release indications.
1373 		 */
1374 		num_conns++;
1375 		add_to_poll_list(new_fd, nconf);
1376 
1377 		/* Reset nconf in case it has been moved. */
1378 		nconf = &conn_polled[conn_index].nc;
1379 do_next_conn:;
1380 	}
1381 }
1382 
1383 static int
1384 do_poll_cots_action(int fd, int conn_index)
1385 {
1386 	char buf[256];
1387 	int event;
1388 	int i1;
1389 	int flags;
1390 	struct conn_entry *connent = &conn_polled[conn_index];
1391 	struct netconfig *nconf = &(connent->nc);
1392 	const char *errorstr;
1393 
1394 	while (event = t_look(fd)) {
1395 		switch (event) {
1396 		case T_LISTEN:
1397 #ifdef DEBUG
1398 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1399 #endif
1400 			cots_listen_event(fd, conn_index);
1401 			break;
1402 
1403 		case T_DATA:
1404 #ifdef DEBUG
1405 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1406 #endif
1407 			/*
1408 			 * Receive a private notification from CONS rpcmod.
1409 			 */
1410 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1411 			if (i1 == -1) {
1412 				syslog(LOG_ERR, "t_rcv failed");
1413 				break;
1414 			}
1415 			if (i1 < sizeof (int))
1416 				break;
1417 			i1 = BE32_TO_U32(buf);
1418 			if (i1 == 1 || i1 == 2) {
1419 				/*
1420 				 * This connection has been idle for too long,
1421 				 * so release it as politely as we can.  If we
1422 				 * have already initiated an orderly release
1423 				 * and we get notified that the stream is
1424 				 * still idle, pull the plug.  This prevents
1425 				 * hung connections from continuing to consume
1426 				 * resources.
1427 				 */
1428 #ifdef DEBUG
1429 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1430 printf("initiating orderly release of idle connection\n");
1431 #endif
1432 				if (nconf->nc_semantics == NC_TPI_COTS ||
1433 					connent->closing != 0) {
1434 					(void) t_snddis(fd, (struct t_call *)0);
1435 					goto fdclose;
1436 				}
1437 				/*
1438 				 * For NC_TPI_COTS_ORD, the stream is closed
1439 				 * and removed from the poll list when the
1440 				 * T_ORDREL is received from the provider.  We
1441 				 * don't wait for it here because it may take
1442 				 * a while for the transport to shut down.
1443 				 */
1444 				if (t_sndrel(fd) == -1) {
1445 					syslog(LOG_ERR,
1446 					"unable to send orderly release %m");
1447 				}
1448 				connent->closing = 1;
1449 			} else
1450 				syslog(LOG_ERR,
1451 				"unexpected event from CONS rpcmod %d", i1);
1452 			break;
1453 
1454 		case T_ORDREL:
1455 #ifdef DEBUG
1456 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1457 #endif
1458 			/* Perform an orderly release. */
1459 			if (t_rcvrel(fd) == 0) {
1460 				/* T_ORDREL on listen fd's should be ignored */
1461 				if (!is_listen_fd_index(conn_index)) {
1462 					(void) t_sndrel(fd);
1463 					goto fdclose;
1464 				}
1465 				break;
1466 
1467 			} else if (t_errno == TLOOK) {
1468 				break;
1469 			} else {
1470 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1471 
1472 				/*
1473 				 * check to make sure we do not close
1474 				 * listen fd
1475 				 */
1476 				if (is_listen_fd_index(conn_index))
1477 					break;
1478 				else
1479 					goto fdclose;
1480 			}
1481 
1482 		case T_DISCONNECT:
1483 #ifdef DEBUG
1484 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1485 #endif
1486 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1487 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1488 
1489 			/*
1490 			 * T_DISCONNECT on listen fd's should be ignored.
1491 			 */
1492 			if (is_listen_fd_index(conn_index))
1493 				break;
1494 			else
1495 				goto fdclose;
1496 
1497 		case T_ERROR:
1498 		default:
1499 			if (event == T_ERROR || t_errno == TSYSERR) {
1500 			    if ((errorstr = strerror(errno)) == NULL) {
1501 				(void) sprintf(buf, "Unknown error num %d",
1502 									errno);
1503 				errorstr = (const char *) buf;
1504 			    }
1505 			} else if (event == -1)
1506 				errorstr = t_strerror(t_errno);
1507 			else
1508 				errorstr = "";
1509 			syslog(LOG_ERR,
1510 			    "unexpected TLI event (0x%x) on "
1511 			    "connection-oriented transport(%s,%d):%s",
1512 			    event, nconf->nc_proto, fd, errorstr);
1513 fdclose:
1514 			num_conns--;
1515 			remove_from_poll_list(fd);
1516 			(void) t_close(fd);
1517 			return (0);
1518 		}
1519 	}
1520 
1521 	return (0);
1522 }
1523 
/*
 * Map a service name (used primarily in logging) to the RPC port name
 * used by the netdir_*() routines.  The match is exact and
 * case-sensitive; any other name yields "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc;
		char		*port;
	} svc_ports[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t i;

	for (i = 0; i < sizeof (svc_ports) / sizeof (svc_ports[0]); i++) {
		if (strcmp(name, svc_ports[i].svc) == 0)
			return (svc_ports[i].port);
	}

	return ("unrecognized");
}
1541 
1542 static int
1543 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1544 		struct netconfig **retnconf)
1545 {
1546 	struct netconfig *nconf;
1547 	NCONF_HANDLE *nc;
1548 	struct nd_hostserv hs;
1549 
1550 	hs.h_host = HOST_SELF;
1551 	hs.h_serv = serv_name_to_port_name(serv);
1552 
1553 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1554 		syslog(LOG_ERR, "setnetconfig failed: %m");
1555 		return (-1);
1556 	}
1557 	while (nconf = getnetconfig(nc)) {
1558 		if (OK_TPI_TYPE(nconf) &&
1559 		    strcmp(nconf->nc_device, provider) == 0) {
1560 			*retnconf = nconf;
1561 			return (nfslib_bindit(nconf, addr, &hs,
1562 					listen_backlog));
1563 		}
1564 	}
1565 	(void) endnetconfig(nc);
1566 
1567 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1568 	    provider);
1569 	return (-1);
1570 }
1571 
1572 static int
1573 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1574 		struct netconfig **retnconf)
1575 {
1576 	struct netconfig *nconf;
1577 	NCONF_HANDLE *nc = NULL;
1578 	struct nd_hostserv hs;
1579 
1580 	hs.h_host = HOST_SELF;
1581 	hs.h_serv = serv_name_to_port_name(serv);
1582 
1583 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1584 		syslog(LOG_ERR, "setnetconfig failed: %m");
1585 		return (-1);
1586 	}
1587 	while (nconf = getnetconfig(nc)) {
1588 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1589 			*retnconf = nconf;
1590 			return (nfslib_bindit(nconf, addr, &hs,
1591 					listen_backlog));
1592 		}
1593 	}
1594 	(void) endnetconfig(nc);
1595 
1596 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1597 	    proto);
1598 	return (-1);
1599 }
1600 
1601 #include <netinet/in.h>
1602 
1603 /*
1604  * Create an address mask appropriate for the transport.
1605  * The mask is used to obtain the host-specific part of
1606  * a network address when comparing addresses.
1607  * For an internet address the host-specific part is just
1608  * the 32 bit IP address and this part of the mask is set
1609  * to all-ones. The port number part of the mask is zeroes.
1610  */
1611 static int
1612 set_addrmask(fd, nconf, mask)
1613 	struct netconfig *nconf;
1614 	struct netbuf *mask;
1615 {
1616 	struct t_info info;
1617 
1618 	/*
1619 	 * Find the size of the address we need to mask.
1620 	 */
1621 	if (t_getinfo(fd, &info) < 0) {
1622 		t_error("t_getinfo");
1623 		return (-1);
1624 	}
1625 	mask->len = mask->maxlen = info.addr;
1626 	if (info.addr <= 0) {
1627 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1628 			info.addr);
1629 		return (-1);
1630 	}
1631 
1632 	mask->buf = (char *)malloc(mask->len);
1633 	if (mask->buf == NULL) {
1634 		syslog(LOG_ERR, "set_addrmask: no memory");
1635 		return (-1);
1636 	}
1637 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1638 
1639 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1640 		/*
1641 		 * Set the mask so that the port is ignored.
1642 		 */
1643 		/* LINTED pointer alignment */
1644 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1645 								(ulong_t)~0;
1646 		/* LINTED pointer alignment */
1647 		((struct sockaddr_in *)mask->buf)->sin_family =
1648 								(ushort_t)~0;
1649 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1650 		/* LINTED pointer alignment */
1651 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1652 			(uchar_t)~0, sizeof (struct in6_addr));
1653 		/* LINTED pointer alignment */
1654 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1655 								(ushort_t)~0;
1656 	} else {
1657 
1658 		/*
1659 		 * Set all mask bits.
1660 		 */
1661 		(void) memset(mask->buf, 0xFF, mask->len);
1662 	}
1663 	return (0);
1664 }
1665 
1666 /*
1667  * For listen fd's index is always less than end_listen_fds.
1668  * end_listen_fds is defined externally in the daemon that uses this library.
1669  * It's value is equal to the number of open file descriptors after the
1670  * last listen end point was opened but before any connection was accepted.
1671  */
1672 static int
1673 is_listen_fd_index(int index)
1674 {
1675 	return (index < end_listen_fds);
1676 }
1677