xref: /titanic_44/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision 753d2d2e8e7fd0c9bcf736d9bf2f2faf4d6234cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * nfs_tbind.c, common part for nfsd and lockd.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #define	PORTMAP
33 
34 #include <tiuser.h>
35 #include <fcntl.h>
36 #include <netconfig.h>
37 #include <stropts.h>
38 #include <errno.h>
39 #include <syslog.h>
40 #include <rpc/rpc.h>
41 #include <rpc/pmap_prot.h>
42 #include <sys/time.h>
43 #include <sys/resource.h>
44 #include <signal.h>
45 #include <netdir.h>
46 #include <unistd.h>
47 #include <string.h>
48 #include <netinet/tcp.h>
49 #include <malloc.h>
50 #include <stdlib.h>
51 #include "nfs_tbind.h"
52 #include <nfs/nfs.h>
53 #include <nfs/nfs_acl.h>
54 #include <nfs/nfssys.h>
55 #include <nfs/nfs4.h>
56 #include <zone.h>
57 #include <sys/socket.h>
58 #include <tsol/label.h>
59 
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to non-zero when the nc_semantics member of the structure
 * pointed to by _nconf is NC_TPI_CLTS, NC_TPI_COTS or NC_TPI_COTS_ORD.
 * The argument is parenthesized so that any pointer expression (for
 * example "&entry" or "list + 1") may be passed.  Note that it may be
 * evaluated up to three times, so it must be free of side effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
67 
/*
 * Assemble a 32-bit value from the four bytes starting at address a,
 * taking the byte at offset 0 as the most significant.  The argument is
 * parenthesized before the cast so that pointer expressions such as
 * "p + 1" are converted as a whole rather than having the arithmetic
 * applied after the cast.  The argument is evaluated four times.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
73 
74 /*
75  * Number of elements to add to the poll array on each allocation.
76  */
77 #define	POLL_ARRAY_INC_SIZE	64
78 
79 /*
80  * Number of file descriptors by which the process soft limit may be
81  * increased on each call to nofile_increase(0).
82  */
83 #define	NOFILE_INC_SIZE	64
84 
/*
 * One pending connection indication.  conn_get() allocates an entry and
 * fills conn_call from t_alloc(fd, T_CALL, T_ALL).  The next/prev pointers
 * presumably chain entries into a doubly linked list -- the list handling
 * is outside this part of the file; confirm against cots_listen_event().
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* following entry */
	struct conn_ind *conn_prev;	/* preceding entry */
	struct t_call   *conn_call;	/* TLI call structure for the request */
};
90 
/*
 * Per-descriptor state kept in the conn_polled array, which is indexed
 * in step with poll_array (see add_to_poll_list()).
 */
struct conn_entry {
	/* set to 1 by conn_close_oldest() once t_sndrel() has succeeded */
	bool_t			closing;
	/* private copy of the transport's netconfig entry */
	struct netconfig	nc;
};
95 
/*
 * this file contains transport routines common to nfsd and lockd
 */

/* Forward declarations of the file-local helpers. */
static	int	nofile_increase(int);
static	int	reuseaddr(int);
static	int	recvucred(int);
static  int	anonmlp(int);
static	void	add_to_poll_list(int, struct netconfig *);
static	char	*serv_name_to_port_name(char *);
static	int	bind_to_proto(char *, char *, struct netbuf **,
				struct netconfig **);
static	int	bind_to_provider(char *, char *, struct netbuf **,
					struct netconfig **);
static	void	conn_close_oldest(void);
static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static	void	cots_listen_event(int, int);
static	int	discon_get(int, struct netconfig *, struct conn_ind **);
static	int	do_poll_clts_action(int, int);
static	int	do_poll_cots_action(int, int);
static	void	remove_from_poll_list(int);
static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
static	int	is_listen_fd_index(int);

/*
 * poll_array and conn_polled are parallel arrays grown together by
 * add_to_poll_list(); entry i of each describes the same descriptor.
 */
static	struct pollfd *poll_array;
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional hook invoked from nfslib_bindit() and do_one() with the
 * NFS4_SETPORT / NFS4_KRPC_START commands; it is only called when
 * non-NULL.  Presumably assigned by the daemon that links this file.
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);

/* Portmapper registration routine used by do_one() when use_pmap is set. */
extern bool_t __pmap_set(const rpcprog_t program, const rpcvers_t version,
    const struct netconfig *nconf, const struct netbuf *address);
127 
128 /*
129  * Called to create and prepare a transport descriptor for in-kernel
130  * RPC service.
131  * Returns -1 on failure and a valid descriptor on success.
132  */
133 int
134 nfslib_transport_open(struct netconfig *nconf)
135 {
136 	int fd;
137 	struct strioctl	strioc;
138 
139 	if ((nconf == (struct netconfig *)NULL) ||
140 	    (nconf->nc_device == (char *)NULL)) {
141 		syslog(LOG_ERR, "no netconfig device");
142 		return (-1);
143 	}
144 
145 	/*
146 	 * Open the transport device.
147 	 */
148 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
149 	if (fd == -1) {
150 		if (t_errno == TSYSERR && errno == EMFILE &&
151 		    (nofile_increase(0) == 0)) {
152 			/* Try again with a higher NOFILE limit. */
153 			fd = t_open(nconf->nc_device, O_RDWR,
154 					(struct t_info *)NULL);
155 		}
156 		if (fd == -1) {
157 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
158 			    nconf->nc_device, t_errno);
159 			return (-1);
160 		}
161 	}
162 
163 	/*
164 	 * Pop timod because the RPC module must be as close as possible
165 	 * to the transport.
166 	 */
167 	if (ioctl(fd, I_POP, 0) < 0) {
168 		syslog(LOG_ERR, "I_POP of timod failed: %m");
169 		(void) t_close(fd);
170 		return (-1);
171 	}
172 
173 	/*
174 	 * Common code for CLTS and COTS transports
175 	 */
176 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
177 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
178 		(void) t_close(fd);
179 		return (-1);
180 	}
181 
182 	strioc.ic_cmd = RPC_SERVER;
183 	strioc.ic_dp = (char *)0;
184 	strioc.ic_len = 0;
185 	strioc.ic_timout = -1;
186 
187 	/* Tell rpcmod to act like a server stream. */
188 	if (ioctl(fd, I_STR, &strioc) < 0) {
189 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
190 		(void) t_close(fd);
191 		return (-1);
192 	}
193 
194 	/*
195 	 * Re-push timod so that we will still be doing TLI
196 	 * operations on the descriptor.
197 	 */
198 	if (ioctl(fd, I_PUSH, "timod") < 0) {
199 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
200 		(void) t_close(fd);
201 		return (-1);
202 	}
203 
204 	return (fd);
205 }
206 
207 static int
208 nofile_increase(int limit)
209 {
210 	struct rlimit rl;
211 
212 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
213 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
214 		return (-1);
215 	}
216 
217 	if (limit > 0)
218 		rl.rlim_cur = limit;
219 	else
220 		rl.rlim_cur += NOFILE_INC_SIZE;
221 
222 	if (rl.rlim_cur > rl.rlim_max &&
223 	    rl.rlim_max != RLIM_INFINITY)
224 		rl.rlim_max = rl.rlim_cur;
225 
226 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
227 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
228 			rl.rlim_cur);
229 		return (-1);
230 	}
231 
232 	return (0);
233 }
234 
235 int
236 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
237 	struct nd_hostserv *hs, int backlog)
238 {
239 	int fd;
240 	struct t_bind  *ntb;
241 	struct t_bind tb;
242 	struct nd_addrlist *addrlist;
243 	struct t_optmgmt req, resp;
244 	struct opthdr *opt;
245 	char reqbuf[128];
246 	bool_t use_any = FALSE;
247 	bool_t gzone = TRUE;
248 
249 	if ((fd = nfslib_transport_open(nconf)) == -1) {
250 		syslog(LOG_ERR, "cannot establish transport service over %s",
251 			nconf->nc_device);
252 		return (-1);
253 	}
254 
255 	addrlist = (struct nd_addrlist *)NULL;
256 
257 	/* nfs4_callback service does not used a fieed port number */
258 
259 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
260 		tb.addr.maxlen = 0;
261 		tb.addr.len = 0;
262 		tb.addr.buf = 0;
263 		use_any = TRUE;
264 		gzone = (getzoneid() == GLOBAL_ZONEID);
265 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
266 
267 		syslog(LOG_ERR,
268 		"Cannot get address for transport %s host %s service %s",
269 			nconf->nc_netid, hs->h_host, hs->h_serv);
270 		(void) t_close(fd);
271 		return (-1);
272 	}
273 
274 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
275 		/*
276 		 * If we're running over TCP, then set the
277 		 * SO_REUSEADDR option so that we can bind
278 		 * to our preferred address even if previously
279 		 * left connections exist in FIN_WAIT states.
280 		 * This is somewhat bogus, but otherwise you have
281 		 * to wait 2 minutes to restart after killing it.
282 		 */
283 		if (reuseaddr(fd) == -1) {
284 			syslog(LOG_WARNING,
285 			"couldn't set SO_REUSEADDR option on transport");
286 		}
287 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
288 		/*
289 		 * In order to run MLP on UDP, we need to handle creds.
290 		 */
291 		if (recvucred(fd) == -1) {
292 			syslog(LOG_WARNING,
293 			    "couldn't set SO_RECVUCRED option on transport");
294 		}
295 	}
296 
297 	/*
298 	 * Make non global zone nfs4_callback port MLP
299 	 */
300 	if (use_any && is_system_labeled() && !gzone) {
301 		if (anonmlp(fd) == -1) {
302 			/*
303 			 * failing to set this option means nfs4_callback
304 			 * could fail silently later. So fail it with
305 			 * with an error message now.
306 			 */
307 			syslog(LOG_ERR,
308 			    "couldn't set SO_ANON_MLP option on transport");
309 			(void) t_close(fd);
310 			return (-1);
311 		}
312 	}
313 
314 	if (nconf->nc_semantics == NC_TPI_CLTS)
315 		tb.qlen = 0;
316 	else
317 		tb.qlen = backlog;
318 
319 	/* LINTED pointer alignment */
320 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
321 	if (ntb == (struct t_bind *)NULL) {
322 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
323 		(void) t_close(fd);
324 		netdir_free((void *)addrlist, ND_ADDRLIST);
325 		return (-1);
326 	}
327 
328 	/*
329 	 * XXX - what about the space tb->addr.buf points to? This should
330 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
331 	 * should't be called with T_ALL.
332 	 */
333 	if (addrlist)
334 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
335 
336 	if (t_bind(fd, &tb, ntb) == -1) {
337 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
338 		(void) t_free((char *)ntb, T_BIND);
339 		netdir_free((void *)addrlist, ND_ADDRLIST);
340 		(void) t_close(fd);
341 		return (-1);
342 	}
343 
344 	/* make sure we bound to the right address */
345 	if (use_any == FALSE &&
346 	    (tb.addr.len != ntb->addr.len ||
347 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
348 		syslog(LOG_ERR, "t_bind to wrong address");
349 		(void) t_free((char *)ntb, T_BIND);
350 		netdir_free((void *)addrlist, ND_ADDRLIST);
351 		(void) t_close(fd);
352 		return (-1);
353 	}
354 
355 	/*
356 	 * Call nfs4svc_setport so that the kernel can be
357 	 * informed what port number the daemon is listing
358 	 * for incoming connection requests.
359 	 */
360 
361 	if ((nconf->nc_semantics == NC_TPI_COTS ||
362 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
363 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
364 
365 	*addr = &ntb->addr;
366 	netdir_free((void *)addrlist, ND_ADDRLIST);
367 
368 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
369 		/*
370 		 * Disable the Nagle algorithm on TCP connections.
371 		 * Connections accepted from this listener will
372 		 * inherit the listener options.
373 		 */
374 
375 		/* LINTED pointer alignment */
376 		opt = (struct opthdr *)reqbuf;
377 		opt->level = IPPROTO_TCP;
378 		opt->name = TCP_NODELAY;
379 		opt->len = sizeof (int);
380 
381 		/* LINTED pointer alignment */
382 		*(int *)((char *)opt + sizeof (*opt)) = 1;
383 
384 		req.flags = T_NEGOTIATE;
385 		req.opt.len = sizeof (*opt) + opt->len;
386 		req.opt.buf = (char *)opt;
387 		resp.flags = 0;
388 		resp.opt.buf = reqbuf;
389 		resp.opt.maxlen = sizeof (reqbuf);
390 
391 		if (t_optmgmt(fd, &req, &resp) < 0 ||
392 				resp.flags != T_SUCCESS) {
393 			syslog(LOG_ERR,
394 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
395 				nconf->nc_proto, t_errno);
396 		}
397 	}
398 
399 	return (fd);
400 }
401 
402 static int
403 setopt(int fd, int level, int name, int value)
404 {
405 	struct t_optmgmt req, resp;
406 	struct {
407 		struct opthdr opt;
408 		int value;
409 	} reqbuf;
410 
411 	reqbuf.opt.level = level;
412 	reqbuf.opt.name = name;
413 	reqbuf.opt.len = sizeof (int);
414 
415 	reqbuf.value = value;
416 
417 	req.flags = T_NEGOTIATE;
418 	req.opt.len = sizeof (reqbuf);
419 	req.opt.buf = (char *)&reqbuf;
420 
421 	resp.flags = 0;
422 	resp.opt.buf = (char *)&reqbuf;
423 	resp.opt.maxlen = sizeof (reqbuf);
424 
425 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
426 		t_error("t_optmgmt");
427 		return (-1);
428 	}
429 	return (0);
430 }
431 
/*
 * Turn on SO_REUSEADDR for the endpoint fd.
 * Returns whatever setopt() returns: 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	const int enable = 1;

	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, enable));
}
437 
438 static int
439 recvucred(int fd)
440 {
441 	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
442 }
443 
444 static int
445 anonmlp(int fd)
446 {
447 	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
448 }
449 
450 void
451 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
452 {
453 	int error;
454 
455 	/*
456 	 * Save the error code across syslog(), just in case syslog()
457 	 * gets its own error and, therefore, overwrites errno.
458 	 */
459 	error = errno;
460 	if (t_errno == TSYSERR) {
461 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
462 			tli_name, fd, nconf->nc_proto);
463 	} else {
464 		syslog(LOG_ERR,
465 			"%s(file descriptor %d/transport %s) TLI error %d",
466 			tli_name, fd, nconf->nc_proto, t_errno);
467 	}
468 	errno = error;
469 }
470 
471 /*
472  * Called to set up service over a particular transport.
473  */
474 void
475 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
476 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
477 {
478 	register int sock;
479 	struct protob *protobp;
480 	struct netbuf *retaddr;
481 	struct netconfig *retnconf;
482 	struct netbuf addrmask;
483 	int vers;
484 	int err;
485 	int l;
486 
487 	if (provider)
488 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
489 					&retnconf);
490 	else
491 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
492 					&retnconf);
493 
494 	if (sock == -1) {
495 		(void) syslog(LOG_ERR,
496 	"Cannot establish %s service over %s: transport setup problem.",
497 			protobp0->serv, provider ? provider : proto);
498 		return;
499 	}
500 
501 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
502 		(void) syslog(LOG_ERR,
503 		    "Cannot set address mask for %s", retnconf->nc_netid);
504 		return;
505 	}
506 
507 	/*
508 	 * Register all versions of the programs in the protocol block list.
509 	 */
510 	l = strlen(NC_UDP);
511 	for (protobp = protobp0; protobp; protobp = protobp->next) {
512 		for (vers = protobp->versmin; vers <= protobp->versmax;
513 			vers++) {
514 			if ((protobp->program == NFS_PROGRAM ||
515 				protobp->program == NFS_ACL_PROGRAM) &&
516 				vers == NFS_V4 &&
517 				strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
518 				continue;
519 
520 			if (use_pmap) {
521 				/*
522 				 * Note that if we're using a portmapper
523 				 * instead of rpcbind then we can't do an
524 				 * unregister operation here.
525 				 *
526 				 * The reason is that the portmapper unset
527 				 * operation removes all the entries for a
528 				 * given program/version regardelss of
529 				 * transport protocol.
530 				 *
531 				 * The caller of this routine needs to ensure
532 				 * that __pmap_unset() has been called for all
533 				 * program/version service pairs they plan
534 				 * to support before they start registering
535 				 * each program/version/protocol triplet.
536 				 */
537 				(void) __pmap_set(protobp->program, vers,
538 				    retnconf, retaddr);
539 			} else {
540 				(void) rpcb_unset(protobp->program, vers,
541 				    retnconf);
542 				(void) rpcb_set(protobp->program, vers,
543 				    retnconf, retaddr);
544 			}
545 		}
546 	}
547 
548 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
549 		/* Don't drop core if supporting module(s) aren't loaded. */
550 		(void) signal(SIGSYS, SIG_IGN);
551 
552 		/*
553 		 * svc() doesn't block, it returns success or failure.
554 		 */
555 
556 		if (svc == NULL && Mysvc4 != NULL)
557 			err = (*Mysvc4)(sock, &addrmask, retnconf,
558 					NFS4_SETPORT|NFS4_KRPC_START, retaddr);
559 		else
560 			err = (*svc)(sock, addrmask, retnconf);
561 
562 		if (err < 0) {
563 			(void) syslog(LOG_ERR,
564 				"Cannot establish %s service over <file desc."
565 				" %d, protocol %s> : %m. Exiting",
566 				protobp0->serv, sock, retnconf->nc_proto);
567 			exit(1);
568 		}
569 	}
570 
571 	/*
572 	 * We successfully set up the server over this transport.
573 	 * Add this descriptor to the one being polled on.
574 	 */
575 	add_to_poll_list(sock, retnconf);
576 }
577 /*
578  * Set up the NFS service over all the available transports.
579  * Returns -1 for failure, 0 for success.
580  */
581 int
582 do_all(struct protob *protobp,
583 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
584 {
585 	struct netconfig *nconf;
586 	NCONF_HANDLE *nc;
587 	int l;
588 
589 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
590 		syslog(LOG_ERR, "setnetconfig failed: %m");
591 		return (-1);
592 	}
593 	l = strlen(NC_UDP);
594 	while (nconf = getnetconfig(nc)) {
595 		if ((nconf->nc_flag & NC_VISIBLE) &&
596 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
597 		    OK_TPI_TYPE(nconf) &&
598 		    (protobp->program != NFS4_CALLBACK ||
599 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
600 			do_one(nconf->nc_device, nconf->nc_proto,
601 				protobp, svc, use_pmap);
602 	}
603 	(void) endnetconfig(nc);
604 	return (0);
605 }
606 
607 /*
608  * poll on the open transport descriptors for events and errors.
609  */
610 void
611 poll_for_action(void)
612 {
613 	int nfds;
614 	int i;
615 
616 	/*
617 	 * Keep polling until all transports have been closed. When this
618 	 * happens, we return.
619 	 */
620 	while ((int)num_fds > 0) {
621 		nfds = poll(poll_array, num_fds, INFTIM);
622 		switch (nfds) {
623 		case 0:
624 			continue;
625 
626 		case -1:
627 			/*
628 			 * Some errors from poll could be
629 			 * due to temporary conditions, and we try to
630 			 * be robust in the face of them. Other
631 			 * errors (should never happen in theory)
632 			 * are fatal (eg. EINVAL, EFAULT).
633 			 */
634 			switch (errno) {
635 			case EINTR:
636 			    continue;
637 
638 			case EAGAIN:
639 			case ENOMEM:
640 				(void) sleep(10);
641 				continue;
642 
643 			default:
644 				(void) syslog(LOG_ERR,
645 						"poll failed: %m. Exiting");
646 				exit(1);
647 			}
648 		default:
649 			break;
650 		}
651 
652 		/*
653 		 * Go through the poll list looking for events.
654 		 */
655 		for (i = 0; i < num_fds && nfds > 0; i++) {
656 			if (poll_array[i].revents) {
657 				nfds--;
658 				/*
659 				 * We have a message, so try to read it.
660 				 * Record the error return in errno,
661 				 * so that syslog(LOG_ERR, "...%m")
662 				 * dumps the corresponding error string.
663 				 */
664 				if (conn_polled[i].nc.nc_semantics ==
665 				    NC_TPI_CLTS) {
666 					errno = do_poll_clts_action(
667 							poll_array[i].fd, i);
668 				} else {
669 					errno = do_poll_cots_action(
670 							poll_array[i].fd, i);
671 				}
672 
673 				if (errno == 0)
674 					continue;
675 				/*
676 				 * Most returned error codes mean that there is
677 				 * fatal condition which we can only deal with
678 				 * by closing the transport.
679 				 */
680 				if (errno != EAGAIN && errno != ENOMEM) {
681 					(void) syslog(LOG_ERR,
682 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
683 						poll_array[i].fd,
684 						conn_polled[i].nc.nc_proto);
685 					(void) t_close(poll_array[i].fd);
686 					remove_from_poll_list(poll_array[i].fd);
687 
688 				} else if (errno == ENOMEM)
689 					(void) sleep(5);
690 			}
691 		}
692 	}
693 
694 	(void) syslog(LOG_ERR,
695 		"All transports have been closed with errors. Exiting.");
696 }
697 
698 /*
699  * Allocate poll/transport array entries for this descriptor.
700  */
701 static void
702 add_to_poll_list(int fd, struct netconfig *nconf)
703 {
704 	static int poll_array_size = 0;
705 
706 	/*
707 	 * If the arrays are full, allocate new ones.
708 	 */
709 	if (num_fds == poll_array_size) {
710 		struct pollfd *tpa;
711 		struct conn_entry *tnp;
712 
713 		if (poll_array_size != 0) {
714 			tpa = poll_array;
715 			tnp = conn_polled;
716 		} else
717 			tpa = (struct pollfd *)0;
718 
719 		poll_array_size += POLL_ARRAY_INC_SIZE;
720 		/*
721 		 * Allocate new arrays.
722 		 */
723 		poll_array = (struct pollfd *)
724 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
725 		conn_polled = (struct conn_entry *)
726 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
727 		if (poll_array == (struct pollfd *)NULL ||
728 		    conn_polled == (struct conn_entry *)NULL) {
729 			syslog(LOG_ERR, "malloc failed for poll array");
730 			exit(1);
731 		}
732 
733 		/*
734 		 * Copy the data of the old ones into new arrays, and
735 		 * free the old ones.
736 		 */
737 		if (tpa) {
738 			(void) memcpy((void *)poll_array, (void *)tpa,
739 				num_fds * sizeof (struct pollfd));
740 			(void) memcpy((void *)conn_polled, (void *)tnp,
741 				num_fds * sizeof (struct conn_entry));
742 			free((void *)tpa);
743 			free((void *)tnp);
744 		}
745 	}
746 
747 	/*
748 	 * Set the descriptor and event list. All possible events are
749 	 * polled for.
750 	 */
751 	poll_array[num_fds].fd = fd;
752 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
753 
754 	/*
755 	 * Copy the transport data over too.
756 	 */
757 	conn_polled[num_fds].nc = *nconf;
758 	conn_polled[num_fds].closing = 0;
759 
760 	/*
761 	 * Set the descriptor to non-blocking. Avoids a race
762 	 * between data arriving on the stream and then having it
763 	 * flushed before we can read it.
764 	 */
765 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
766 		(void) syslog(LOG_ERR,
767 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
768 			num_fds, nconf->nc_proto);
769 		exit(1);
770 	}
771 
772 	/*
773 	 * Count this descriptor.
774 	 */
775 	++num_fds;
776 }
777 
778 static void
779 remove_from_poll_list(int fd)
780 {
781 	int i;
782 	int num_to_copy;
783 
784 	for (i = 0; i < num_fds; i++) {
785 		if (poll_array[i].fd == fd) {
786 			--num_fds;
787 			num_to_copy = num_fds - i;
788 			(void) memcpy((void *)&poll_array[i],
789 				(void *)&poll_array[i+1],
790 				num_to_copy * sizeof (struct pollfd));
791 			(void) memset((void *)&poll_array[num_fds], 0,
792 				sizeof (struct pollfd));
793 			(void) memcpy((void *)&conn_polled[i],
794 				(void *)&conn_polled[i+1],
795 				num_to_copy * sizeof (struct conn_entry));
796 			(void) memset((void *)&conn_polled[num_fds], 0,
797 				sizeof (struct conn_entry));
798 			return;
799 		}
800 	}
801 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
802 
803 }
804 
/*
 * Called to read and interpret the event on a connectionless descriptor.
 *
 * fd         - descriptor that poll() reported an event on
 * conn_index - index of that descriptor in conn_polled
 *
 * The expected event is a datagram error indication: it is retrieved
 * with t_rcvuderr() and logged together with the address (or resolved
 * host name) it refers to.  Anything that cannot be interpreted is
 * drained from the stream with getmsg().
 *
 * The unitdata and uderr buffers are kept in static storage and reused
 * for as long as consecutive calls are made for the same descriptor.
 *
 * Returns 0 if successful, or a UNIX error code if failure.
 */
static int
do_poll_clts_action(int fd, int conn_index)
{
	int error;
	int ret;
	int flags;
	int priority;
	struct netconfig *nconf = &conn_polled[conn_index].nc;
	static struct t_unitdata *unitdata = NULL;
	static struct t_uderr *uderr = NULL;
	static int oldfd = -1;
	struct nd_hostservlist *host = NULL;
	struct strbuf ctl[1], data[1];
	/*
	 * We just need to have some space to consume the
	 * message in the event we can't use the TLI interface to do the
	 * job.
	 *
	 * We flush the message using getmsg(). For the control part
	 * we allocate enough for any TPI header plus 32 bytes for address
	 * and options. For the data part, there is nothing magic about
	 * the size of the array, but 256 bytes is probably better than
	 * 1 byte, and we don't expect any data portion anyway.
	 *
	 * If the array sizes are too small, we handle this because getmsg()
	 * (called to consume the message) will return MOREDATA|MORECTL.
	 * Thus we just call getmsg() until it's read the message.
	 */
	char ctlbuf[sizeof (union T_primitives) + 32];
	char databuf[256];

	/*
	 * If this is the same descriptor as the last time
	 * do_poll_clts_action was called, we can save some
	 * de-allocation and allocation.
	 */
	if (oldfd != fd) {
		oldfd = fd;

		if (unitdata) {
			(void) t_free((char *)unitdata, T_UNITDATA);
			unitdata = NULL;
		}
		if (uderr) {
			(void) t_free((char *)uderr, T_UDERROR);
			uderr = NULL;
		}
	}

	/*
	 * Allocate a unitdata structure for receiving the event.
	 */
	if (unitdata == NULL) {
		/* LINTED pointer alignment */
		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
		if (unitdata == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
					fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
					fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

try_again:
	flags = 0;

	/*
	 * The idea is we wait for T_UNITDATA_IND's. Of course,
	 * we don't get any, because rpcmod filters them out.
	 * However, we need to call t_rcvudata() to let TLI
	 * tell us we have a T_UDERROR_IND.
	 *
	 * algorithm is:
	 * 	t_rcvudata(), expecting TLOOK.
	 * 	t_look(), expecting T_UDERR.
	 * 	t_rcvuderr(), expecting success (0).
	 * 	expand destination address into ASCII,
	 *	and dump it.
	 */

	ret = t_rcvudata(fd, unitdata, &flags);
	if (ret == 0 || t_errno == TBUFOVFLW) {
		(void) syslog(LOG_WARNING,
"t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
			fd, nconf->nc_proto, unitdata->udata.len);

		/*
		 * Even though we don't expect any data, in case we do,
		 * keep reading until there is no more.
		 */
		if (flags & T_MORE)
			goto try_again;

		return (0);
	}

	switch (t_errno) {
	case TNODATA:
		return (0);
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
			"t_rcvudata(file descriptor %d/transport %s) %m",
			fd, nconf->nc_proto);
		return (error);
	case TLOOK:
		break;
	default:
		(void) syslog(LOG_ERR,
		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

	ret = t_look(fd);
	switch (ret) {
	case 0:
		return (0);
	case -1:
		/*
		 * System errors are returned to caller.
		 */
		if (t_errno == TSYSERR) {
			/*
			 * Save the error code across
			 * syslog(), just in case
			 * syslog() gets its own error
			 * and therefore overwrites errno.
			 */
			error = errno;
			(void) syslog(LOG_ERR,
				"t_look(file descriptor %d/transport %s) %m",
				fd, nconf->nc_proto);
			return (error);
		}
		(void) syslog(LOG_ERR,
			"t_look(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	case T_UDERR:
		break;
	default:
		/*
		 * Some other event is pending; log it and still attempt
		 * t_rcvuderr() below.
		 */
		(void) syslog(LOG_WARNING,
	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
			fd, nconf->nc_proto, ret, T_UDERR);
	}

	if (uderr == NULL) {
		/* LINTED pointer alignment */
		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
		if (uderr == NULL) {
			if (t_errno == TSYSERR) {
				/*
				 * Save the error code across
				 * syslog(), just in case
				 * syslog() gets its own error
				 * and therefore overwrites errno.
				 */
				error = errno;
				(void) syslog(LOG_ERR,
	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
					fd, nconf->nc_proto);
				return (error);
			}
			(void) syslog(LOG_ERR,
"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
				fd, nconf->nc_proto, t_errno);
			goto flush_it;
		}
	}

	ret = t_rcvuderr(fd, uderr);
	if (ret == 0) {

		/*
		 * Save the datagram error in errno, so that the
		 * %m argument to syslog picks up the error string.
		 */
		errno = uderr->error;

		/*
		 * Decide the log priority once, from the datagram error
		 * itself.  errno cannot be trusted for this after the
		 * first syslog() or after netdir_getbyaddr(), either of
		 * which may overwrite it.
		 */
		priority = (uderr->error == ECONNREFUSED) ?
		    LOG_DEBUG : LOG_WARNING;

		/*
		 * Log the datagram error, then log the host that
		 * probably triggered it. Cannot log both in the
		 * same transaction because of packet size limitations
		 * in /dev/log.
		 */
		(void) syslog(priority,
"NFS response over <file descriptor %d/transport %s> generated error: %m",
			fd, nconf->nc_proto);

		/*
		 * Try to map the client's address back to a
		 * name.
		 */
		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
		if (ret != -1 && host && host->h_cnt > 0 &&
		    host->h_hostservs) {
			(void) syslog(priority,
"Bad NFS response was sent to client with host name: %s; service port: %s",
				host->h_hostservs->h_host,
				host->h_hostservs->h_serv);
		} else {
			unsigned int i, j;
			char *buf;
			const char *hex = "0123456789abcdef";

			/*
			 * Mapping failed, print the whole thing
			 * in ASCII hex.
			 */
			buf = (char *)malloc(uderr->addr.len * 2 + 1);
			if (buf == NULL) {
				(void) syslog(LOG_ERR,
				    "malloc failed for transport address buffer");
			} else {
				for (i = 0, j = 0; i < uderr->addr.len;
				    i++, j += 2) {
					unsigned char octet =
					    (unsigned char)uderr->addr.buf[i];

					buf[j] = hex[(octet >> 4) & 0xf];
					buf[j+1] = hex[octet & 0xf];
				}
				buf[j] = '\0';
				(void) syslog(priority,
	"Bad NFS response was sent to client with transport address: 0x%s",
					buf);
				free((void *)buf);
			}
		}

		if (ret == 0 && host != NULL)
			netdir_free((void *)host, ND_HOSTSERVLIST);
		return (0);
	}

	switch (t_errno) {
	case TNOUDERR:
		goto flush_it;
	case TSYSERR:
		/*
		 * System errors are returned to caller.
		 * Save the error code across
		 * syslog(), just in case
		 * syslog() gets its own error
		 * and therefore overwrites errno.
		 */
		error = errno;
		(void) syslog(LOG_ERR,
			"t_rcvuderr(file descriptor %d/transport %s) %m",
			fd, nconf->nc_proto);
		return (error);
	default:
		(void) syslog(LOG_ERR,
		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
			fd, nconf->nc_proto, t_errno);
		goto flush_it;
	}

flush_it:
	/*
	 * If we get here, then we could not cope with whatever message
	 * we attempted to read, so flush it. If we did read a message,
	 * and one isn't present, that is all right, because fd is in
	 * nonblocking mode.
	 */
	(void) syslog(LOG_ERR,
	"Flushing one input message from <file descriptor %d/transport %s>",
		fd, nconf->nc_proto);

	/*
	 * Read and discard the message. Do this until there is
	 * no more control/data in the message or until we get an error.
	 */
	do {
		ctl->maxlen = sizeof (ctlbuf);
		ctl->buf = ctlbuf;
		data->maxlen = sizeof (databuf);
		data->buf = databuf;
		flags = 0;
		ret = getmsg(fd, ctl, data, &flags);
		if (ret == -1)
			return (errno);
	} while (ret != 0);

	return (0);
}
1106 
1107 static void
1108 conn_close_oldest(void)
1109 {
1110 	int fd;
1111 	int i1;
1112 
1113 	/*
1114 	 * Find the oldest connection that is not already in the
1115 	 * process of shutting down.
1116 	 */
1117 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1118 		if (i1 >= num_fds)
1119 			return;
1120 		if (conn_polled[i1].closing == 0)
1121 			break;
1122 	}
1123 #ifdef DEBUG
1124 	printf("too many connections (%d), releasing oldest (%d)\n",
1125 		num_conns, poll_array[i1].fd);
1126 #else
1127 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1128 		num_conns, poll_array[i1].fd);
1129 #endif
1130 	fd = poll_array[i1].fd;
1131 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1132 		/*
1133 		 * For politeness, send a T_DISCON_REQ to the transport
1134 		 * provider.  We close the stream anyway.
1135 		 */
1136 		(void) t_snddis(fd, (struct t_call *)0);
1137 		num_conns--;
1138 		remove_from_poll_list(fd);
1139 		(void) t_close(fd);
1140 	} else {
1141 		/*
1142 		 * For orderly release, we do not close the stream
1143 		 * until the T_ORDREL_IND arrives to complete
1144 		 * the handshake.
1145 		 */
1146 		if (t_sndrel(fd) == 0)
1147 			conn_polled[i1].closing = 1;
1148 	}
1149 }
1150 
1151 static boolean_t
1152 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1153 {
1154 	struct conn_ind	*conn;
1155 	struct conn_ind	*next_conn;
1156 
1157 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1158 	if (conn == NULL) {
1159 		syslog(LOG_ERR, "malloc for listen indication failed");
1160 		return (FALSE);
1161 	}
1162 
1163 	/* LINTED pointer alignment */
1164 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1165 	if (conn->conn_call == NULL) {
1166 		free((char *)conn);
1167 		nfslib_log_tli_error("t_alloc", fd, nconf);
1168 		return (FALSE);
1169 	}
1170 
1171 	if (t_listen(fd, conn->conn_call) == -1) {
1172 		nfslib_log_tli_error("t_listen", fd, nconf);
1173 		(void) t_free((char *)conn->conn_call, T_CALL);
1174 		free((char *)conn);
1175 		return (FALSE);
1176 	}
1177 
1178 	if (conn->conn_call->udata.len > 0) {
1179 		syslog(LOG_WARNING,
1180 	"rejecting inbound connection(%s) with %d bytes of connect data",
1181 			nconf->nc_proto, conn->conn_call->udata.len);
1182 
1183 		conn->conn_call->udata.len = 0;
1184 		(void) t_snddis(fd, conn->conn_call);
1185 		(void) t_free((char *)conn->conn_call, T_CALL);
1186 		free((char *)conn);
1187 		return (FALSE);
1188 	}
1189 
1190 	if ((next_conn = *connp) != NULL) {
1191 		next_conn->conn_prev->conn_next = conn;
1192 		conn->conn_next = next_conn;
1193 		conn->conn_prev = next_conn->conn_prev;
1194 		next_conn->conn_prev = conn;
1195 	} else {
1196 		conn->conn_next = conn;
1197 		conn->conn_prev = conn;
1198 		*connp = conn;
1199 	}
1200 	return (TRUE);
1201 }
1202 
1203 static int
1204 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1205 {
1206 	struct conn_ind	*conn;
1207 	struct t_discon	discon;
1208 
1209 	discon.udata.buf = (char *)0;
1210 	discon.udata.maxlen = 0;
1211 	if (t_rcvdis(fd, &discon) == -1) {
1212 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1213 		return (-1);
1214 	}
1215 
1216 	conn = *connp;
1217 	if (conn == NULL)
1218 		return (0);
1219 
1220 	do {
1221 		if (conn->conn_call->sequence == discon.sequence) {
1222 			if (conn->conn_next == conn)
1223 				*connp = (struct conn_ind *)0;
1224 			else {
1225 				if (conn == *connp) {
1226 					*connp = conn->conn_next;
1227 				}
1228 				conn->conn_next->conn_prev = conn->conn_prev;
1229 				conn->conn_prev->conn_next = conn->conn_next;
1230 			}
1231 			free((char *)conn);
1232 			break;
1233 		}
1234 		conn = conn->conn_next;
1235 	} while (conn != *connp);
1236 
1237 	return (0);
1238 }
1239 
1240 static void
1241 cots_listen_event(int fd, int conn_index)
1242 {
1243 	struct t_call *call;
1244 	struct conn_ind	*conn;
1245 	struct conn_ind	*conn_head;
1246 	int event;
1247 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1248 	int new_fd;
1249 	struct netbuf addrmask;
1250 	int ret = 0;
1251 	char *clnt;
1252 	char *clnt_uaddr = NULL;
1253 	struct nd_hostservlist *clnt_serv = NULL;
1254 
1255 	conn_head = (struct conn_ind *)0;
1256 	(void) conn_get(fd, nconf, &conn_head);
1257 
1258 	while ((conn = conn_head) != NULL) {
1259 		conn_head = conn->conn_next;
1260 		if (conn_head == conn)
1261 			conn_head = (struct conn_ind *)0;
1262 		else {
1263 			conn_head->conn_prev = conn->conn_prev;
1264 			conn->conn_prev->conn_next = conn_head;
1265 		}
1266 		call = conn->conn_call;
1267 		free((char *)conn);
1268 
1269 		/*
1270 		 * If we have already accepted the maximum number of
1271 		 * connections allowed on the command line, then drop
1272 		 * the oldest connection (for any protocol) before
1273 		 * accepting the new connection.  Unless explicitly
1274 		 * set on the command line, max_conns_allowed is -1.
1275 		 */
1276 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1277 			conn_close_oldest();
1278 
1279 		/*
1280 		 * Create a new transport endpoint for the same proto as
1281 		 * the listener.
1282 		 */
1283 		new_fd = nfslib_transport_open(nconf);
1284 		if (new_fd == -1) {
1285 			call->udata.len = 0;
1286 			(void) t_snddis(fd, call);
1287 			(void) t_free((char *)call, T_CALL);
1288 			syslog(LOG_ERR, "Cannot establish transport over %s",
1289 				nconf->nc_device);
1290 			continue;
1291 		}
1292 
1293 		/* Bind to a generic address/port for the accepting stream. */
1294 		if (t_bind(new_fd, (struct t_bind *)NULL,
1295 		    (struct t_bind *)NULL) == -1) {
1296 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1297 			call->udata.len = 0;
1298 			(void) t_snddis(fd, call);
1299 			(void) t_free((char *)call, T_CALL);
1300 			(void) t_close(new_fd);
1301 			continue;
1302 		}
1303 
1304 		while (t_accept(fd, new_fd, call) == -1) {
1305 			if (t_errno != TLOOK) {
1306 #ifdef DEBUG
1307 				nfslib_log_tli_error("t_accept", fd, nconf);
1308 #endif
1309 				call->udata.len = 0;
1310 				(void) t_snddis(fd, call);
1311 				(void) t_free((char *)call, T_CALL);
1312 				(void) t_close(new_fd);
1313 				goto do_next_conn;
1314 			}
1315 			while (event = t_look(fd)) {
1316 				switch (event) {
1317 				case T_LISTEN:
1318 #ifdef DEBUG
1319 					printf(
1320 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1321 #endif
1322 					(void) conn_get(fd, nconf, &conn_head);
1323 					continue;
1324 				case T_DISCONNECT:
1325 #ifdef DEBUG
1326 					printf(
1327 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1328 						nconf->nc_proto);
1329 #endif
1330 					(void) discon_get(fd, nconf,
1331 								&conn_head);
1332 					continue;
1333 				default:
1334 					syslog(LOG_ERR,
1335 			"unexpected event 0x%x during accept processing (%s)",
1336 						event, nconf->nc_proto);
1337 					call->udata.len = 0;
1338 					(void) t_snddis(fd, call);
1339 					(void) t_free((char *)call, T_CALL);
1340 					(void) t_close(new_fd);
1341 					goto do_next_conn;
1342 				}
1343 			}
1344 		}
1345 
1346 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1347 			(void) syslog(LOG_ERR,
1348 			    "Cannot set address mask for %s",
1349 				nconf->nc_netid);
1350 			return;
1351 		}
1352 
1353 		/* Tell KRPC about the new stream. */
1354 		if (Mysvc4 != NULL)
1355 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1356 				NFS4_KRPC_START, &call->addr);
1357 		else
1358 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1359 
1360 		if (ret < 0) {
1361 			if (errno != ENOTCONN) {
1362 				syslog(LOG_ERR,
1363 				    "unable to register new connection: %m");
1364 			} else {
1365 				/*
1366 				 * This is the only error that could be
1367 				 * caused by the client, so who was it?
1368 				 */
1369 				if (netdir_getbyaddr(nconf, &clnt_serv,
1370 				    &(call->addr)) == ND_OK &&
1371 				    clnt_serv->h_cnt > 0)
1372 					clnt = clnt_serv->h_hostservs->h_host;
1373 				else
1374 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1375 					    &(call->addr));
1376 				/*
1377 				 * If we don't know who the client was,
1378 				 * remain silent.
1379 				 */
1380 				if (clnt)
1381 					syslog(LOG_ERR,
1382 "unable to register new connection: client %s has dropped connection", clnt);
1383 				if (clnt_serv)
1384 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1385 				if (clnt_uaddr)
1386 					free(clnt_uaddr);
1387 			}
1388 			free(addrmask.buf);
1389 			(void) t_snddis(new_fd, (struct t_call *)0);
1390 			(void) t_free((char *)call, T_CALL);
1391 			(void) t_close(new_fd);
1392 			goto do_next_conn;
1393 		}
1394 
1395 		free(addrmask.buf);
1396 		(void) t_free((char *)call, T_CALL);
1397 
1398 		/*
1399 		 * Poll on the new descriptor so that we get disconnect
1400 		 * and orderly release indications.
1401 		 */
1402 		num_conns++;
1403 		add_to_poll_list(new_fd, nconf);
1404 
1405 		/* Reset nconf in case it has been moved. */
1406 		nconf = &conn_polled[conn_index].nc;
1407 do_next_conn:;
1408 	}
1409 }
1410 
1411 static int
1412 do_poll_cots_action(int fd, int conn_index)
1413 {
1414 	char buf[256];
1415 	int event;
1416 	int i1;
1417 	int flags;
1418 	struct conn_entry *connent = &conn_polled[conn_index];
1419 	struct netconfig *nconf = &(connent->nc);
1420 	const char *errorstr;
1421 
1422 	while (event = t_look(fd)) {
1423 		switch (event) {
1424 		case T_LISTEN:
1425 #ifdef DEBUG
1426 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1427 #endif
1428 			cots_listen_event(fd, conn_index);
1429 			break;
1430 
1431 		case T_DATA:
1432 #ifdef DEBUG
1433 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1434 #endif
1435 			/*
1436 			 * Receive a private notification from CONS rpcmod.
1437 			 */
1438 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1439 			if (i1 == -1) {
1440 				syslog(LOG_ERR, "t_rcv failed");
1441 				break;
1442 			}
1443 			if (i1 < sizeof (int))
1444 				break;
1445 			i1 = BE32_TO_U32(buf);
1446 			if (i1 == 1 || i1 == 2) {
1447 				/*
1448 				 * This connection has been idle for too long,
1449 				 * so release it as politely as we can.  If we
1450 				 * have already initiated an orderly release
1451 				 * and we get notified that the stream is
1452 				 * still idle, pull the plug.  This prevents
1453 				 * hung connections from continuing to consume
1454 				 * resources.
1455 				 */
1456 #ifdef DEBUG
1457 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1458 printf("initiating orderly release of idle connection\n");
1459 #endif
1460 				if (nconf->nc_semantics == NC_TPI_COTS ||
1461 					connent->closing != 0) {
1462 					(void) t_snddis(fd, (struct t_call *)0);
1463 					goto fdclose;
1464 				}
1465 				/*
1466 				 * For NC_TPI_COTS_ORD, the stream is closed
1467 				 * and removed from the poll list when the
1468 				 * T_ORDREL is received from the provider.  We
1469 				 * don't wait for it here because it may take
1470 				 * a while for the transport to shut down.
1471 				 */
1472 				if (t_sndrel(fd) == -1) {
1473 					syslog(LOG_ERR,
1474 					"unable to send orderly release %m");
1475 				}
1476 				connent->closing = 1;
1477 			} else
1478 				syslog(LOG_ERR,
1479 				"unexpected event from CONS rpcmod %d", i1);
1480 			break;
1481 
1482 		case T_ORDREL:
1483 #ifdef DEBUG
1484 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1485 #endif
1486 			/* Perform an orderly release. */
1487 			if (t_rcvrel(fd) == 0) {
1488 				/* T_ORDREL on listen fd's should be ignored */
1489 				if (!is_listen_fd_index(conn_index)) {
1490 					(void) t_sndrel(fd);
1491 					goto fdclose;
1492 				}
1493 				break;
1494 
1495 			} else if (t_errno == TLOOK) {
1496 				break;
1497 			} else {
1498 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1499 
1500 				/*
1501 				 * check to make sure we do not close
1502 				 * listen fd
1503 				 */
1504 				if (is_listen_fd_index(conn_index))
1505 					break;
1506 				else
1507 					goto fdclose;
1508 			}
1509 
1510 		case T_DISCONNECT:
1511 #ifdef DEBUG
1512 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1513 #endif
1514 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1515 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1516 
1517 			/*
1518 			 * T_DISCONNECT on listen fd's should be ignored.
1519 			 */
1520 			if (is_listen_fd_index(conn_index))
1521 				break;
1522 			else
1523 				goto fdclose;
1524 
1525 		case T_ERROR:
1526 		default:
1527 			if (event == T_ERROR || t_errno == TSYSERR) {
1528 			    if ((errorstr = strerror(errno)) == NULL) {
1529 				(void) sprintf(buf, "Unknown error num %d",
1530 									errno);
1531 				errorstr = (const char *) buf;
1532 			    }
1533 			} else if (event == -1)
1534 				errorstr = t_strerror(t_errno);
1535 			else
1536 				errorstr = "";
1537 			syslog(LOG_ERR,
1538 			    "unexpected TLI event (0x%x) on "
1539 			    "connection-oriented transport(%s,%d):%s",
1540 			    event, nconf->nc_proto, fd, errorstr);
1541 fdclose:
1542 			num_conns--;
1543 			remove_from_poll_list(fd);
1544 			(void) t_close(fd);
1545 			return (0);
1546 		}
1547 	}
1548 
1549 	return (0);
1550 }
1551 
/*
 * serv_name_to_port_name() - translate a service name (used primarily in
 * logging) into the RPC port name used by the netdir_*() routines.
 *
 * The comparison is case-sensitive.  Names that are not in the table map
 * to "unrecognized".  The returned string is a constant and must not be
 * modified or freed.
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc;
		char		*port;
	} names[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t k;

	for (k = 0; k < sizeof (names) / sizeof (names[0]); k++) {
		if (strcmp(name, names[k].svc) == 0)
			return (names[k].port);
	}

	return ("unrecognized");
}
1569 
1570 static int
1571 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1572 		struct netconfig **retnconf)
1573 {
1574 	struct netconfig *nconf;
1575 	NCONF_HANDLE *nc;
1576 	struct nd_hostserv hs;
1577 
1578 	hs.h_host = HOST_SELF;
1579 	hs.h_serv = serv_name_to_port_name(serv);
1580 
1581 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1582 		syslog(LOG_ERR, "setnetconfig failed: %m");
1583 		return (-1);
1584 	}
1585 	while (nconf = getnetconfig(nc)) {
1586 		if (OK_TPI_TYPE(nconf) &&
1587 		    strcmp(nconf->nc_device, provider) == 0) {
1588 			*retnconf = nconf;
1589 			return (nfslib_bindit(nconf, addr, &hs,
1590 					listen_backlog));
1591 		}
1592 	}
1593 	(void) endnetconfig(nc);
1594 
1595 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1596 	    provider);
1597 	return (-1);
1598 }
1599 
1600 static int
1601 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1602 		struct netconfig **retnconf)
1603 {
1604 	struct netconfig *nconf;
1605 	NCONF_HANDLE *nc = NULL;
1606 	struct nd_hostserv hs;
1607 
1608 	hs.h_host = HOST_SELF;
1609 	hs.h_serv = serv_name_to_port_name(serv);
1610 
1611 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1612 		syslog(LOG_ERR, "setnetconfig failed: %m");
1613 		return (-1);
1614 	}
1615 	while (nconf = getnetconfig(nc)) {
1616 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1617 			*retnconf = nconf;
1618 			return (nfslib_bindit(nconf, addr, &hs,
1619 					listen_backlog));
1620 		}
1621 	}
1622 	(void) endnetconfig(nc);
1623 
1624 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1625 	    proto);
1626 	return (-1);
1627 }
1628 
1629 #include <netinet/in.h>
1630 
1631 /*
1632  * Create an address mask appropriate for the transport.
1633  * The mask is used to obtain the host-specific part of
1634  * a network address when comparing addresses.
1635  * For an internet address the host-specific part is just
1636  * the 32 bit IP address and this part of the mask is set
1637  * to all-ones. The port number part of the mask is zeroes.
1638  */
1639 static int
1640 set_addrmask(fd, nconf, mask)
1641 	struct netconfig *nconf;
1642 	struct netbuf *mask;
1643 {
1644 	struct t_info info;
1645 
1646 	/*
1647 	 * Find the size of the address we need to mask.
1648 	 */
1649 	if (t_getinfo(fd, &info) < 0) {
1650 		t_error("t_getinfo");
1651 		return (-1);
1652 	}
1653 	mask->len = mask->maxlen = info.addr;
1654 	if (info.addr <= 0) {
1655 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1656 			info.addr);
1657 		return (-1);
1658 	}
1659 
1660 	mask->buf = (char *)malloc(mask->len);
1661 	if (mask->buf == NULL) {
1662 		syslog(LOG_ERR, "set_addrmask: no memory");
1663 		return (-1);
1664 	}
1665 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1666 
1667 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1668 		/*
1669 		 * Set the mask so that the port is ignored.
1670 		 */
1671 		/* LINTED pointer alignment */
1672 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1673 								(ulong_t)~0;
1674 		/* LINTED pointer alignment */
1675 		((struct sockaddr_in *)mask->buf)->sin_family =
1676 								(ushort_t)~0;
1677 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1678 		/* LINTED pointer alignment */
1679 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1680 			(uchar_t)~0, sizeof (struct in6_addr));
1681 		/* LINTED pointer alignment */
1682 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1683 								(ushort_t)~0;
1684 	} else {
1685 
1686 		/*
1687 		 * Set all mask bits.
1688 		 */
1689 		(void) memset(mask->buf, 0xFF, mask->len);
1690 	}
1691 	return (0);
1692 }
1693 
1694 /*
1695  * For listen fd's index is always less than end_listen_fds.
1696  * end_listen_fds is defined externally in the daemon that uses this library.
1697  * It's value is equal to the number of open file descriptors after the
1698  * last listen end point was opened but before any connection was accepted.
1699  */
1700 static int
1701 is_listen_fd_index(int index)
1702 {
1703 	return (index < end_listen_fds);
1704 }
1705