xref: /titanic_41/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision 42e43e9829853ed82c9a4e268b0b15ea58be81fb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * nfs_tbind.c, common part for nfsd and lockd.
28  */
29 
30 #define	PORTMAP
31 
32 #include <tiuser.h>
33 #include <fcntl.h>
34 #include <netconfig.h>
35 #include <stropts.h>
36 #include <errno.h>
37 #include <syslog.h>
38 #include <rpc/rpc.h>
39 #include <rpc/pmap_prot.h>
40 #include <sys/time.h>
41 #include <sys/resource.h>
42 #include <signal.h>
43 #include <netdir.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <netinet/tcp.h>
47 #include <malloc.h>
48 #include <stdlib.h>
49 #include "nfs_tbind.h"
50 #include <nfs/nfs.h>
51 #include <nfs/nfs_acl.h>
52 #include <nfs/nfssys.h>
53 #include <nfs/nfs4.h>
54 #include <zone.h>
55 #include <sys/socket.h>
56 #include <tsol/label.h>
57 
/*
 * Determine valid semantics for most applications.
 *
 * Evaluates to non-zero when the netconfig entry pointed to by _nconf has
 * one of the three TPI service types (NC_TPI_CLTS, NC_TPI_COTS or
 * NC_TPI_COTS_ORD).
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (for example "&nc") can be passed.  It is evaluated up to three times,
 * so it must not have side effects.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
65 
/*
 * Assemble an unsigned 32-bit quantity from the four bytes starting at
 * address `a', most significant byte first (big-endian order).
 *
 * `a' is parenthesized before the cast so that pointer expressions such
 * as "p + 1" are evaluated in the caller's pointer type first and only
 * then reinterpreted as a byte pointer.  `a' is evaluated four times.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
71 
/*
 * Growth step, in entries, applied to the poll array each time it has
 * to be reallocated.
 */
#define	POLL_ARRAY_INC_SIZE	64

/*
 * Amount by which nofile_increase(0) raises the process soft limit on
 * open file descriptors per call.
 */
#define	NOFILE_INC_SIZE		64

/*
 * TCP send and receive buffer size the NFS server asks for by default.
 */
#define	NFSD_TCP_BUFSZ		(1024 * 1024)
87 
88 struct conn_ind {
89 	struct conn_ind *conn_next;
90 	struct conn_ind *conn_prev;
91 	struct t_call   *conn_call;
92 };
93 
94 struct conn_entry {
95 	bool_t			closing;
96 	struct netconfig	nc;
97 };
98 
99 /*
100  * this file contains transport routines common to nfsd and lockd
101  */
102 static	int	nofile_increase(int);
103 static	int	reuseaddr(int);
104 static	int	recvucred(int);
105 static  int	anonmlp(int);
106 static	void	add_to_poll_list(int, struct netconfig *);
107 static	char	*serv_name_to_port_name(char *);
108 static	int	bind_to_proto(char *, char *, struct netbuf **,
109 				struct netconfig **);
110 static	int	bind_to_provider(char *, char *, struct netbuf **,
111 					struct netconfig **);
112 static	void	conn_close_oldest(void);
113 static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
114 static	void	cots_listen_event(int, int);
115 static	int	discon_get(int, struct netconfig *, struct conn_ind **);
116 static	int	do_poll_clts_action(int, int);
117 static	int	do_poll_cots_action(int, int);
118 static	void	remove_from_poll_list(int);
119 static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
120 static	int	is_listen_fd_index(int);
121 
122 static	struct pollfd *poll_array;
123 static	struct conn_entry *conn_polled;
124 static	int	num_conns;		/* Current number of connections */
125 int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
126 		struct netbuf *);
127 static int	setopt(int fd, int level, int name, int value);
128 static int	get_opt(int fd, int level, int name);
129 static void	nfslib_set_sockbuf(int fd);
130 
131 extern bool_t __pmap_set(const rpcprog_t program, const rpcvers_t version,
132     const struct netconfig *nconf, const struct netbuf *address);
133 
134 /*
135  * Called to create and prepare a transport descriptor for in-kernel
136  * RPC service.
137  * Returns -1 on failure and a valid descriptor on success.
138  */
139 int
140 nfslib_transport_open(struct netconfig *nconf)
141 {
142 	int fd;
143 	struct strioctl	strioc;
144 
145 	if ((nconf == (struct netconfig *)NULL) ||
146 	    (nconf->nc_device == (char *)NULL)) {
147 		syslog(LOG_ERR, "no netconfig device");
148 		return (-1);
149 	}
150 
151 	/*
152 	 * Open the transport device.
153 	 */
154 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
155 	if (fd == -1) {
156 		if (t_errno == TSYSERR && errno == EMFILE &&
157 		    (nofile_increase(0) == 0)) {
158 			/* Try again with a higher NOFILE limit. */
159 			fd = t_open(nconf->nc_device, O_RDWR,
160 			    (struct t_info *)NULL);
161 		}
162 		if (fd == -1) {
163 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
164 			    nconf->nc_device, t_errno);
165 			return (-1);
166 		}
167 	}
168 
169 	/*
170 	 * Pop timod because the RPC module must be as close as possible
171 	 * to the transport.
172 	 */
173 	if (ioctl(fd, I_POP, 0) < 0) {
174 		syslog(LOG_ERR, "I_POP of timod failed: %m");
175 		(void) t_close(fd);
176 		return (-1);
177 	}
178 
179 	/*
180 	 * Common code for CLTS and COTS transports
181 	 */
182 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
183 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
184 		(void) t_close(fd);
185 		return (-1);
186 	}
187 
188 	strioc.ic_cmd = RPC_SERVER;
189 	strioc.ic_dp = (char *)0;
190 	strioc.ic_len = 0;
191 	strioc.ic_timout = -1;
192 
193 	/* Tell rpcmod to act like a server stream. */
194 	if (ioctl(fd, I_STR, &strioc) < 0) {
195 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
196 		(void) t_close(fd);
197 		return (-1);
198 	}
199 
200 	/*
201 	 * Re-push timod so that we will still be doing TLI
202 	 * operations on the descriptor.
203 	 */
204 	if (ioctl(fd, I_PUSH, "timod") < 0) {
205 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
206 		(void) t_close(fd);
207 		return (-1);
208 	}
209 
210 	/*
211 	 * Enable options of returning the ip's for udp.
212 	 */
213 	if (strcmp(nconf->nc_netid, "udp6") == 0)
214 		__rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
215 	else if (strcmp(nconf->nc_netid, "udp") == 0)
216 		__rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
217 
218 	return (fd);
219 }
220 
221 static int
222 nofile_increase(int limit)
223 {
224 	struct rlimit rl;
225 
226 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
227 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
228 		return (-1);
229 	}
230 
231 	if (limit > 0)
232 		rl.rlim_cur = limit;
233 	else
234 		rl.rlim_cur += NOFILE_INC_SIZE;
235 
236 	if (rl.rlim_cur > rl.rlim_max &&
237 	    rl.rlim_max != RLIM_INFINITY)
238 		rl.rlim_max = rl.rlim_cur;
239 
240 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
241 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
242 		    rl.rlim_cur);
243 		return (-1);
244 	}
245 
246 	return (0);
247 }
248 
249 static void
250 nfslib_set_sockbuf(int fd)
251 {
252 	int curval, val;
253 
254 	val = NFSD_TCP_BUFSZ;
255 
256 	curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
257 	syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
258 	if ((curval != -1) && (curval < val)) {
259 		syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
260 		if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
261 			syslog(LOG_ERR,
262 			    "couldn't set SO_SNDBUF to %d - t_errno = %d",
263 			    val, t_errno);
264 			syslog(LOG_ERR,
265 			    "Check and increase system-wide tcp_max_buf");
266 		}
267 	}
268 
269 	curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
270 	syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
271 	if ((curval != -1) && (curval < val)) {
272 		syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
273 		if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
274 			syslog(LOG_ERR,
275 			    "couldn't set SO_RCVBUF to %d - t_errno = %d",
276 			    val, t_errno);
277 			syslog(LOG_ERR,
278 			    "Check and increase system-wide tcp_max_buf");
279 		}
280 	}
281 }
282 
283 int
284 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
285 	struct nd_hostserv *hs, int backlog)
286 {
287 	int fd;
288 	struct t_bind  *ntb;
289 	struct t_bind tb;
290 	struct nd_addrlist *addrlist;
291 	struct t_optmgmt req, resp;
292 	struct opthdr *opt;
293 	char reqbuf[128];
294 	bool_t use_any = FALSE;
295 	bool_t gzone = TRUE;
296 
297 	if ((fd = nfslib_transport_open(nconf)) == -1) {
298 		syslog(LOG_ERR, "cannot establish transport service over %s",
299 		    nconf->nc_device);
300 		return (-1);
301 	}
302 
303 	addrlist = (struct nd_addrlist *)NULL;
304 
305 	/* nfs4_callback service does not used a fieed port number */
306 
307 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
308 		tb.addr.maxlen = 0;
309 		tb.addr.len = 0;
310 		tb.addr.buf = 0;
311 		use_any = TRUE;
312 		gzone = (getzoneid() == GLOBAL_ZONEID);
313 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
314 
315 		syslog(LOG_ERR,
316 		"Cannot get address for transport %s host %s service %s",
317 		    nconf->nc_netid, hs->h_host, hs->h_serv);
318 		(void) t_close(fd);
319 		return (-1);
320 	}
321 
322 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
323 		/*
324 		 * If we're running over TCP, then set the
325 		 * SO_REUSEADDR option so that we can bind
326 		 * to our preferred address even if previously
327 		 * left connections exist in FIN_WAIT states.
328 		 * This is somewhat bogus, but otherwise you have
329 		 * to wait 2 minutes to restart after killing it.
330 		 */
331 		if (reuseaddr(fd) == -1) {
332 			syslog(LOG_WARNING,
333 			"couldn't set SO_REUSEADDR option on transport");
334 		}
335 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
336 		/*
337 		 * In order to run MLP on UDP, we need to handle creds.
338 		 */
339 		if (recvucred(fd) == -1) {
340 			syslog(LOG_WARNING,
341 			    "couldn't set SO_RECVUCRED option on transport");
342 		}
343 	}
344 
345 	/*
346 	 * Make non global zone nfs4_callback port MLP
347 	 */
348 	if (use_any && is_system_labeled() && !gzone) {
349 		if (anonmlp(fd) == -1) {
350 			/*
351 			 * failing to set this option means nfs4_callback
352 			 * could fail silently later. So fail it with
353 			 * with an error message now.
354 			 */
355 			syslog(LOG_ERR,
356 			    "couldn't set SO_ANON_MLP option on transport");
357 			(void) t_close(fd);
358 			return (-1);
359 		}
360 	}
361 
362 	if (nconf->nc_semantics == NC_TPI_CLTS)
363 		tb.qlen = 0;
364 	else
365 		tb.qlen = backlog;
366 
367 	/* LINTED pointer alignment */
368 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
369 	if (ntb == (struct t_bind *)NULL) {
370 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
371 		(void) t_close(fd);
372 		netdir_free((void *)addrlist, ND_ADDRLIST);
373 		return (-1);
374 	}
375 
376 	/*
377 	 * XXX - what about the space tb->addr.buf points to? This should
378 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
379 	 * should't be called with T_ALL.
380 	 */
381 	if (addrlist)
382 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
383 
384 	if (t_bind(fd, &tb, ntb) == -1) {
385 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
386 		(void) t_free((char *)ntb, T_BIND);
387 		netdir_free((void *)addrlist, ND_ADDRLIST);
388 		(void) t_close(fd);
389 		return (-1);
390 	}
391 
392 	/* make sure we bound to the right address */
393 	if (use_any == FALSE &&
394 	    (tb.addr.len != ntb->addr.len ||
395 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
396 		syslog(LOG_ERR, "t_bind to wrong address");
397 		(void) t_free((char *)ntb, T_BIND);
398 		netdir_free((void *)addrlist, ND_ADDRLIST);
399 		(void) t_close(fd);
400 		return (-1);
401 	}
402 
403 	/*
404 	 * Call nfs4svc_setport so that the kernel can be
405 	 * informed what port number the daemon is listing
406 	 * for incoming connection requests.
407 	 */
408 
409 	if ((nconf->nc_semantics == NC_TPI_COTS ||
410 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
411 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
412 
413 	*addr = &ntb->addr;
414 	netdir_free((void *)addrlist, ND_ADDRLIST);
415 
416 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
417 		/*
418 		 * Disable the Nagle algorithm on TCP connections.
419 		 * Connections accepted from this listener will
420 		 * inherit the listener options.
421 		 */
422 
423 		/* LINTED pointer alignment */
424 		opt = (struct opthdr *)reqbuf;
425 		opt->level = IPPROTO_TCP;
426 		opt->name = TCP_NODELAY;
427 		opt->len = sizeof (int);
428 
429 		/* LINTED pointer alignment */
430 		*(int *)((char *)opt + sizeof (*opt)) = 1;
431 
432 		req.flags = T_NEGOTIATE;
433 		req.opt.len = sizeof (*opt) + opt->len;
434 		req.opt.buf = (char *)opt;
435 		resp.flags = 0;
436 		resp.opt.buf = reqbuf;
437 		resp.opt.maxlen = sizeof (reqbuf);
438 
439 		if (t_optmgmt(fd, &req, &resp) < 0 ||
440 		    resp.flags != T_SUCCESS) {
441 			syslog(LOG_ERR,
442 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
443 			    nconf->nc_proto, t_errno);
444 		}
445 
446 		nfslib_set_sockbuf(fd);
447 	}
448 
449 	return (fd);
450 }
451 
452 static int
453 get_opt(int fd, int level, int name)
454 {
455 	struct t_optmgmt req, res;
456 	struct {
457 		struct opthdr opt;
458 		int value;
459 	} reqbuf;
460 
461 	reqbuf.opt.level = level;
462 	reqbuf.opt.name = name;
463 	reqbuf.opt.len = sizeof (int);
464 	reqbuf.value = 0;
465 
466 	req.flags = T_CURRENT;
467 	req.opt.len = sizeof (reqbuf);
468 	req.opt.buf = (char *)&reqbuf;
469 
470 	res.flags = 0;
471 	res.opt.buf = (char *)&reqbuf;
472 	res.opt.maxlen = sizeof (reqbuf);
473 
474 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
475 		t_error("t_optmgmt");
476 		return (-1);
477 	}
478 	return (reqbuf.value);
479 }
480 
481 static int
482 setopt(int fd, int level, int name, int value)
483 {
484 	struct t_optmgmt req, resp;
485 	struct {
486 		struct opthdr opt;
487 		int value;
488 	} reqbuf;
489 
490 	reqbuf.opt.level = level;
491 	reqbuf.opt.name = name;
492 	reqbuf.opt.len = sizeof (int);
493 
494 	reqbuf.value = value;
495 
496 	req.flags = T_NEGOTIATE;
497 	req.opt.len = sizeof (reqbuf);
498 	req.opt.buf = (char *)&reqbuf;
499 
500 	resp.flags = 0;
501 	resp.opt.buf = (char *)&reqbuf;
502 	resp.opt.maxlen = sizeof (reqbuf);
503 
504 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
505 		t_error("t_optmgmt");
506 		return (-1);
507 	}
508 	return (0);
509 }
510 
/*
 * Switch on SO_REUSEADDR for the endpoint fd.
 * Returns the result of setopt(): 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	const int enable = 1;

	return (setopt(fd, SOL_SOCKET, SO_REUSEADDR, enable));
}
516 
517 static int
518 recvucred(int fd)
519 {
520 	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
521 }
522 
523 static int
524 anonmlp(int fd)
525 {
526 	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
527 }
528 
529 void
530 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
531 {
532 	int error;
533 
534 	/*
535 	 * Save the error code across syslog(), just in case syslog()
536 	 * gets its own error and, therefore, overwrites errno.
537 	 */
538 	error = errno;
539 	if (t_errno == TSYSERR) {
540 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
541 		    tli_name, fd, nconf->nc_proto);
542 	} else {
543 		syslog(LOG_ERR,
544 		    "%s(file descriptor %d/transport %s) TLI error %d",
545 		    tli_name, fd, nconf->nc_proto, t_errno);
546 	}
547 	errno = error;
548 }
549 
550 /*
551  * Called to set up service over a particular transport.
552  */
553 void
554 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
555 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
556 {
557 	register int sock;
558 	struct protob *protobp;
559 	struct netbuf *retaddr;
560 	struct netconfig *retnconf;
561 	struct netbuf addrmask;
562 	int vers;
563 	int err;
564 	int l;
565 
566 	if (provider)
567 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
568 		    &retnconf);
569 	else
570 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
571 		    &retnconf);
572 
573 	if (sock == -1) {
574 		(void) syslog(LOG_ERR,
575 	"Cannot establish %s service over %s: transport setup problem.",
576 		    protobp0->serv, provider ? provider : proto);
577 		return;
578 	}
579 
580 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
581 		(void) syslog(LOG_ERR,
582 		    "Cannot set address mask for %s", retnconf->nc_netid);
583 		return;
584 	}
585 
586 	/*
587 	 * Register all versions of the programs in the protocol block list.
588 	 */
589 	l = strlen(NC_UDP);
590 	for (protobp = protobp0; protobp; protobp = protobp->next) {
591 		for (vers = protobp->versmin; vers <= protobp->versmax;
592 		    vers++) {
593 			if ((protobp->program == NFS_PROGRAM ||
594 			    protobp->program == NFS_ACL_PROGRAM) &&
595 			    vers == NFS_V4 &&
596 			    strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
597 				continue;
598 
599 			if (use_pmap) {
600 				/*
601 				 * Note that if we're using a portmapper
602 				 * instead of rpcbind then we can't do an
603 				 * unregister operation here.
604 				 *
605 				 * The reason is that the portmapper unset
606 				 * operation removes all the entries for a
607 				 * given program/version regardelss of
608 				 * transport protocol.
609 				 *
610 				 * The caller of this routine needs to ensure
611 				 * that __pmap_unset() has been called for all
612 				 * program/version service pairs they plan
613 				 * to support before they start registering
614 				 * each program/version/protocol triplet.
615 				 */
616 				(void) __pmap_set(protobp->program, vers,
617 				    retnconf, retaddr);
618 			} else {
619 				(void) rpcb_unset(protobp->program, vers,
620 				    retnconf);
621 				(void) rpcb_set(protobp->program, vers,
622 				    retnconf, retaddr);
623 			}
624 		}
625 	}
626 
627 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
628 		/* Don't drop core if supporting module(s) aren't loaded. */
629 		(void) signal(SIGSYS, SIG_IGN);
630 
631 		/*
632 		 * svc() doesn't block, it returns success or failure.
633 		 */
634 
635 		if (svc == NULL && Mysvc4 != NULL)
636 			err = (*Mysvc4)(sock, &addrmask, retnconf,
637 			    NFS4_SETPORT|NFS4_KRPC_START, retaddr);
638 		else
639 			err = (*svc)(sock, addrmask, retnconf);
640 
641 		if (err < 0) {
642 			(void) syslog(LOG_ERR,
643 			    "Cannot establish %s service over <file desc."
644 			    " %d, protocol %s> : %m. Exiting",
645 			    protobp0->serv, sock, retnconf->nc_proto);
646 			exit(1);
647 		}
648 	}
649 
650 	/*
651 	 * We successfully set up the server over this transport.
652 	 * Add this descriptor to the one being polled on.
653 	 */
654 	add_to_poll_list(sock, retnconf);
655 }
656 
657 /*
658  * Set up the NFS service over all the available transports.
659  * Returns -1 for failure, 0 for success.
660  */
661 int
662 do_all(struct protob *protobp,
663 	int (*svc)(int, struct netbuf, struct netconfig *), int use_pmap)
664 {
665 	struct netconfig *nconf;
666 	NCONF_HANDLE *nc;
667 	int l;
668 
669 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
670 		syslog(LOG_ERR, "setnetconfig failed: %m");
671 		return (-1);
672 	}
673 	l = strlen(NC_UDP);
674 	while (nconf = getnetconfig(nc)) {
675 		if ((nconf->nc_flag & NC_VISIBLE) &&
676 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
677 		    OK_TPI_TYPE(nconf) &&
678 		    (protobp->program != NFS4_CALLBACK ||
679 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
680 			do_one(nconf->nc_device, nconf->nc_proto,
681 			    protobp, svc, use_pmap);
682 	}
683 	(void) endnetconfig(nc);
684 	return (0);
685 }
686 
687 /*
688  * poll on the open transport descriptors for events and errors.
689  */
690 void
691 poll_for_action(void)
692 {
693 	int nfds;
694 	int i;
695 
696 	/*
697 	 * Keep polling until all transports have been closed. When this
698 	 * happens, we return.
699 	 */
700 	while ((int)num_fds > 0) {
701 		nfds = poll(poll_array, num_fds, INFTIM);
702 		switch (nfds) {
703 		case 0:
704 			continue;
705 
706 		case -1:
707 			/*
708 			 * Some errors from poll could be
709 			 * due to temporary conditions, and we try to
710 			 * be robust in the face of them. Other
711 			 * errors (should never happen in theory)
712 			 * are fatal (eg. EINVAL, EFAULT).
713 			 */
714 			switch (errno) {
715 			case EINTR:
716 				continue;
717 
718 			case EAGAIN:
719 			case ENOMEM:
720 				(void) sleep(10);
721 				continue;
722 
723 			default:
724 				(void) syslog(LOG_ERR,
725 				    "poll failed: %m. Exiting");
726 				exit(1);
727 			}
728 		default:
729 			break;
730 		}
731 
732 		/*
733 		 * Go through the poll list looking for events.
734 		 */
735 		for (i = 0; i < num_fds && nfds > 0; i++) {
736 			if (poll_array[i].revents) {
737 				nfds--;
738 				/*
739 				 * We have a message, so try to read it.
740 				 * Record the error return in errno,
741 				 * so that syslog(LOG_ERR, "...%m")
742 				 * dumps the corresponding error string.
743 				 */
744 				if (conn_polled[i].nc.nc_semantics ==
745 				    NC_TPI_CLTS) {
746 					errno = do_poll_clts_action(
747 					    poll_array[i].fd, i);
748 				} else {
749 					errno = do_poll_cots_action(
750 					    poll_array[i].fd, i);
751 				}
752 
753 				if (errno == 0)
754 					continue;
755 				/*
756 				 * Most returned error codes mean that there is
757 				 * fatal condition which we can only deal with
758 				 * by closing the transport.
759 				 */
760 				if (errno != EAGAIN && errno != ENOMEM) {
761 					(void) syslog(LOG_ERR,
762 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
763 					    poll_array[i].fd,
764 					    conn_polled[i].nc.nc_proto);
765 					(void) t_close(poll_array[i].fd);
766 					remove_from_poll_list(poll_array[i].fd);
767 
768 				} else if (errno == ENOMEM)
769 					(void) sleep(5);
770 			}
771 		}
772 	}
773 
774 	(void) syslog(LOG_ERR,
775 	    "All transports have been closed with errors. Exiting.");
776 }
777 
778 /*
779  * Allocate poll/transport array entries for this descriptor.
780  */
781 static void
782 add_to_poll_list(int fd, struct netconfig *nconf)
783 {
784 	static int poll_array_size = 0;
785 
786 	/*
787 	 * If the arrays are full, allocate new ones.
788 	 */
789 	if (num_fds == poll_array_size) {
790 		struct pollfd *tpa;
791 		struct conn_entry *tnp;
792 
793 		if (poll_array_size != 0) {
794 			tpa = poll_array;
795 			tnp = conn_polled;
796 		} else
797 			tpa = (struct pollfd *)0;
798 
799 		poll_array_size += POLL_ARRAY_INC_SIZE;
800 		/*
801 		 * Allocate new arrays.
802 		 */
803 		poll_array = (struct pollfd *)
804 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
805 		conn_polled = (struct conn_entry *)
806 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
807 		if (poll_array == (struct pollfd *)NULL ||
808 		    conn_polled == (struct conn_entry *)NULL) {
809 			syslog(LOG_ERR, "malloc failed for poll array");
810 			exit(1);
811 		}
812 
813 		/*
814 		 * Copy the data of the old ones into new arrays, and
815 		 * free the old ones.
816 		 */
817 		if (tpa) {
818 			(void) memcpy((void *)poll_array, (void *)tpa,
819 			    num_fds * sizeof (struct pollfd));
820 			(void) memcpy((void *)conn_polled, (void *)tnp,
821 			    num_fds * sizeof (struct conn_entry));
822 			free((void *)tpa);
823 			free((void *)tnp);
824 		}
825 	}
826 
827 	/*
828 	 * Set the descriptor and event list. All possible events are
829 	 * polled for.
830 	 */
831 	poll_array[num_fds].fd = fd;
832 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
833 
834 	/*
835 	 * Copy the transport data over too.
836 	 */
837 	conn_polled[num_fds].nc = *nconf;
838 	conn_polled[num_fds].closing = 0;
839 
840 	/*
841 	 * Set the descriptor to non-blocking. Avoids a race
842 	 * between data arriving on the stream and then having it
843 	 * flushed before we can read it.
844 	 */
845 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
846 		(void) syslog(LOG_ERR,
847 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
848 		    num_fds, nconf->nc_proto);
849 		exit(1);
850 	}
851 
852 	/*
853 	 * Count this descriptor.
854 	 */
855 	++num_fds;
856 }
857 
858 static void
859 remove_from_poll_list(int fd)
860 {
861 	int i;
862 	int num_to_copy;
863 
864 	for (i = 0; i < num_fds; i++) {
865 		if (poll_array[i].fd == fd) {
866 			--num_fds;
867 			num_to_copy = num_fds - i;
868 			(void) memcpy((void *)&poll_array[i],
869 			    (void *)&poll_array[i+1],
870 			    num_to_copy * sizeof (struct pollfd));
871 			(void) memset((void *)&poll_array[num_fds], 0,
872 			    sizeof (struct pollfd));
873 			(void) memcpy((void *)&conn_polled[i],
874 			    (void *)&conn_polled[i+1],
875 			    num_to_copy * sizeof (struct conn_entry));
876 			(void) memset((void *)&conn_polled[num_fds], 0,
877 			    sizeof (struct conn_entry));
878 			return;
879 		}
880 	}
881 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
882 
883 }
884 
885 /*
886  * Called to read and interpret the event on a connectionless descriptor.
887  * Returns 0 if successful, or a UNIX error code if failure.
888  */
889 static int
890 do_poll_clts_action(int fd, int conn_index)
891 {
892 	int error;
893 	int ret;
894 	int flags;
895 	struct netconfig *nconf = &conn_polled[conn_index].nc;
896 	static struct t_unitdata *unitdata = NULL;
897 	static struct t_uderr *uderr = NULL;
898 	static int oldfd = -1;
899 	struct nd_hostservlist *host = NULL;
900 	struct strbuf ctl[1], data[1];
901 	/*
902 	 * We just need to have some space to consume the
903 	 * message in the event we can't use the TLI interface to do the
904 	 * job.
905 	 *
906 	 * We flush the message using getmsg(). For the control part
907 	 * we allocate enough for any TPI header plus 32 bytes for address
908 	 * and options. For the data part, there is nothing magic about
909 	 * the size of the array, but 256 bytes is probably better than
910 	 * 1 byte, and we don't expect any data portion anyway.
911 	 *
912 	 * If the array sizes are too small, we handle this because getmsg()
913 	 * (called to consume the message) will return MOREDATA|MORECTL.
914 	 * Thus we just call getmsg() until it's read the message.
915 	 */
916 	char ctlbuf[sizeof (union T_primitives) + 32];
917 	char databuf[256];
918 
919 	/*
920 	 * If this is the same descriptor as the last time
921 	 * do_poll_clts_action was called, we can save some
922 	 * de-allocation and allocation.
923 	 */
924 	if (oldfd != fd) {
925 		oldfd = fd;
926 
927 		if (unitdata) {
928 			(void) t_free((char *)unitdata, T_UNITDATA);
929 			unitdata = NULL;
930 		}
931 		if (uderr) {
932 			(void) t_free((char *)uderr, T_UDERROR);
933 			uderr = NULL;
934 		}
935 	}
936 
937 	/*
938 	 * Allocate a unitdata structure for receiving the event.
939 	 */
940 	if (unitdata == NULL) {
941 		/* LINTED pointer alignment */
942 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
943 		if (unitdata == NULL) {
944 			if (t_errno == TSYSERR) {
945 				/*
946 				 * Save the error code across
947 				 * syslog(), just in case
948 				 * syslog() gets its own error
949 				 * and therefore overwrites errno.
950 				 */
951 				error = errno;
952 				(void) syslog(LOG_ERR,
953 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
954 				    fd, nconf->nc_proto);
955 				return (error);
956 			}
957 			(void) syslog(LOG_ERR,
958 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
959 			    fd, nconf->nc_proto, t_errno);
960 			goto flush_it;
961 		}
962 	}
963 
964 try_again:
965 	flags = 0;
966 
967 	/*
968 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
969 	 * we don't get any, because rpcmod filters them out.
970 	 * However, we need to call t_rcvudata() to let TLI
971 	 * tell us we have a T_UDERROR_IND.
972 	 *
973 	 * algorithm is:
974 	 * 	t_rcvudata(), expecting TLOOK.
975 	 * 	t_look(), expecting T_UDERR.
976 	 * 	t_rcvuderr(), expecting success (0).
977 	 * 	expand destination address into ASCII,
978 	 *	and dump it.
979 	 */
980 
981 	ret = t_rcvudata(fd, unitdata, &flags);
982 	if (ret == 0 || t_errno == TBUFOVFLW) {
983 		(void) syslog(LOG_WARNING,
984 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
985 		    fd, nconf->nc_proto, unitdata->udata.len);
986 
987 		/*
988 		 * Even though we don't expect any data, in case we do,
989 		 * keep reading until there is no more.
990 		 */
991 		if (flags & T_MORE)
992 			goto try_again;
993 
994 		return (0);
995 	}
996 
997 	switch (t_errno) {
998 	case TNODATA:
999 		return (0);
1000 	case TSYSERR:
1001 		/*
1002 		 * System errors are returned to caller.
1003 		 * Save the error code across
1004 		 * syslog(), just in case
1005 		 * syslog() gets its own error
1006 		 * and therefore overwrites errno.
1007 		 */
1008 		error = errno;
1009 		(void) syslog(LOG_ERR,
1010 		    "t_rcvudata(file descriptor %d/transport %s) %m",
1011 		    fd, nconf->nc_proto);
1012 		return (error);
1013 	case TLOOK:
1014 		break;
1015 	default:
1016 		(void) syslog(LOG_ERR,
1017 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
1018 		    fd, nconf->nc_proto, t_errno);
1019 		goto flush_it;
1020 	}
1021 
1022 	ret = t_look(fd);
1023 	switch (ret) {
1024 	case 0:
1025 		return (0);
1026 	case -1:
1027 		/*
1028 		 * System errors are returned to caller.
1029 		 */
1030 		if (t_errno == TSYSERR) {
1031 			/*
1032 			 * Save the error code across
1033 			 * syslog(), just in case
1034 			 * syslog() gets its own error
1035 			 * and therefore overwrites errno.
1036 			 */
1037 			error = errno;
1038 			(void) syslog(LOG_ERR,
1039 			    "t_look(file descriptor %d/transport %s) %m",
1040 			    fd, nconf->nc_proto);
1041 			return (error);
1042 		}
1043 		(void) syslog(LOG_ERR,
1044 		    "t_look(file descriptor %d/transport %s) TLI error %d",
1045 		    fd, nconf->nc_proto, t_errno);
1046 		goto flush_it;
1047 	case T_UDERR:
1048 		break;
1049 	default:
1050 		(void) syslog(LOG_WARNING,
1051 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1052 		    fd, nconf->nc_proto, ret, T_UDERR);
1053 	}
1054 
1055 	if (uderr == NULL) {
1056 		/* LINTED pointer alignment */
1057 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1058 		if (uderr == NULL) {
1059 			if (t_errno == TSYSERR) {
1060 				/*
1061 				 * Save the error code across
1062 				 * syslog(), just in case
1063 				 * syslog() gets its own error
1064 				 * and therefore overwrites errno.
1065 				 */
1066 				error = errno;
1067 				(void) syslog(LOG_ERR,
1068 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1069 				    fd, nconf->nc_proto);
1070 				return (error);
1071 			}
1072 			(void) syslog(LOG_ERR,
1073 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1074 			    fd, nconf->nc_proto, t_errno);
1075 			goto flush_it;
1076 		}
1077 	}
1078 
1079 	ret = t_rcvuderr(fd, uderr);
1080 	if (ret == 0) {
1081 
1082 		/*
1083 		 * Save the datagram error in errno, so that the
1084 		 * %m argument to syslog picks up the error string.
1085 		 */
1086 		errno = uderr->error;
1087 
1088 		/*
1089 		 * Log the datagram error, then log the host that
1090 		 * probably triggerred. Cannot log both in the
1091 		 * same transaction because of packet size limitations
1092 		 * in /dev/log.
1093 		 */
1094 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1095 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1096 		    fd, nconf->nc_proto);
1097 
1098 		/*
1099 		 * Try to map the client's address back to a
1100 		 * name.
1101 		 */
1102 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1103 		if (ret != -1 && host && host->h_cnt > 0 &&
1104 		    host->h_hostservs) {
1105 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1106 "Bad NFS response was sent to client with host name: %s; service port: %s",
1107 		    host->h_hostservs->h_host,
1108 		    host->h_hostservs->h_serv);
1109 		} else {
1110 			int i, j;
1111 			char *buf;
1112 			char *hex = "0123456789abcdef";
1113 
1114 			/*
1115 			 * Mapping failed, print the whole thing
1116 			 * in ASCII hex.
1117 			 */
1118 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1119 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1120 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1121 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1122 			}
1123 			buf[j] = '\0';
1124 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1125 	"Bad NFS response was sent to client with transport address: 0x%s",
1126 		    buf);
1127 			free((void *)buf);
1128 		}
1129 
1130 		if (ret == 0 && host != NULL)
1131 			netdir_free((void *)host, ND_HOSTSERVLIST);
1132 		return (0);
1133 	}
1134 
1135 	switch (t_errno) {
1136 	case TNOUDERR:
1137 		goto flush_it;
1138 	case TSYSERR:
1139 		/*
1140 		 * System errors are returned to caller.
1141 		 * Save the error code across
1142 		 * syslog(), just in case
1143 		 * syslog() gets its own error
1144 		 * and therefore overwrites errno.
1145 		 */
1146 		error = errno;
1147 		(void) syslog(LOG_ERR,
1148 		    "t_rcvuderr(file descriptor %d/transport %s) %m",
1149 		    fd, nconf->nc_proto);
1150 		return (error);
1151 	default:
1152 		(void) syslog(LOG_ERR,
1153 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1154 		    fd, nconf->nc_proto, t_errno);
1155 		goto flush_it;
1156 	}
1157 
1158 flush_it:
1159 	/*
1160 	 * If we get here, then we could not cope with whatever message
1161 	 * we attempted to read, so flush it. If we did read a message,
1162 	 * and one isn't present, that is all right, because fd is in
1163 	 * nonblocking mode.
1164 	 */
1165 	(void) syslog(LOG_ERR,
1166 	"Flushing one input message from <file descriptor %d/transport %s>",
1167 	    fd, nconf->nc_proto);
1168 
1169 	/*
1170 	 * Read and discard the message. Do this this until there is
1171 	 * no more control/data in the message or until we get an error.
1172 	 */
1173 	do {
1174 		ctl->maxlen = sizeof (ctlbuf);
1175 		ctl->buf = ctlbuf;
1176 		data->maxlen = sizeof (databuf);
1177 		data->buf = databuf;
1178 		flags = 0;
1179 		ret = getmsg(fd, ctl, data, &flags);
1180 		if (ret == -1)
1181 			return (errno);
1182 	} while (ret != 0);
1183 
1184 	return (0);
1185 }
1186 
1187 static void
1188 conn_close_oldest(void)
1189 {
1190 	int fd;
1191 	int i1;
1192 
1193 	/*
1194 	 * Find the oldest connection that is not already in the
1195 	 * process of shutting down.
1196 	 */
1197 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1198 		if (i1 >= num_fds)
1199 			return;
1200 		if (conn_polled[i1].closing == 0)
1201 			break;
1202 	}
1203 #ifdef DEBUG
1204 	printf("too many connections (%d), releasing oldest (%d)\n",
1205 	    num_conns, poll_array[i1].fd);
1206 #else
1207 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1208 	    num_conns, poll_array[i1].fd);
1209 #endif
1210 	fd = poll_array[i1].fd;
1211 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1212 		/*
1213 		 * For politeness, send a T_DISCON_REQ to the transport
1214 		 * provider.  We close the stream anyway.
1215 		 */
1216 		(void) t_snddis(fd, (struct t_call *)0);
1217 		num_conns--;
1218 		remove_from_poll_list(fd);
1219 		(void) t_close(fd);
1220 	} else {
1221 		/*
1222 		 * For orderly release, we do not close the stream
1223 		 * until the T_ORDREL_IND arrives to complete
1224 		 * the handshake.
1225 		 */
1226 		if (t_sndrel(fd) == 0)
1227 			conn_polled[i1].closing = 1;
1228 	}
1229 }
1230 
1231 static boolean_t
1232 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1233 {
1234 	struct conn_ind	*conn;
1235 	struct conn_ind	*next_conn;
1236 
1237 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1238 	if (conn == NULL) {
1239 		syslog(LOG_ERR, "malloc for listen indication failed");
1240 		return (FALSE);
1241 	}
1242 
1243 	/* LINTED pointer alignment */
1244 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1245 	if (conn->conn_call == NULL) {
1246 		free((char *)conn);
1247 		nfslib_log_tli_error("t_alloc", fd, nconf);
1248 		return (FALSE);
1249 	}
1250 
1251 	if (t_listen(fd, conn->conn_call) == -1) {
1252 		nfslib_log_tli_error("t_listen", fd, nconf);
1253 		(void) t_free((char *)conn->conn_call, T_CALL);
1254 		free((char *)conn);
1255 		return (FALSE);
1256 	}
1257 
1258 	if (conn->conn_call->udata.len > 0) {
1259 		syslog(LOG_WARNING,
1260 	"rejecting inbound connection(%s) with %d bytes of connect data",
1261 		    nconf->nc_proto, conn->conn_call->udata.len);
1262 
1263 		conn->conn_call->udata.len = 0;
1264 		(void) t_snddis(fd, conn->conn_call);
1265 		(void) t_free((char *)conn->conn_call, T_CALL);
1266 		free((char *)conn);
1267 		return (FALSE);
1268 	}
1269 
1270 	if ((next_conn = *connp) != NULL) {
1271 		next_conn->conn_prev->conn_next = conn;
1272 		conn->conn_next = next_conn;
1273 		conn->conn_prev = next_conn->conn_prev;
1274 		next_conn->conn_prev = conn;
1275 	} else {
1276 		conn->conn_next = conn;
1277 		conn->conn_prev = conn;
1278 		*connp = conn;
1279 	}
1280 	return (TRUE);
1281 }
1282 
1283 static int
1284 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1285 {
1286 	struct conn_ind	*conn;
1287 	struct t_discon	discon;
1288 
1289 	discon.udata.buf = (char *)0;
1290 	discon.udata.maxlen = 0;
1291 	if (t_rcvdis(fd, &discon) == -1) {
1292 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1293 		return (-1);
1294 	}
1295 
1296 	conn = *connp;
1297 	if (conn == NULL)
1298 		return (0);
1299 
1300 	do {
1301 		if (conn->conn_call->sequence == discon.sequence) {
1302 			if (conn->conn_next == conn)
1303 				*connp = (struct conn_ind *)0;
1304 			else {
1305 				if (conn == *connp) {
1306 					*connp = conn->conn_next;
1307 				}
1308 				conn->conn_next->conn_prev = conn->conn_prev;
1309 				conn->conn_prev->conn_next = conn->conn_next;
1310 			}
1311 			free((char *)conn);
1312 			break;
1313 		}
1314 		conn = conn->conn_next;
1315 	} while (conn != *connp);
1316 
1317 	return (0);
1318 }
1319 
1320 static void
1321 cots_listen_event(int fd, int conn_index)
1322 {
1323 	struct t_call *call;
1324 	struct conn_ind	*conn;
1325 	struct conn_ind	*conn_head;
1326 	int event;
1327 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1328 	int new_fd;
1329 	struct netbuf addrmask;
1330 	int ret = 0;
1331 	char *clnt;
1332 	char *clnt_uaddr = NULL;
1333 	struct nd_hostservlist *clnt_serv = NULL;
1334 
1335 	conn_head = (struct conn_ind *)0;
1336 	(void) conn_get(fd, nconf, &conn_head);
1337 
1338 	while ((conn = conn_head) != NULL) {
1339 		conn_head = conn->conn_next;
1340 		if (conn_head == conn)
1341 			conn_head = (struct conn_ind *)0;
1342 		else {
1343 			conn_head->conn_prev = conn->conn_prev;
1344 			conn->conn_prev->conn_next = conn_head;
1345 		}
1346 		call = conn->conn_call;
1347 		free((char *)conn);
1348 
1349 		/*
1350 		 * If we have already accepted the maximum number of
1351 		 * connections allowed on the command line, then drop
1352 		 * the oldest connection (for any protocol) before
1353 		 * accepting the new connection.  Unless explicitly
1354 		 * set on the command line, max_conns_allowed is -1.
1355 		 */
1356 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1357 			conn_close_oldest();
1358 
1359 		/*
1360 		 * Create a new transport endpoint for the same proto as
1361 		 * the listener.
1362 		 */
1363 		new_fd = nfslib_transport_open(nconf);
1364 		if (new_fd == -1) {
1365 			call->udata.len = 0;
1366 			(void) t_snddis(fd, call);
1367 			(void) t_free((char *)call, T_CALL);
1368 			syslog(LOG_ERR, "Cannot establish transport over %s",
1369 			    nconf->nc_device);
1370 			continue;
1371 		}
1372 
1373 		/* Bind to a generic address/port for the accepting stream. */
1374 		if (t_bind(new_fd, (struct t_bind *)NULL,
1375 		    (struct t_bind *)NULL) == -1) {
1376 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1377 			call->udata.len = 0;
1378 			(void) t_snddis(fd, call);
1379 			(void) t_free((char *)call, T_CALL);
1380 			(void) t_close(new_fd);
1381 			continue;
1382 		}
1383 
1384 		while (t_accept(fd, new_fd, call) == -1) {
1385 			if (t_errno != TLOOK) {
1386 #ifdef DEBUG
1387 				nfslib_log_tli_error("t_accept", fd, nconf);
1388 #endif
1389 				call->udata.len = 0;
1390 				(void) t_snddis(fd, call);
1391 				(void) t_free((char *)call, T_CALL);
1392 				(void) t_close(new_fd);
1393 				goto do_next_conn;
1394 			}
1395 			while (event = t_look(fd)) {
1396 				switch (event) {
1397 				case T_LISTEN:
1398 #ifdef DEBUG
1399 					printf(
1400 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1401 #endif
1402 					(void) conn_get(fd, nconf, &conn_head);
1403 					continue;
1404 				case T_DISCONNECT:
1405 #ifdef DEBUG
1406 					printf(
1407 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1408 					    nconf->nc_proto);
1409 #endif
1410 					(void) discon_get(fd, nconf,
1411 					    &conn_head);
1412 					continue;
1413 				default:
1414 					syslog(LOG_ERR,
1415 			"unexpected event 0x%x during accept processing (%s)",
1416 					    event, nconf->nc_proto);
1417 					call->udata.len = 0;
1418 					(void) t_snddis(fd, call);
1419 					(void) t_free((char *)call, T_CALL);
1420 					(void) t_close(new_fd);
1421 					goto do_next_conn;
1422 				}
1423 			}
1424 		}
1425 
1426 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1427 			(void) syslog(LOG_ERR,
1428 			    "Cannot set address mask for %s",
1429 			    nconf->nc_netid);
1430 			return;
1431 		}
1432 
1433 		/* Tell KRPC about the new stream. */
1434 		if (Mysvc4 != NULL)
1435 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1436 			    NFS4_KRPC_START, &call->addr);
1437 		else
1438 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1439 
1440 		if (ret < 0) {
1441 			if (errno != ENOTCONN) {
1442 				syslog(LOG_ERR,
1443 				    "unable to register new connection: %m");
1444 			} else {
1445 				/*
1446 				 * This is the only error that could be
1447 				 * caused by the client, so who was it?
1448 				 */
1449 				if (netdir_getbyaddr(nconf, &clnt_serv,
1450 				    &(call->addr)) == ND_OK &&
1451 				    clnt_serv->h_cnt > 0)
1452 					clnt = clnt_serv->h_hostservs->h_host;
1453 				else
1454 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1455 					    &(call->addr));
1456 				/*
1457 				 * If we don't know who the client was,
1458 				 * remain silent.
1459 				 */
1460 				if (clnt)
1461 					syslog(LOG_ERR,
1462 "unable to register new connection: client %s has dropped connection", clnt);
1463 				if (clnt_serv)
1464 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1465 				if (clnt_uaddr)
1466 					free(clnt_uaddr);
1467 			}
1468 			free(addrmask.buf);
1469 			(void) t_snddis(new_fd, (struct t_call *)0);
1470 			(void) t_free((char *)call, T_CALL);
1471 			(void) t_close(new_fd);
1472 			goto do_next_conn;
1473 		}
1474 
1475 		free(addrmask.buf);
1476 		(void) t_free((char *)call, T_CALL);
1477 
1478 		/*
1479 		 * Poll on the new descriptor so that we get disconnect
1480 		 * and orderly release indications.
1481 		 */
1482 		num_conns++;
1483 		add_to_poll_list(new_fd, nconf);
1484 
1485 		/* Reset nconf in case it has been moved. */
1486 		nconf = &conn_polled[conn_index].nc;
1487 do_next_conn:;
1488 	}
1489 }
1490 
1491 static int
1492 do_poll_cots_action(int fd, int conn_index)
1493 {
1494 	char buf[256];
1495 	int event;
1496 	int i1;
1497 	int flags;
1498 	struct conn_entry *connent = &conn_polled[conn_index];
1499 	struct netconfig *nconf = &(connent->nc);
1500 	const char *errorstr;
1501 
1502 	while (event = t_look(fd)) {
1503 		switch (event) {
1504 		case T_LISTEN:
1505 #ifdef DEBUG
1506 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1507 #endif
1508 			cots_listen_event(fd, conn_index);
1509 			break;
1510 
1511 		case T_DATA:
1512 #ifdef DEBUG
1513 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1514 #endif
1515 			/*
1516 			 * Receive a private notification from CONS rpcmod.
1517 			 */
1518 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1519 			if (i1 == -1) {
1520 				syslog(LOG_ERR, "t_rcv failed");
1521 				break;
1522 			}
1523 			if (i1 < sizeof (int))
1524 				break;
1525 			i1 = BE32_TO_U32(buf);
1526 			if (i1 == 1 || i1 == 2) {
1527 				/*
1528 				 * This connection has been idle for too long,
1529 				 * so release it as politely as we can.  If we
1530 				 * have already initiated an orderly release
1531 				 * and we get notified that the stream is
1532 				 * still idle, pull the plug.  This prevents
1533 				 * hung connections from continuing to consume
1534 				 * resources.
1535 				 */
1536 #ifdef DEBUG
1537 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1538 printf("initiating orderly release of idle connection\n");
1539 #endif
1540 				if (nconf->nc_semantics == NC_TPI_COTS ||
1541 				    connent->closing != 0) {
1542 					(void) t_snddis(fd, (struct t_call *)0);
1543 					goto fdclose;
1544 				}
1545 				/*
1546 				 * For NC_TPI_COTS_ORD, the stream is closed
1547 				 * and removed from the poll list when the
1548 				 * T_ORDREL is received from the provider.  We
1549 				 * don't wait for it here because it may take
1550 				 * a while for the transport to shut down.
1551 				 */
1552 				if (t_sndrel(fd) == -1) {
1553 					syslog(LOG_ERR,
1554 					"unable to send orderly release %m");
1555 				}
1556 				connent->closing = 1;
1557 			} else
1558 				syslog(LOG_ERR,
1559 				"unexpected event from CONS rpcmod %d", i1);
1560 			break;
1561 
1562 		case T_ORDREL:
1563 #ifdef DEBUG
1564 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1565 #endif
1566 			/* Perform an orderly release. */
1567 			if (t_rcvrel(fd) == 0) {
1568 				/* T_ORDREL on listen fd's should be ignored */
1569 				if (!is_listen_fd_index(conn_index)) {
1570 					(void) t_sndrel(fd);
1571 					goto fdclose;
1572 				}
1573 				break;
1574 
1575 			} else if (t_errno == TLOOK) {
1576 				break;
1577 			} else {
1578 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1579 
1580 				/*
1581 				 * check to make sure we do not close
1582 				 * listen fd
1583 				 */
1584 				if (is_listen_fd_index(conn_index))
1585 					break;
1586 				else
1587 					goto fdclose;
1588 			}
1589 
1590 		case T_DISCONNECT:
1591 #ifdef DEBUG
1592 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1593 #endif
1594 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1595 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1596 
1597 			/*
1598 			 * T_DISCONNECT on listen fd's should be ignored.
1599 			 */
1600 			if (is_listen_fd_index(conn_index))
1601 				break;
1602 			else
1603 				goto fdclose;
1604 
1605 		case T_ERROR:
1606 		default:
1607 			if (event == T_ERROR || t_errno == TSYSERR) {
1608 				if ((errorstr = strerror(errno)) == NULL) {
1609 					(void) sprintf(buf,
1610 					    "Unknown error num %d", errno);
1611 					errorstr = (const char *) buf;
1612 				}
1613 			} else if (event == -1)
1614 				errorstr = t_strerror(t_errno);
1615 			else
1616 				errorstr = "";
1617 			syslog(LOG_ERR,
1618 			    "unexpected TLI event (0x%x) on "
1619 			    "connection-oriented transport(%s,%d):%s",
1620 			    event, nconf->nc_proto, fd, errorstr);
1621 fdclose:
1622 			num_conns--;
1623 			remove_from_poll_list(fd);
1624 			(void) t_close(fd);
1625 			return (0);
1626 		}
1627 	}
1628 
1629 	return (0);
1630 }
1631 
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name expected by the netdir_*() routines.  The match is exact
 * and case-sensitive; names not in the table yield "unrecognized".
 * The returned string is a constant and must not be modified or freed.
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc_name;
		char		*port_name;
	} svc_map[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t idx;

	for (idx = 0; idx < sizeof (svc_map) / sizeof (svc_map[0]); idx++) {
		if (strcmp(name, svc_map[idx].svc_name) == 0)
			return (svc_map[idx].port_name);
	}

	return ("unrecognized");
}
1649 
1650 static int
1651 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1652 		struct netconfig **retnconf)
1653 {
1654 	struct netconfig *nconf;
1655 	NCONF_HANDLE *nc;
1656 	struct nd_hostserv hs;
1657 
1658 	hs.h_host = HOST_SELF;
1659 	hs.h_serv = serv_name_to_port_name(serv);
1660 
1661 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1662 		syslog(LOG_ERR, "setnetconfig failed: %m");
1663 		return (-1);
1664 	}
1665 	while (nconf = getnetconfig(nc)) {
1666 		if (OK_TPI_TYPE(nconf) &&
1667 		    strcmp(nconf->nc_device, provider) == 0) {
1668 			*retnconf = nconf;
1669 			return (nfslib_bindit(nconf, addr, &hs,
1670 			    listen_backlog));
1671 		}
1672 	}
1673 	(void) endnetconfig(nc);
1674 
1675 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1676 	    provider);
1677 	return (-1);
1678 }
1679 
1680 static int
1681 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1682 		struct netconfig **retnconf)
1683 {
1684 	struct netconfig *nconf;
1685 	NCONF_HANDLE *nc = NULL;
1686 	struct nd_hostserv hs;
1687 
1688 	hs.h_host = HOST_SELF;
1689 	hs.h_serv = serv_name_to_port_name(serv);
1690 
1691 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1692 		syslog(LOG_ERR, "setnetconfig failed: %m");
1693 		return (-1);
1694 	}
1695 	while (nconf = getnetconfig(nc)) {
1696 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1697 			*retnconf = nconf;
1698 			return (nfslib_bindit(nconf, addr, &hs,
1699 			    listen_backlog));
1700 		}
1701 	}
1702 	(void) endnetconfig(nc);
1703 
1704 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1705 	    proto);
1706 	return (-1);
1707 }
1708 
1709 #include <netinet/in.h>
1710 
1711 /*
1712  * Create an address mask appropriate for the transport.
1713  * The mask is used to obtain the host-specific part of
1714  * a network address when comparing addresses.
1715  * For an internet address the host-specific part is just
1716  * the 32 bit IP address and this part of the mask is set
1717  * to all-ones. The port number part of the mask is zeroes.
1718  */
1719 static int
1720 set_addrmask(fd, nconf, mask)
1721 	struct netconfig *nconf;
1722 	struct netbuf *mask;
1723 {
1724 	struct t_info info;
1725 
1726 	/*
1727 	 * Find the size of the address we need to mask.
1728 	 */
1729 	if (t_getinfo(fd, &info) < 0) {
1730 		t_error("t_getinfo");
1731 		return (-1);
1732 	}
1733 	mask->len = mask->maxlen = info.addr;
1734 	if (info.addr <= 0) {
1735 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1736 			info.addr);
1737 		return (-1);
1738 	}
1739 
1740 	mask->buf = (char *)malloc(mask->len);
1741 	if (mask->buf == NULL) {
1742 		syslog(LOG_ERR, "set_addrmask: no memory");
1743 		return (-1);
1744 	}
1745 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1746 
1747 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1748 		/*
1749 		 * Set the mask so that the port is ignored.
1750 		 */
1751 		/* LINTED pointer alignment */
1752 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1753 								(ulong_t)~0;
1754 		/* LINTED pointer alignment */
1755 		((struct sockaddr_in *)mask->buf)->sin_family =
1756 								(ushort_t)~0;
1757 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1758 		/* LINTED pointer alignment */
1759 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1760 			(uchar_t)~0, sizeof (struct in6_addr));
1761 		/* LINTED pointer alignment */
1762 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1763 								(ushort_t)~0;
1764 	} else {
1765 
1766 		/*
1767 		 * Set all mask bits.
1768 		 */
1769 		(void) memset(mask->buf, 0xFF, mask->len);
1770 	}
1771 	return (0);
1772 }
1773 
1774 /*
1775  * For listen fd's index is always less than end_listen_fds.
1776  * end_listen_fds is defined externally in the daemon that uses this library.
1777  * It's value is equal to the number of open file descriptors after the
1778  * last listen end point was opened but before any connection was accepted.
1779  */
1780 static int
1781 is_listen_fd_index(int index)
1782 {
1783 	return (index < end_listen_fds);
1784 }
1785