xref: /titanic_44/usr/src/cmd/fs.d/nfs/lib/nfs_tbind.c (revision d4f95bf4d6ec7ef0f01e5ddf22813ac641edf019)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 /*
25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
26  */
27 
28 
29 /*
30  * nfs_tbind.c, common part for nfsd and lockd.
31  */
32 
33 #include <tiuser.h>
34 #include <fcntl.h>
35 #include <netconfig.h>
36 #include <stropts.h>
37 #include <errno.h>
38 #include <syslog.h>
39 #include <rpc/rpc.h>
40 #include <sys/time.h>
41 #include <sys/resource.h>
42 #include <signal.h>
43 #include <netdir.h>
44 #include <unistd.h>
45 #include <string.h>
46 #include <netinet/tcp.h>
47 #include <malloc.h>
48 #include <stdlib.h>
49 #include "nfs_tbind.h"
50 #include <nfs/nfs.h>
51 #include <nfs/nfs_acl.h>
52 #include <nfs/nfssys.h>
53 #include <nfs/nfs4.h>
54 #include <zone.h>
55 #include <sys/socket.h>
56 #include <tsol/label.h>
57 
/*
 * Determine valid semantics for most applications: evaluates to non-zero
 * when the nc_semantics member of the netconfig entry is NC_TPI_CLTS,
 * NC_TPI_COTS or NC_TPI_COTS_ORD.
 *
 * The argument is parenthesized so that any pointer-valued expression can
 * be passed; note that it may be evaluated up to three times.
 */
#define	OK_TPI_TYPE(_nconf) \
	((_nconf)->nc_semantics == NC_TPI_CLTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS || \
	(_nconf)->nc_semantics == NC_TPI_COTS_ORD)
65 
/*
 * Assemble a 32-bit value from the four bytes starting at `a', taking the
 * first byte as the most significant one.  The result has type ulong_t.
 *
 * `a' is parenthesized before being cast so that a pointer expression such
 * as `p + 1' is scaled by p's own element type rather than by bytes.  The
 * argument is evaluated four times.
 */
#define	BE32_TO_U32(a) \
	((((ulong_t)((uchar_t *)(a))[0] & 0xFF) << (ulong_t)24) | \
	(((ulong_t)((uchar_t *)(a))[1] & 0xFF) << (ulong_t)16) | \
	(((ulong_t)((uchar_t *)(a))[2] & 0xFF) << (ulong_t)8)  | \
	((ulong_t)((uchar_t *)(a))[3] & 0xFF))
71 
72 /*
73  * Number of elements to add to the poll array on each allocation.
74  */
75 #define	POLL_ARRAY_INC_SIZE	64
76 
77 /*
78  * Number of file descriptors by which the process soft limit may be
79  * increased on each call to nofile_increase(0).
80  */
81 #define	NOFILE_INC_SIZE	64
82 
83 /*
84  * Default TCP send and receive buffer size of NFS server.
85  */
86 #define	NFSD_TCP_BUFSZ	(1024*1024)
87 
/*
 * Node that carries one struct t_call and two links to other nodes of the
 * same type.  The code that builds and walks these nodes is not in this
 * part of the file; conn_get() and discon_get() are declared below as
 * taking a struct conn_ind **.
 */
struct conn_ind {
	struct conn_ind *conn_next;	/* following node */
	struct conn_ind *conn_prev;	/* preceding node */
	struct t_call   *conn_call;	/* t_call held by this node */
};
93 
/*
 * Per-descriptor bookkeeping kept in the conn_polled array, in parallel
 * with the poll_array entry at the same index.
 */
struct conn_entry {
	bool_t			closing;	/* 0 when first added */
	struct netconfig	nc;	/* copy of the transport's netconfig */
};
98 
/*
 * this file contains transport routines common to nfsd and lockd
 *
 * Forward declarations for the file-local helpers, followed by the
 * file-scope state shared between them.
 */
static	int	nofile_increase(int);
static	int	reuseaddr(int);
static	int	recvucred(int);
static  int	anonmlp(int);
static	void	add_to_poll_list(int, struct netconfig *);
static	char	*serv_name_to_port_name(char *);
static	int	bind_to_proto(char *, char *, struct netbuf **,
				struct netconfig **);
static	int	bind_to_provider(char *, char *, struct netbuf **,
					struct netconfig **);
static	void	conn_close_oldest(void);
static	boolean_t conn_get(int, struct netconfig *, struct conn_ind **);
static	void	cots_listen_event(int, int);
static	int	discon_get(int, struct netconfig *, struct conn_ind **);
static	int	do_poll_clts_action(int, int);
static	int	do_poll_cots_action(int, int);
static	void	remove_from_poll_list(int);
static	int	set_addrmask(int, struct netconfig *, struct netbuf *);
static	int	is_listen_fd_index(int);

/*
 * poll_array is the descriptor set handed to poll(); conn_polled[i] holds
 * the transport information for poll_array[i].  Both arrays are grown by
 * add_to_poll_list() and compacted by remove_from_poll_list().
 */
static	struct pollfd *poll_array;
static	struct conn_entry *conn_polled;
static	int	num_conns;		/* Current number of connections */
/*
 * Optional service hook.  When non-NULL it is called by nfslib_bindit()
 * with NFS4_SETPORT for connection-oriented transports, and by do_one()
 * in place of a NULL svc argument.  It is not assigned anywhere in this
 * part of the file.
 */
int		(*Mysvc4)(int, struct netbuf *, struct netconfig *, int,
		struct netbuf *);
static int	setopt(int fd, int level, int name, int value);
static int	get_opt(int fd, int level, int name);
static void	nfslib_set_sockbuf(int fd);
130 
131 /*
132  * Called to create and prepare a transport descriptor for in-kernel
133  * RPC service.
134  * Returns -1 on failure and a valid descriptor on success.
135  */
136 int
137 nfslib_transport_open(struct netconfig *nconf)
138 {
139 	int fd;
140 	struct strioctl	strioc;
141 
142 	if ((nconf == (struct netconfig *)NULL) ||
143 	    (nconf->nc_device == (char *)NULL)) {
144 		syslog(LOG_ERR, "no netconfig device");
145 		return (-1);
146 	}
147 
148 	/*
149 	 * Open the transport device.
150 	 */
151 	fd = t_open(nconf->nc_device, O_RDWR, (struct t_info *)NULL);
152 	if (fd == -1) {
153 		if (t_errno == TSYSERR && errno == EMFILE &&
154 		    (nofile_increase(0) == 0)) {
155 			/* Try again with a higher NOFILE limit. */
156 			fd = t_open(nconf->nc_device, O_RDWR,
157 			    (struct t_info *)NULL);
158 		}
159 		if (fd == -1) {
160 			syslog(LOG_ERR, "t_open %s failed:  t_errno %d, %m",
161 			    nconf->nc_device, t_errno);
162 			return (-1);
163 		}
164 	}
165 
166 	/*
167 	 * Pop timod because the RPC module must be as close as possible
168 	 * to the transport.
169 	 */
170 	if (ioctl(fd, I_POP, 0) < 0) {
171 		syslog(LOG_ERR, "I_POP of timod failed: %m");
172 		(void) t_close(fd);
173 		return (-1);
174 	}
175 
176 	/*
177 	 * Common code for CLTS and COTS transports
178 	 */
179 	if (ioctl(fd, I_PUSH, "rpcmod") < 0) {
180 		syslog(LOG_ERR, "I_PUSH of rpcmod failed: %m");
181 		(void) t_close(fd);
182 		return (-1);
183 	}
184 
185 	strioc.ic_cmd = RPC_SERVER;
186 	strioc.ic_dp = (char *)0;
187 	strioc.ic_len = 0;
188 	strioc.ic_timout = -1;
189 
190 	/* Tell rpcmod to act like a server stream. */
191 	if (ioctl(fd, I_STR, &strioc) < 0) {
192 		syslog(LOG_ERR, "rpcmod set-up ioctl failed: %m");
193 		(void) t_close(fd);
194 		return (-1);
195 	}
196 
197 	/*
198 	 * Re-push timod so that we will still be doing TLI
199 	 * operations on the descriptor.
200 	 */
201 	if (ioctl(fd, I_PUSH, "timod") < 0) {
202 		syslog(LOG_ERR, "I_PUSH of timod failed: %m");
203 		(void) t_close(fd);
204 		return (-1);
205 	}
206 
207 	/*
208 	 * Enable options of returning the ip's for udp.
209 	 */
210 	if (strcmp(nconf->nc_netid, "udp6") == 0)
211 		__rpc_tli_set_options(fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1);
212 	else if (strcmp(nconf->nc_netid, "udp") == 0)
213 		__rpc_tli_set_options(fd, IPPROTO_IP, IP_RECVDSTADDR, 1);
214 
215 	return (fd);
216 }
217 
218 static int
219 nofile_increase(int limit)
220 {
221 	struct rlimit rl;
222 
223 	if (getrlimit(RLIMIT_NOFILE, &rl) == -1) {
224 		syslog(LOG_ERR, "getrlimit of NOFILE failed: %m");
225 		return (-1);
226 	}
227 
228 	if (limit > 0)
229 		rl.rlim_cur = limit;
230 	else
231 		rl.rlim_cur += NOFILE_INC_SIZE;
232 
233 	if (rl.rlim_cur > rl.rlim_max &&
234 	    rl.rlim_max != RLIM_INFINITY)
235 		rl.rlim_max = rl.rlim_cur;
236 
237 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
238 		syslog(LOG_ERR, "setrlimit of NOFILE to %d failed: %m",
239 		    rl.rlim_cur);
240 		return (-1);
241 	}
242 
243 	return (0);
244 }
245 
246 static void
247 nfslib_set_sockbuf(int fd)
248 {
249 	int curval, val;
250 
251 	val = NFSD_TCP_BUFSZ;
252 
253 	curval = get_opt(fd, SOL_SOCKET, SO_SNDBUF);
254 	syslog(LOG_DEBUG, "Current SO_SNDBUF value is %d", curval);
255 	if ((curval != -1) && (curval < val)) {
256 		syslog(LOG_DEBUG, "Set SO_SNDBUF  option to %d", val);
257 		if (setopt(fd, SOL_SOCKET, SO_SNDBUF, val) < 0) {
258 			syslog(LOG_ERR,
259 			    "couldn't set SO_SNDBUF to %d - t_errno = %d",
260 			    val, t_errno);
261 			syslog(LOG_ERR,
262 			    "Check and increase system-wide tcp_max_buf");
263 		}
264 	}
265 
266 	curval = get_opt(fd, SOL_SOCKET, SO_RCVBUF);
267 	syslog(LOG_DEBUG, "Current SO_RCVBUF value is %d", curval);
268 	if ((curval != -1) && (curval < val)) {
269 		syslog(LOG_DEBUG, "Set SO_RCVBUF  option to %d", val);
270 		if (setopt(fd, SOL_SOCKET, SO_RCVBUF, val) < 0) {
271 			syslog(LOG_ERR,
272 			    "couldn't set SO_RCVBUF to %d - t_errno = %d",
273 			    val, t_errno);
274 			syslog(LOG_ERR,
275 			    "Check and increase system-wide tcp_max_buf");
276 		}
277 	}
278 }
279 
280 int
281 nfslib_bindit(struct netconfig *nconf, struct netbuf **addr,
282 	struct nd_hostserv *hs, int backlog)
283 {
284 	int fd;
285 	struct t_bind  *ntb;
286 	struct t_bind tb;
287 	struct nd_addrlist *addrlist;
288 	struct t_optmgmt req, resp;
289 	struct opthdr *opt;
290 	char reqbuf[128];
291 	bool_t use_any = FALSE;
292 	bool_t gzone = TRUE;
293 
294 	if ((fd = nfslib_transport_open(nconf)) == -1) {
295 		syslog(LOG_ERR, "cannot establish transport service over %s",
296 		    nconf->nc_device);
297 		return (-1);
298 	}
299 
300 	addrlist = (struct nd_addrlist *)NULL;
301 
302 	/* nfs4_callback service does not used a fieed port number */
303 
304 	if (strcmp(hs->h_serv, "nfs4_callback") == 0) {
305 		tb.addr.maxlen = 0;
306 		tb.addr.len = 0;
307 		tb.addr.buf = 0;
308 		use_any = TRUE;
309 		gzone = (getzoneid() == GLOBAL_ZONEID);
310 	} else if (netdir_getbyname(nconf, hs, &addrlist) != 0) {
311 
312 		syslog(LOG_ERR,
313 		"Cannot get address for transport %s host %s service %s",
314 		    nconf->nc_netid, hs->h_host, hs->h_serv);
315 		(void) t_close(fd);
316 		return (-1);
317 	}
318 
319 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
320 		/*
321 		 * If we're running over TCP, then set the
322 		 * SO_REUSEADDR option so that we can bind
323 		 * to our preferred address even if previously
324 		 * left connections exist in FIN_WAIT states.
325 		 * This is somewhat bogus, but otherwise you have
326 		 * to wait 2 minutes to restart after killing it.
327 		 */
328 		if (reuseaddr(fd) == -1) {
329 			syslog(LOG_WARNING,
330 			"couldn't set SO_REUSEADDR option on transport");
331 		}
332 	} else if (strcmp(nconf->nc_proto, "udp") == 0) {
333 		/*
334 		 * In order to run MLP on UDP, we need to handle creds.
335 		 */
336 		if (recvucred(fd) == -1) {
337 			syslog(LOG_WARNING,
338 			    "couldn't set SO_RECVUCRED option on transport");
339 		}
340 	}
341 
342 	/*
343 	 * Make non global zone nfs4_callback port MLP
344 	 */
345 	if (use_any && is_system_labeled() && !gzone) {
346 		if (anonmlp(fd) == -1) {
347 			/*
348 			 * failing to set this option means nfs4_callback
349 			 * could fail silently later. So fail it with
350 			 * with an error message now.
351 			 */
352 			syslog(LOG_ERR,
353 			    "couldn't set SO_ANON_MLP option on transport");
354 			(void) t_close(fd);
355 			return (-1);
356 		}
357 	}
358 
359 	if (nconf->nc_semantics == NC_TPI_CLTS)
360 		tb.qlen = 0;
361 	else
362 		tb.qlen = backlog;
363 
364 	/* LINTED pointer alignment */
365 	ntb = (struct t_bind *)t_alloc(fd, T_BIND, T_ALL);
366 	if (ntb == (struct t_bind *)NULL) {
367 		syslog(LOG_ERR, "t_alloc failed:  t_errno %d, %m", t_errno);
368 		(void) t_close(fd);
369 		netdir_free((void *)addrlist, ND_ADDRLIST);
370 		return (-1);
371 	}
372 
373 	/*
374 	 * XXX - what about the space tb->addr.buf points to? This should
375 	 * be either a memcpy() to/from the buf fields, or t_alloc(fd,T_BIND,)
376 	 * should't be called with T_ALL.
377 	 */
378 	if (addrlist)
379 		tb.addr = *(addrlist->n_addrs);		/* structure copy */
380 
381 	if (t_bind(fd, &tb, ntb) == -1) {
382 		syslog(LOG_ERR, "t_bind failed:  t_errno %d, %m", t_errno);
383 		(void) t_free((char *)ntb, T_BIND);
384 		netdir_free((void *)addrlist, ND_ADDRLIST);
385 		(void) t_close(fd);
386 		return (-1);
387 	}
388 
389 	/* make sure we bound to the right address */
390 	if (use_any == FALSE &&
391 	    (tb.addr.len != ntb->addr.len ||
392 	    memcmp(tb.addr.buf, ntb->addr.buf, tb.addr.len) != 0)) {
393 		syslog(LOG_ERR, "t_bind to wrong address");
394 		(void) t_free((char *)ntb, T_BIND);
395 		netdir_free((void *)addrlist, ND_ADDRLIST);
396 		(void) t_close(fd);
397 		return (-1);
398 	}
399 
400 	/*
401 	 * Call nfs4svc_setport so that the kernel can be
402 	 * informed what port number the daemon is listing
403 	 * for incoming connection requests.
404 	 */
405 
406 	if ((nconf->nc_semantics == NC_TPI_COTS ||
407 	    nconf->nc_semantics == NC_TPI_COTS_ORD) && Mysvc4 != NULL)
408 		(*Mysvc4)(fd, NULL, nconf, NFS4_SETPORT, &ntb->addr);
409 
410 	*addr = &ntb->addr;
411 	netdir_free((void *)addrlist, ND_ADDRLIST);
412 
413 	if (strcmp(nconf->nc_proto, "tcp") == 0) {
414 		/*
415 		 * Disable the Nagle algorithm on TCP connections.
416 		 * Connections accepted from this listener will
417 		 * inherit the listener options.
418 		 */
419 
420 		/* LINTED pointer alignment */
421 		opt = (struct opthdr *)reqbuf;
422 		opt->level = IPPROTO_TCP;
423 		opt->name = TCP_NODELAY;
424 		opt->len = sizeof (int);
425 
426 		/* LINTED pointer alignment */
427 		*(int *)((char *)opt + sizeof (*opt)) = 1;
428 
429 		req.flags = T_NEGOTIATE;
430 		req.opt.len = sizeof (*opt) + opt->len;
431 		req.opt.buf = (char *)opt;
432 		resp.flags = 0;
433 		resp.opt.buf = reqbuf;
434 		resp.opt.maxlen = sizeof (reqbuf);
435 
436 		if (t_optmgmt(fd, &req, &resp) < 0 ||
437 		    resp.flags != T_SUCCESS) {
438 			syslog(LOG_ERR,
439 	"couldn't set NODELAY option for proto %s: t_errno = %d, %m",
440 			    nconf->nc_proto, t_errno);
441 		}
442 
443 		nfslib_set_sockbuf(fd);
444 	}
445 
446 	return (fd);
447 }
448 
449 static int
450 get_opt(int fd, int level, int name)
451 {
452 	struct t_optmgmt req, res;
453 	struct {
454 		struct opthdr opt;
455 		int value;
456 	} reqbuf;
457 
458 	reqbuf.opt.level = level;
459 	reqbuf.opt.name = name;
460 	reqbuf.opt.len = sizeof (int);
461 	reqbuf.value = 0;
462 
463 	req.flags = T_CURRENT;
464 	req.opt.len = sizeof (reqbuf);
465 	req.opt.buf = (char *)&reqbuf;
466 
467 	res.flags = 0;
468 	res.opt.buf = (char *)&reqbuf;
469 	res.opt.maxlen = sizeof (reqbuf);
470 
471 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
472 		t_error("t_optmgmt");
473 		return (-1);
474 	}
475 	return (reqbuf.value);
476 }
477 
478 static int
479 setopt(int fd, int level, int name, int value)
480 {
481 	struct t_optmgmt req, resp;
482 	struct {
483 		struct opthdr opt;
484 		int value;
485 	} reqbuf;
486 
487 	reqbuf.opt.level = level;
488 	reqbuf.opt.name = name;
489 	reqbuf.opt.len = sizeof (int);
490 
491 	reqbuf.value = value;
492 
493 	req.flags = T_NEGOTIATE;
494 	req.opt.len = sizeof (reqbuf);
495 	req.opt.buf = (char *)&reqbuf;
496 
497 	resp.flags = 0;
498 	resp.opt.buf = (char *)&reqbuf;
499 	resp.opt.maxlen = sizeof (reqbuf);
500 
501 	if (t_optmgmt(fd, &req, &resp) < 0 || resp.flags != T_SUCCESS) {
502 		t_error("t_optmgmt");
503 		return (-1);
504 	}
505 	return (0);
506 }
507 
/*
 * Switch on SO_REUSEADDR (SOL_SOCKET level) for the endpoint.
 * Returns whatever setopt() returns: 0 on success, -1 on failure.
 */
static int
reuseaddr(int fd)
{
	int rc;

	rc = setopt(fd, SOL_SOCKET, SO_REUSEADDR, 1);
	return (rc);
}
513 
514 static int
515 recvucred(int fd)
516 {
517 	return (setopt(fd, SOL_SOCKET, SO_RECVUCRED, 1));
518 }
519 
520 static int
521 anonmlp(int fd)
522 {
523 	return (setopt(fd, SOL_SOCKET, SO_ANON_MLP, 1));
524 }
525 
526 void
527 nfslib_log_tli_error(char *tli_name, int fd, struct netconfig *nconf)
528 {
529 	int error;
530 
531 	/*
532 	 * Save the error code across syslog(), just in case syslog()
533 	 * gets its own error and, therefore, overwrites errno.
534 	 */
535 	error = errno;
536 	if (t_errno == TSYSERR) {
537 		syslog(LOG_ERR, "%s(file descriptor %d/transport %s) %m",
538 		    tli_name, fd, nconf->nc_proto);
539 	} else {
540 		syslog(LOG_ERR,
541 		    "%s(file descriptor %d/transport %s) TLI error %d",
542 		    tli_name, fd, nconf->nc_proto, t_errno);
543 	}
544 	errno = error;
545 }
546 
547 /*
548  * Called to set up service over a particular transport.
549  */
550 void
551 do_one(char *provider, NETSELDECL(proto), struct protob *protobp0,
552 	int (*svc)(int, struct netbuf, struct netconfig *))
553 {
554 	register int sock;
555 	struct protob *protobp;
556 	struct netbuf *retaddr;
557 	struct netconfig *retnconf;
558 	struct netbuf addrmask;
559 	int vers;
560 	int err;
561 	int l;
562 
563 	if (provider)
564 		sock = bind_to_provider(provider, protobp0->serv, &retaddr,
565 		    &retnconf);
566 	else
567 		sock = bind_to_proto(proto, protobp0->serv, &retaddr,
568 		    &retnconf);
569 
570 	if (sock == -1) {
571 		(void) syslog(LOG_ERR,
572 	"Cannot establish %s service over %s: transport setup problem.",
573 		    protobp0->serv, provider ? provider : proto);
574 		return;
575 	}
576 
577 	if (set_addrmask(sock, retnconf, &addrmask) < 0) {
578 		(void) syslog(LOG_ERR,
579 		    "Cannot set address mask for %s", retnconf->nc_netid);
580 		return;
581 	}
582 
583 	/*
584 	 * Register all versions of the programs in the protocol block list.
585 	 */
586 	l = strlen(NC_UDP);
587 	for (protobp = protobp0; protobp; protobp = protobp->next) {
588 		for (vers = protobp->versmin; vers <= protobp->versmax;
589 		    vers++) {
590 			if ((protobp->program == NFS_PROGRAM ||
591 			    protobp->program == NFS_ACL_PROGRAM) &&
592 			    vers == NFS_V4 &&
593 			    strncasecmp(retnconf->nc_proto, NC_UDP, l) == 0)
594 				continue;
595 
596 			(void) rpcb_unset(protobp->program, vers, retnconf);
597 			(void) rpcb_set(protobp->program, vers, retnconf,
598 			    retaddr);
599 		}
600 	}
601 
602 	/*
603 	 * Register services with CLTS semantics right now.
604 	 * Note: services with COTS/COTS_ORD semantics will be
605 	 * registered later from cots_listen_event function.
606 	 */
607 	if (retnconf->nc_semantics == NC_TPI_CLTS) {
608 		/* Don't drop core if supporting module(s) aren't loaded. */
609 		(void) signal(SIGSYS, SIG_IGN);
610 
611 		/*
612 		 * svc() doesn't block, it returns success or failure.
613 		 */
614 
615 		if (svc == NULL && Mysvc4 != NULL)
616 			err = (*Mysvc4)(sock, &addrmask, retnconf,
617 			    NFS4_SETPORT|NFS4_KRPC_START, retaddr);
618 		else
619 			err = (*svc)(sock, addrmask, retnconf);
620 
621 		if (err < 0) {
622 			(void) syslog(LOG_ERR,
623 			    "Cannot establish %s service over <file desc."
624 			    " %d, protocol %s> : %m. Exiting",
625 			    protobp0->serv, sock, retnconf->nc_proto);
626 			exit(1);
627 		}
628 	}
629 	free(addrmask.buf);
630 
631 	/*
632 	 * We successfully set up the server over this transport.
633 	 * Add this descriptor to the one being polled on.
634 	 */
635 	add_to_poll_list(sock, retnconf);
636 }
637 
638 /*
639  * Set up the NFS service over all the available transports.
640  * Returns -1 for failure, 0 for success.
641  */
642 int
643 do_all(struct protob *protobp,
644 	int (*svc)(int, struct netbuf, struct netconfig *))
645 {
646 	struct netconfig *nconf;
647 	NCONF_HANDLE *nc;
648 	int l;
649 
650 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
651 		syslog(LOG_ERR, "setnetconfig failed: %m");
652 		return (-1);
653 	}
654 	l = strlen(NC_UDP);
655 	while (nconf = getnetconfig(nc)) {
656 		if ((nconf->nc_flag & NC_VISIBLE) &&
657 		    strcmp(nconf->nc_protofmly, NC_LOOPBACK) != 0 &&
658 		    OK_TPI_TYPE(nconf) &&
659 		    (protobp->program != NFS4_CALLBACK ||
660 		    strncasecmp(nconf->nc_proto, NC_UDP, l) != 0))
661 			do_one(nconf->nc_device, nconf->nc_proto,
662 			    protobp, svc);
663 	}
664 	(void) endnetconfig(nc);
665 	return (0);
666 }
667 
668 /*
669  * poll on the open transport descriptors for events and errors.
670  */
671 void
672 poll_for_action(void)
673 {
674 	int nfds;
675 	int i;
676 
677 	/*
678 	 * Keep polling until all transports have been closed. When this
679 	 * happens, we return.
680 	 */
681 	while ((int)num_fds > 0) {
682 		nfds = poll(poll_array, num_fds, INFTIM);
683 		switch (nfds) {
684 		case 0:
685 			continue;
686 
687 		case -1:
688 			/*
689 			 * Some errors from poll could be
690 			 * due to temporary conditions, and we try to
691 			 * be robust in the face of them. Other
692 			 * errors (should never happen in theory)
693 			 * are fatal (eg. EINVAL, EFAULT).
694 			 */
695 			switch (errno) {
696 			case EINTR:
697 				continue;
698 
699 			case EAGAIN:
700 			case ENOMEM:
701 				(void) sleep(10);
702 				continue;
703 
704 			default:
705 				(void) syslog(LOG_ERR,
706 				    "poll failed: %m. Exiting");
707 				exit(1);
708 			}
709 		default:
710 			break;
711 		}
712 
713 		/*
714 		 * Go through the poll list looking for events.
715 		 */
716 		for (i = 0; i < num_fds && nfds > 0; i++) {
717 			if (poll_array[i].revents) {
718 				nfds--;
719 				/*
720 				 * We have a message, so try to read it.
721 				 * Record the error return in errno,
722 				 * so that syslog(LOG_ERR, "...%m")
723 				 * dumps the corresponding error string.
724 				 */
725 				if (conn_polled[i].nc.nc_semantics ==
726 				    NC_TPI_CLTS) {
727 					errno = do_poll_clts_action(
728 					    poll_array[i].fd, i);
729 				} else {
730 					errno = do_poll_cots_action(
731 					    poll_array[i].fd, i);
732 				}
733 
734 				if (errno == 0)
735 					continue;
736 				/*
737 				 * Most returned error codes mean that there is
738 				 * fatal condition which we can only deal with
739 				 * by closing the transport.
740 				 */
741 				if (errno != EAGAIN && errno != ENOMEM) {
742 					(void) syslog(LOG_ERR,
743 		"Error (%m) reading descriptor %d/transport %s. Closing it.",
744 					    poll_array[i].fd,
745 					    conn_polled[i].nc.nc_proto);
746 					(void) t_close(poll_array[i].fd);
747 					remove_from_poll_list(poll_array[i].fd);
748 
749 				} else if (errno == ENOMEM)
750 					(void) sleep(5);
751 			}
752 		}
753 	}
754 
755 	(void) syslog(LOG_ERR,
756 	    "All transports have been closed with errors. Exiting.");
757 }
758 
759 /*
760  * Allocate poll/transport array entries for this descriptor.
761  */
762 static void
763 add_to_poll_list(int fd, struct netconfig *nconf)
764 {
765 	static int poll_array_size = 0;
766 
767 	/*
768 	 * If the arrays are full, allocate new ones.
769 	 */
770 	if (num_fds == poll_array_size) {
771 		struct pollfd *tpa;
772 		struct conn_entry *tnp;
773 
774 		if (poll_array_size != 0) {
775 			tpa = poll_array;
776 			tnp = conn_polled;
777 		} else
778 			tpa = (struct pollfd *)0;
779 
780 		poll_array_size += POLL_ARRAY_INC_SIZE;
781 		/*
782 		 * Allocate new arrays.
783 		 */
784 		poll_array = (struct pollfd *)
785 		    malloc(poll_array_size * sizeof (struct pollfd) + 256);
786 		conn_polled = (struct conn_entry *)
787 		    malloc(poll_array_size * sizeof (struct conn_entry) + 256);
788 		if (poll_array == (struct pollfd *)NULL ||
789 		    conn_polled == (struct conn_entry *)NULL) {
790 			syslog(LOG_ERR, "malloc failed for poll array");
791 			exit(1);
792 		}
793 
794 		/*
795 		 * Copy the data of the old ones into new arrays, and
796 		 * free the old ones.
797 		 */
798 		if (tpa) {
799 			(void) memcpy((void *)poll_array, (void *)tpa,
800 			    num_fds * sizeof (struct pollfd));
801 			(void) memcpy((void *)conn_polled, (void *)tnp,
802 			    num_fds * sizeof (struct conn_entry));
803 			free((void *)tpa);
804 			free((void *)tnp);
805 		}
806 	}
807 
808 	/*
809 	 * Set the descriptor and event list. All possible events are
810 	 * polled for.
811 	 */
812 	poll_array[num_fds].fd = fd;
813 	poll_array[num_fds].events = POLLIN|POLLRDNORM|POLLRDBAND|POLLPRI;
814 
815 	/*
816 	 * Copy the transport data over too.
817 	 */
818 	conn_polled[num_fds].nc = *nconf;
819 	conn_polled[num_fds].closing = 0;
820 
821 	/*
822 	 * Set the descriptor to non-blocking. Avoids a race
823 	 * between data arriving on the stream and then having it
824 	 * flushed before we can read it.
825 	 */
826 	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
827 		(void) syslog(LOG_ERR,
828 	"fcntl(file desc. %d/transport %s, F_SETFL, O_NONBLOCK): %m. Exiting",
829 		    num_fds, nconf->nc_proto);
830 		exit(1);
831 	}
832 
833 	/*
834 	 * Count this descriptor.
835 	 */
836 	++num_fds;
837 }
838 
839 static void
840 remove_from_poll_list(int fd)
841 {
842 	int i;
843 	int num_to_copy;
844 
845 	for (i = 0; i < num_fds; i++) {
846 		if (poll_array[i].fd == fd) {
847 			--num_fds;
848 			num_to_copy = num_fds - i;
849 			(void) memcpy((void *)&poll_array[i],
850 			    (void *)&poll_array[i+1],
851 			    num_to_copy * sizeof (struct pollfd));
852 			(void) memset((void *)&poll_array[num_fds], 0,
853 			    sizeof (struct pollfd));
854 			(void) memcpy((void *)&conn_polled[i],
855 			    (void *)&conn_polled[i+1],
856 			    num_to_copy * sizeof (struct conn_entry));
857 			(void) memset((void *)&conn_polled[num_fds], 0,
858 			    sizeof (struct conn_entry));
859 			return;
860 		}
861 	}
862 	syslog(LOG_ERR, "attempt to remove nonexistent fd from poll list");
863 
864 }
865 
866 /*
867  * Called to read and interpret the event on a connectionless descriptor.
868  * Returns 0 if successful, or a UNIX error code if failure.
869  */
870 static int
871 do_poll_clts_action(int fd, int conn_index)
872 {
873 	int error;
874 	int ret;
875 	int flags;
876 	struct netconfig *nconf = &conn_polled[conn_index].nc;
877 	static struct t_unitdata *unitdata = NULL;
878 	static struct t_uderr *uderr = NULL;
879 	static int oldfd = -1;
880 	struct nd_hostservlist *host = NULL;
881 	struct strbuf ctl[1], data[1];
882 	/*
883 	 * We just need to have some space to consume the
884 	 * message in the event we can't use the TLI interface to do the
885 	 * job.
886 	 *
887 	 * We flush the message using getmsg(). For the control part
888 	 * we allocate enough for any TPI header plus 32 bytes for address
889 	 * and options. For the data part, there is nothing magic about
890 	 * the size of the array, but 256 bytes is probably better than
891 	 * 1 byte, and we don't expect any data portion anyway.
892 	 *
893 	 * If the array sizes are too small, we handle this because getmsg()
894 	 * (called to consume the message) will return MOREDATA|MORECTL.
895 	 * Thus we just call getmsg() until it's read the message.
896 	 */
897 	char ctlbuf[sizeof (union T_primitives) + 32];
898 	char databuf[256];
899 
900 	/*
901 	 * If this is the same descriptor as the last time
902 	 * do_poll_clts_action was called, we can save some
903 	 * de-allocation and allocation.
904 	 */
905 	if (oldfd != fd) {
906 		oldfd = fd;
907 
908 		if (unitdata) {
909 			(void) t_free((char *)unitdata, T_UNITDATA);
910 			unitdata = NULL;
911 		}
912 		if (uderr) {
913 			(void) t_free((char *)uderr, T_UDERROR);
914 			uderr = NULL;
915 		}
916 	}
917 
918 	/*
919 	 * Allocate a unitdata structure for receiving the event.
920 	 */
921 	if (unitdata == NULL) {
922 		/* LINTED pointer alignment */
923 		unitdata = (struct t_unitdata *)t_alloc(fd, T_UNITDATA, T_ALL);
924 		if (unitdata == NULL) {
925 			if (t_errno == TSYSERR) {
926 				/*
927 				 * Save the error code across
928 				 * syslog(), just in case
929 				 * syslog() gets its own error
930 				 * and therefore overwrites errno.
931 				 */
932 				error = errno;
933 				(void) syslog(LOG_ERR,
934 	"t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed: %m",
935 				    fd, nconf->nc_proto);
936 				return (error);
937 			}
938 			(void) syslog(LOG_ERR,
939 "t_alloc(file descriptor %d/transport %s, T_UNITDATA) failed TLI error %d",
940 			    fd, nconf->nc_proto, t_errno);
941 			goto flush_it;
942 		}
943 	}
944 
945 try_again:
946 	flags = 0;
947 
948 	/*
949 	 * The idea is we wait for T_UNITDATA_IND's. Of course,
950 	 * we don't get any, because rpcmod filters them out.
951 	 * However, we need to call t_rcvudata() to let TLI
952 	 * tell us we have a T_UDERROR_IND.
953 	 *
954 	 * algorithm is:
955 	 * 	t_rcvudata(), expecting TLOOK.
956 	 * 	t_look(), expecting T_UDERR.
957 	 * 	t_rcvuderr(), expecting success (0).
958 	 * 	expand destination address into ASCII,
959 	 *	and dump it.
960 	 */
961 
962 	ret = t_rcvudata(fd, unitdata, &flags);
963 	if (ret == 0 || t_errno == TBUFOVFLW) {
964 		(void) syslog(LOG_WARNING,
965 "t_rcvudata(file descriptor %d/transport %s) got unexpected data, %d bytes",
966 		    fd, nconf->nc_proto, unitdata->udata.len);
967 
968 		/*
969 		 * Even though we don't expect any data, in case we do,
970 		 * keep reading until there is no more.
971 		 */
972 		if (flags & T_MORE)
973 			goto try_again;
974 
975 		return (0);
976 	}
977 
978 	switch (t_errno) {
979 	case TNODATA:
980 		return (0);
981 	case TSYSERR:
982 		/*
983 		 * System errors are returned to caller.
984 		 * Save the error code across
985 		 * syslog(), just in case
986 		 * syslog() gets its own error
987 		 * and therefore overwrites errno.
988 		 */
989 		error = errno;
990 		(void) syslog(LOG_ERR,
991 		    "t_rcvudata(file descriptor %d/transport %s) %m",
992 		    fd, nconf->nc_proto);
993 		return (error);
994 	case TLOOK:
995 		break;
996 	default:
997 		(void) syslog(LOG_ERR,
998 		"t_rcvudata(file descriptor %d/transport %s) TLI error %d",
999 		    fd, nconf->nc_proto, t_errno);
1000 		goto flush_it;
1001 	}
1002 
1003 	ret = t_look(fd);
1004 	switch (ret) {
1005 	case 0:
1006 		return (0);
1007 	case -1:
1008 		/*
1009 		 * System errors are returned to caller.
1010 		 */
1011 		if (t_errno == TSYSERR) {
1012 			/*
1013 			 * Save the error code across
1014 			 * syslog(), just in case
1015 			 * syslog() gets its own error
1016 			 * and therefore overwrites errno.
1017 			 */
1018 			error = errno;
1019 			(void) syslog(LOG_ERR,
1020 			    "t_look(file descriptor %d/transport %s) %m",
1021 			    fd, nconf->nc_proto);
1022 			return (error);
1023 		}
1024 		(void) syslog(LOG_ERR,
1025 		    "t_look(file descriptor %d/transport %s) TLI error %d",
1026 		    fd, nconf->nc_proto, t_errno);
1027 		goto flush_it;
1028 	case T_UDERR:
1029 		break;
1030 	default:
1031 		(void) syslog(LOG_WARNING,
1032 	"t_look(file descriptor %d/transport %s) returned %d not T_UDERR (%d)",
1033 		    fd, nconf->nc_proto, ret, T_UDERR);
1034 	}
1035 
1036 	if (uderr == NULL) {
1037 		/* LINTED pointer alignment */
1038 		uderr = (struct t_uderr *)t_alloc(fd, T_UDERROR, T_ALL);
1039 		if (uderr == NULL) {
1040 			if (t_errno == TSYSERR) {
1041 				/*
1042 				 * Save the error code across
1043 				 * syslog(), just in case
1044 				 * syslog() gets its own error
1045 				 * and therefore overwrites errno.
1046 				 */
1047 				error = errno;
1048 				(void) syslog(LOG_ERR,
1049 	"t_alloc(file descriptor %d/transport %s, T_UDERROR) failed: %m",
1050 				    fd, nconf->nc_proto);
1051 				return (error);
1052 			}
1053 			(void) syslog(LOG_ERR,
1054 "t_alloc(file descriptor %d/transport %s, T_UDERROR) failed TLI error: %d",
1055 			    fd, nconf->nc_proto, t_errno);
1056 			goto flush_it;
1057 		}
1058 	}
1059 
1060 	ret = t_rcvuderr(fd, uderr);
1061 	if (ret == 0) {
1062 
1063 		/*
1064 		 * Save the datagram error in errno, so that the
1065 		 * %m argument to syslog picks up the error string.
1066 		 */
1067 		errno = uderr->error;
1068 
1069 		/*
1070 		 * Log the datagram error, then log the host that
1071 		 * probably triggerred. Cannot log both in the
1072 		 * same transaction because of packet size limitations
1073 		 * in /dev/log.
1074 		 */
1075 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1076 "NFS response over <file descriptor %d/transport %s> generated error: %m",
1077 		    fd, nconf->nc_proto);
1078 
1079 		/*
1080 		 * Try to map the client's address back to a
1081 		 * name.
1082 		 */
1083 		ret = netdir_getbyaddr(nconf, &host, &uderr->addr);
1084 		if (ret != -1 && host && host->h_cnt > 0 &&
1085 		    host->h_hostservs) {
1086 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1087 "Bad NFS response was sent to client with host name: %s; service port: %s",
1088 		    host->h_hostservs->h_host,
1089 		    host->h_hostservs->h_serv);
1090 		} else {
1091 			int i, j;
1092 			char *buf;
1093 			char *hex = "0123456789abcdef";
1094 
1095 			/*
1096 			 * Mapping failed, print the whole thing
1097 			 * in ASCII hex.
1098 			 */
1099 			buf = (char *)malloc(uderr->addr.len * 2 + 1);
1100 			for (i = 0, j = 0; i < uderr->addr.len; i++, j += 2) {
1101 				buf[j] = hex[((uderr->addr.buf[i]) >> 4) & 0xf];
1102 				buf[j+1] = hex[uderr->addr.buf[i] & 0xf];
1103 			}
1104 			buf[j] = '\0';
1105 		(void) syslog((errno == ECONNREFUSED) ? LOG_DEBUG : LOG_WARNING,
1106 	"Bad NFS response was sent to client with transport address: 0x%s",
1107 		    buf);
1108 			free((void *)buf);
1109 		}
1110 
1111 		if (ret == 0 && host != NULL)
1112 			netdir_free((void *)host, ND_HOSTSERVLIST);
1113 		return (0);
1114 	}
1115 
1116 	switch (t_errno) {
1117 	case TNOUDERR:
1118 		goto flush_it;
1119 	case TSYSERR:
1120 		/*
1121 		 * System errors are returned to caller.
1122 		 * Save the error code across
1123 		 * syslog(), just in case
1124 		 * syslog() gets its own error
1125 		 * and therefore overwrites errno.
1126 		 */
1127 		error = errno;
1128 		(void) syslog(LOG_ERR,
1129 		    "t_rcvuderr(file descriptor %d/transport %s) %m",
1130 		    fd, nconf->nc_proto);
1131 		return (error);
1132 	default:
1133 		(void) syslog(LOG_ERR,
1134 		"t_rcvuderr(file descriptor %d/transport %s) TLI error %d",
1135 		    fd, nconf->nc_proto, t_errno);
1136 		goto flush_it;
1137 	}
1138 
1139 flush_it:
1140 	/*
1141 	 * If we get here, then we could not cope with whatever message
1142 	 * we attempted to read, so flush it. If we did read a message,
1143 	 * and one isn't present, that is all right, because fd is in
1144 	 * nonblocking mode.
1145 	 */
1146 	(void) syslog(LOG_ERR,
1147 	"Flushing one input message from <file descriptor %d/transport %s>",
1148 	    fd, nconf->nc_proto);
1149 
1150 	/*
1151 	 * Read and discard the message. Do this until there is
1152 	 * no more control/data in the message or until we get an error.
1153 	 */
1154 	do {
1155 		ctl->maxlen = sizeof (ctlbuf);
1156 		ctl->buf = ctlbuf;
1157 		data->maxlen = sizeof (databuf);
1158 		data->buf = databuf;
1159 		flags = 0;
1160 		ret = getmsg(fd, ctl, data, &flags);
1161 		if (ret == -1)
1162 			return (errno);
1163 	} while (ret != 0);
1164 
1165 	return (0);
1166 }
1167 
1168 static void
1169 conn_close_oldest(void)
1170 {
1171 	int fd;
1172 	int i1;
1173 
1174 	/*
1175 	 * Find the oldest connection that is not already in the
1176 	 * process of shutting down.
1177 	 */
1178 	for (i1 = end_listen_fds; /* no conditional expression */; i1++) {
1179 		if (i1 >= num_fds)
1180 			return;
1181 		if (conn_polled[i1].closing == 0)
1182 			break;
1183 	}
1184 #ifdef DEBUG
1185 	printf("too many connections (%d), releasing oldest (%d)\n",
1186 	    num_conns, poll_array[i1].fd);
1187 #else
1188 	syslog(LOG_WARNING, "too many connections (%d), releasing oldest (%d)",
1189 	    num_conns, poll_array[i1].fd);
1190 #endif
1191 	fd = poll_array[i1].fd;
1192 	if (conn_polled[i1].nc.nc_semantics == NC_TPI_COTS) {
1193 		/*
1194 		 * For politeness, send a T_DISCON_REQ to the transport
1195 		 * provider.  We close the stream anyway.
1196 		 */
1197 		(void) t_snddis(fd, (struct t_call *)0);
1198 		num_conns--;
1199 		remove_from_poll_list(fd);
1200 		(void) t_close(fd);
1201 	} else {
1202 		/*
1203 		 * For orderly release, we do not close the stream
1204 		 * until the T_ORDREL_IND arrives to complete
1205 		 * the handshake.
1206 		 */
1207 		if (t_sndrel(fd) == 0)
1208 			conn_polled[i1].closing = 1;
1209 	}
1210 }
1211 
1212 static boolean_t
1213 conn_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1214 {
1215 	struct conn_ind	*conn;
1216 	struct conn_ind	*next_conn;
1217 
1218 	conn = (struct conn_ind *)malloc(sizeof (*conn));
1219 	if (conn == NULL) {
1220 		syslog(LOG_ERR, "malloc for listen indication failed");
1221 		return (FALSE);
1222 	}
1223 
1224 	/* LINTED pointer alignment */
1225 	conn->conn_call = (struct t_call *)t_alloc(fd, T_CALL, T_ALL);
1226 	if (conn->conn_call == NULL) {
1227 		free((char *)conn);
1228 		nfslib_log_tli_error("t_alloc", fd, nconf);
1229 		return (FALSE);
1230 	}
1231 
1232 	if (t_listen(fd, conn->conn_call) == -1) {
1233 		nfslib_log_tli_error("t_listen", fd, nconf);
1234 		(void) t_free((char *)conn->conn_call, T_CALL);
1235 		free((char *)conn);
1236 		return (FALSE);
1237 	}
1238 
1239 	if (conn->conn_call->udata.len > 0) {
1240 		syslog(LOG_WARNING,
1241 	"rejecting inbound connection(%s) with %d bytes of connect data",
1242 		    nconf->nc_proto, conn->conn_call->udata.len);
1243 
1244 		conn->conn_call->udata.len = 0;
1245 		(void) t_snddis(fd, conn->conn_call);
1246 		(void) t_free((char *)conn->conn_call, T_CALL);
1247 		free((char *)conn);
1248 		return (FALSE);
1249 	}
1250 
1251 	if ((next_conn = *connp) != NULL) {
1252 		next_conn->conn_prev->conn_next = conn;
1253 		conn->conn_next = next_conn;
1254 		conn->conn_prev = next_conn->conn_prev;
1255 		next_conn->conn_prev = conn;
1256 	} else {
1257 		conn->conn_next = conn;
1258 		conn->conn_prev = conn;
1259 		*connp = conn;
1260 	}
1261 	return (TRUE);
1262 }
1263 
1264 static int
1265 discon_get(int fd, struct netconfig *nconf, struct conn_ind **connp)
1266 {
1267 	struct conn_ind	*conn;
1268 	struct t_discon	discon;
1269 
1270 	discon.udata.buf = (char *)0;
1271 	discon.udata.maxlen = 0;
1272 	if (t_rcvdis(fd, &discon) == -1) {
1273 		nfslib_log_tli_error("t_rcvdis", fd, nconf);
1274 		return (-1);
1275 	}
1276 
1277 	conn = *connp;
1278 	if (conn == NULL)
1279 		return (0);
1280 
1281 	do {
1282 		if (conn->conn_call->sequence == discon.sequence) {
1283 			if (conn->conn_next == conn)
1284 				*connp = (struct conn_ind *)0;
1285 			else {
1286 				if (conn == *connp) {
1287 					*connp = conn->conn_next;
1288 				}
1289 				conn->conn_next->conn_prev = conn->conn_prev;
1290 				conn->conn_prev->conn_next = conn->conn_next;
1291 			}
1292 			free((char *)conn);
1293 			break;
1294 		}
1295 		conn = conn->conn_next;
1296 	} while (conn != *connp);
1297 
1298 	return (0);
1299 }
1300 
1301 static void
1302 cots_listen_event(int fd, int conn_index)
1303 {
1304 	struct t_call *call;
1305 	struct conn_ind	*conn;
1306 	struct conn_ind	*conn_head;
1307 	int event;
1308 	struct netconfig *nconf = &conn_polled[conn_index].nc;
1309 	int new_fd;
1310 	struct netbuf addrmask;
1311 	int ret = 0;
1312 	char *clnt;
1313 	char *clnt_uaddr = NULL;
1314 	struct nd_hostservlist *clnt_serv = NULL;
1315 
1316 	conn_head = (struct conn_ind *)0;
1317 	(void) conn_get(fd, nconf, &conn_head);
1318 
1319 	while ((conn = conn_head) != NULL) {
1320 		conn_head = conn->conn_next;
1321 		if (conn_head == conn)
1322 			conn_head = (struct conn_ind *)0;
1323 		else {
1324 			conn_head->conn_prev = conn->conn_prev;
1325 			conn->conn_prev->conn_next = conn_head;
1326 		}
1327 		call = conn->conn_call;
1328 		free((char *)conn);
1329 
1330 		/*
1331 		 * If we have already accepted the maximum number of
1332 		 * connections allowed on the command line, then drop
1333 		 * the oldest connection (for any protocol) before
1334 		 * accepting the new connection.  Unless explicitly
1335 		 * set on the command line, max_conns_allowed is -1.
1336 		 */
1337 		if (max_conns_allowed != -1 && num_conns >= max_conns_allowed)
1338 			conn_close_oldest();
1339 
1340 		/*
1341 		 * Create a new transport endpoint for the same proto as
1342 		 * the listener.
1343 		 */
1344 		new_fd = nfslib_transport_open(nconf);
1345 		if (new_fd == -1) {
1346 			call->udata.len = 0;
1347 			(void) t_snddis(fd, call);
1348 			(void) t_free((char *)call, T_CALL);
1349 			syslog(LOG_ERR, "Cannot establish transport over %s",
1350 			    nconf->nc_device);
1351 			continue;
1352 		}
1353 
1354 		/* Bind to a generic address/port for the accepting stream. */
1355 		if (t_bind(new_fd, (struct t_bind *)NULL,
1356 		    (struct t_bind *)NULL) == -1) {
1357 			nfslib_log_tli_error("t_bind", new_fd, nconf);
1358 			call->udata.len = 0;
1359 			(void) t_snddis(fd, call);
1360 			(void) t_free((char *)call, T_CALL);
1361 			(void) t_close(new_fd);
1362 			continue;
1363 		}
1364 
1365 		while (t_accept(fd, new_fd, call) == -1) {
1366 			if (t_errno != TLOOK) {
1367 #ifdef DEBUG
1368 				nfslib_log_tli_error("t_accept", fd, nconf);
1369 #endif
1370 				call->udata.len = 0;
1371 				(void) t_snddis(fd, call);
1372 				(void) t_free((char *)call, T_CALL);
1373 				(void) t_close(new_fd);
1374 				goto do_next_conn;
1375 			}
1376 			while (event = t_look(fd)) {
1377 				switch (event) {
1378 				case T_LISTEN:
1379 #ifdef DEBUG
1380 					printf(
1381 "cots_listen_event(%s): T_LISTEN during accept processing\n", nconf->nc_proto);
1382 #endif
1383 					(void) conn_get(fd, nconf, &conn_head);
1384 					continue;
1385 				case T_DISCONNECT:
1386 #ifdef DEBUG
1387 					printf(
1388 	"cots_listen_event(%s): T_DISCONNECT during accept processing\n",
1389 					    nconf->nc_proto);
1390 #endif
1391 					(void) discon_get(fd, nconf,
1392 					    &conn_head);
1393 					continue;
1394 				default:
1395 					syslog(LOG_ERR,
1396 			"unexpected event 0x%x during accept processing (%s)",
1397 					    event, nconf->nc_proto);
1398 					call->udata.len = 0;
1399 					(void) t_snddis(fd, call);
1400 					(void) t_free((char *)call, T_CALL);
1401 					(void) t_close(new_fd);
1402 					goto do_next_conn;
1403 				}
1404 			}
1405 		}
1406 
1407 		if (set_addrmask(new_fd, nconf, &addrmask) < 0) {
1408 			(void) syslog(LOG_ERR,
1409 			    "Cannot set address mask for %s",
1410 			    nconf->nc_netid);
1411 			return;
1412 		}
1413 
1414 		/* Tell KRPC about the new stream. */
1415 		if (Mysvc4 != NULL)
1416 			ret = (*Mysvc4)(new_fd, &addrmask, nconf,
1417 			    NFS4_KRPC_START, &call->addr);
1418 		else
1419 			ret = (*Mysvc)(new_fd, addrmask, nconf);
1420 
1421 		if (ret < 0) {
1422 			if (errno != ENOTCONN) {
1423 				syslog(LOG_ERR,
1424 				    "unable to register new connection: %m");
1425 			} else {
1426 				/*
1427 				 * This is the only error that could be
1428 				 * caused by the client, so who was it?
1429 				 */
1430 				if (netdir_getbyaddr(nconf, &clnt_serv,
1431 				    &(call->addr)) == ND_OK &&
1432 				    clnt_serv->h_cnt > 0)
1433 					clnt = clnt_serv->h_hostservs->h_host;
1434 				else
1435 					clnt = clnt_uaddr = taddr2uaddr(nconf,
1436 					    &(call->addr));
1437 				/*
1438 				 * If we don't know who the client was,
1439 				 * remain silent.
1440 				 */
1441 				if (clnt)
1442 					syslog(LOG_ERR,
1443 "unable to register new connection: client %s has dropped connection", clnt);
1444 				if (clnt_serv)
1445 					netdir_free(clnt_serv, ND_HOSTSERVLIST);
1446 				if (clnt_uaddr)
1447 					free(clnt_uaddr);
1448 			}
1449 			free(addrmask.buf);
1450 			(void) t_snddis(new_fd, (struct t_call *)0);
1451 			(void) t_free((char *)call, T_CALL);
1452 			(void) t_close(new_fd);
1453 			goto do_next_conn;
1454 		}
1455 
1456 		free(addrmask.buf);
1457 		(void) t_free((char *)call, T_CALL);
1458 
1459 		/*
1460 		 * Poll on the new descriptor so that we get disconnect
1461 		 * and orderly release indications.
1462 		 */
1463 		num_conns++;
1464 		add_to_poll_list(new_fd, nconf);
1465 
1466 		/* Reset nconf in case it has been moved. */
1467 		nconf = &conn_polled[conn_index].nc;
1468 do_next_conn:;
1469 	}
1470 }
1471 
1472 static int
1473 do_poll_cots_action(int fd, int conn_index)
1474 {
1475 	char buf[256];
1476 	int event;
1477 	int i1;
1478 	int flags;
1479 	struct conn_entry *connent = &conn_polled[conn_index];
1480 	struct netconfig *nconf = &(connent->nc);
1481 	const char *errorstr;
1482 
1483 	while (event = t_look(fd)) {
1484 		switch (event) {
1485 		case T_LISTEN:
1486 #ifdef DEBUG
1487 printf("do_poll_cots_action(%s,%d): T_LISTEN event\n", nconf->nc_proto, fd);
1488 #endif
1489 			cots_listen_event(fd, conn_index);
1490 			break;
1491 
1492 		case T_DATA:
1493 #ifdef DEBUG
1494 printf("do_poll_cots_action(%d,%s): T_DATA event\n", fd, nconf->nc_proto);
1495 #endif
1496 			/*
1497 			 * Receive a private notification from CONS rpcmod.
1498 			 */
1499 			i1 = t_rcv(fd, buf, sizeof (buf), &flags);
1500 			if (i1 == -1) {
1501 				syslog(LOG_ERR, "t_rcv failed");
1502 				break;
1503 			}
1504 			if (i1 < sizeof (int))
1505 				break;
1506 			i1 = BE32_TO_U32(buf);
1507 			if (i1 == 1 || i1 == 2) {
1508 				/*
1509 				 * This connection has been idle for too long,
1510 				 * so release it as politely as we can.  If we
1511 				 * have already initiated an orderly release
1512 				 * and we get notified that the stream is
1513 				 * still idle, pull the plug.  This prevents
1514 				 * hung connections from continuing to consume
1515 				 * resources.
1516 				 */
1517 #ifdef DEBUG
1518 printf("do_poll_cots_action(%s,%d): ", nconf->nc_proto, fd);
1519 printf("initiating orderly release of idle connection\n");
1520 #endif
1521 				if (nconf->nc_semantics == NC_TPI_COTS ||
1522 				    connent->closing != 0) {
1523 					(void) t_snddis(fd, (struct t_call *)0);
1524 					goto fdclose;
1525 				}
1526 				/*
1527 				 * For NC_TPI_COTS_ORD, the stream is closed
1528 				 * and removed from the poll list when the
1529 				 * T_ORDREL is received from the provider.  We
1530 				 * don't wait for it here because it may take
1531 				 * a while for the transport to shut down.
1532 				 */
1533 				if (t_sndrel(fd) == -1) {
1534 					syslog(LOG_ERR,
1535 					"unable to send orderly release %m");
1536 				}
1537 				connent->closing = 1;
1538 			} else
1539 				syslog(LOG_ERR,
1540 				"unexpected event from CONS rpcmod %d", i1);
1541 			break;
1542 
1543 		case T_ORDREL:
1544 #ifdef DEBUG
1545 printf("do_poll_cots_action(%s,%d): T_ORDREL event\n", nconf->nc_proto, fd);
1546 #endif
1547 			/* Perform an orderly release. */
1548 			if (t_rcvrel(fd) == 0) {
1549 				/* T_ORDREL on listen fd's should be ignored */
1550 				if (!is_listen_fd_index(conn_index)) {
1551 					(void) t_sndrel(fd);
1552 					goto fdclose;
1553 				}
1554 				break;
1555 
1556 			} else if (t_errno == TLOOK) {
1557 				break;
1558 			} else {
1559 				nfslib_log_tli_error("t_rcvrel", fd, nconf);
1560 
1561 				/*
1562 				 * check to make sure we do not close
1563 				 * listen fd
1564 				 */
1565 				if (is_listen_fd_index(conn_index))
1566 					break;
1567 				else
1568 					goto fdclose;
1569 			}
1570 
1571 		case T_DISCONNECT:
1572 #ifdef DEBUG
1573 printf("do_poll_cots_action(%s,%d): T_DISCONNECT event\n", nconf->nc_proto, fd);
1574 #endif
1575 			if (t_rcvdis(fd, (struct t_discon *)NULL) == -1)
1576 				nfslib_log_tli_error("t_rcvdis", fd, nconf);
1577 
1578 			/*
1579 			 * T_DISCONNECT on listen fd's should be ignored.
1580 			 */
1581 			if (is_listen_fd_index(conn_index))
1582 				break;
1583 			else
1584 				goto fdclose;
1585 
1586 		case T_ERROR:
1587 		default:
1588 			if (event == T_ERROR || t_errno == TSYSERR) {
1589 				if ((errorstr = strerror(errno)) == NULL) {
1590 					(void) sprintf(buf,
1591 					    "Unknown error num %d", errno);
1592 					errorstr = (const char *) buf;
1593 				}
1594 			} else if (event == -1)
1595 				errorstr = t_strerror(t_errno);
1596 			else
1597 				errorstr = "";
1598 			syslog(LOG_ERR,
1599 			    "unexpected TLI event (0x%x) on "
1600 			    "connection-oriented transport(%s,%d):%s",
1601 			    event, nconf->nc_proto, fd, errorstr);
1602 fdclose:
1603 			num_conns--;
1604 			remove_from_poll_list(fd);
1605 			(void) t_close(fd);
1606 			return (0);
1607 		}
1608 	}
1609 
1610 	return (0);
1611 }
1612 
/*
 * Translate a service name (used primarily in logging) into the RPC
 * port name expected by the netdir_*() routines.  The comparison is
 * case-sensitive; names with no known mapping yield "unrecognized".
 */
static char *
serv_name_to_port_name(char *name)
{
	static const struct {
		const char	*svc_name;
		char		*port_name;
	} svc_map[] = {
		{ "NFS",		"nfs" },
		{ "NLM",		"lockd" },
		{ "NFS4_CALLBACK",	"nfs4_callback" }
	};
	size_t idx;

	for (idx = 0; idx < sizeof (svc_map) / sizeof (svc_map[0]); idx++) {
		if (strcmp(name, svc_map[idx].svc_name) == 0)
			return (svc_map[idx].port_name);
	}

	return ("unrecognized");
}
1630 
1631 static int
1632 bind_to_provider(char *provider, char *serv, struct netbuf **addr,
1633 		struct netconfig **retnconf)
1634 {
1635 	struct netconfig *nconf;
1636 	NCONF_HANDLE *nc;
1637 	struct nd_hostserv hs;
1638 
1639 	hs.h_host = HOST_SELF;
1640 	hs.h_serv = serv_name_to_port_name(serv);
1641 
1642 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1643 		syslog(LOG_ERR, "setnetconfig failed: %m");
1644 		return (-1);
1645 	}
1646 	while (nconf = getnetconfig(nc)) {
1647 		if (OK_TPI_TYPE(nconf) &&
1648 		    strcmp(nconf->nc_device, provider) == 0) {
1649 			*retnconf = nconf;
1650 			return (nfslib_bindit(nconf, addr, &hs,
1651 			    listen_backlog));
1652 		}
1653 	}
1654 	(void) endnetconfig(nc);
1655 
1656 	syslog(LOG_ERR, "couldn't find netconfig entry for provider %s",
1657 	    provider);
1658 	return (-1);
1659 }
1660 
1661 static int
1662 bind_to_proto(NETSELDECL(proto), char *serv, struct netbuf **addr,
1663 		struct netconfig **retnconf)
1664 {
1665 	struct netconfig *nconf;
1666 	NCONF_HANDLE *nc = NULL;
1667 	struct nd_hostserv hs;
1668 
1669 	hs.h_host = HOST_SELF;
1670 	hs.h_serv = serv_name_to_port_name(serv);
1671 
1672 	if ((nc = setnetconfig()) == (NCONF_HANDLE *)NULL) {
1673 		syslog(LOG_ERR, "setnetconfig failed: %m");
1674 		return (-1);
1675 	}
1676 	while (nconf = getnetconfig(nc)) {
1677 		if (OK_TPI_TYPE(nconf) && NETSELEQ(nconf->nc_proto, proto)) {
1678 			*retnconf = nconf;
1679 			return (nfslib_bindit(nconf, addr, &hs,
1680 			    listen_backlog));
1681 		}
1682 	}
1683 	(void) endnetconfig(nc);
1684 
1685 	syslog(LOG_ERR, "couldn't find netconfig entry for protocol %s",
1686 	    proto);
1687 	return (-1);
1688 }
1689 
1690 #include <netinet/in.h>
1691 
1692 /*
1693  * Create an address mask appropriate for the transport.
1694  * The mask is used to obtain the host-specific part of
1695  * a network address when comparing addresses.
1696  * For an internet address the host-specific part is just
1697  * the 32 bit IP address and this part of the mask is set
1698  * to all-ones. The port number part of the mask is zeroes.
1699  */
1700 static int
1701 set_addrmask(fd, nconf, mask)
1702 	struct netconfig *nconf;
1703 	struct netbuf *mask;
1704 {
1705 	struct t_info info;
1706 
1707 	/*
1708 	 * Find the size of the address we need to mask.
1709 	 */
1710 	if (t_getinfo(fd, &info) < 0) {
1711 		t_error("t_getinfo");
1712 		return (-1);
1713 	}
1714 	mask->len = mask->maxlen = info.addr;
1715 	if (info.addr <= 0) {
1716 		/*
1717 		 * loopback devices have infinite addr size
1718 		 * (it is identified by -1 in addr field of t_info structure),
1719 		 * so don't build the netmask for them. It's a special case
1720 		 * that should be handled properly.
1721 		 */
1722 		if ((info.addr == -1) &&
1723 		    (0 == strcmp(nconf->nc_protofmly, NC_LOOPBACK))) {
1724 			memset(mask, 0, sizeof (*mask));
1725 			return (0);
1726 		}
1727 
1728 		syslog(LOG_ERR, "set_addrmask: address size: %ld",
1729 			info.addr);
1730 		return (-1);
1731 	}
1732 
1733 	mask->buf = (char *)malloc(mask->len);
1734 	if (mask->buf == NULL) {
1735 		syslog(LOG_ERR, "set_addrmask: no memory");
1736 		return (-1);
1737 	}
1738 	(void) memset(mask->buf, 0, mask->len);	/* reset all mask bits */
1739 
1740 	if (strcmp(nconf->nc_protofmly, NC_INET) == 0) {
1741 		/*
1742 		 * Set the mask so that the port is ignored.
1743 		 */
1744 		/* LINTED pointer alignment */
1745 		((struct sockaddr_in *)mask->buf)->sin_addr.s_addr =
1746 								(ulong_t)~0;
1747 		/* LINTED pointer alignment */
1748 		((struct sockaddr_in *)mask->buf)->sin_family =
1749 								(ushort_t)~0;
1750 	} else if (strcmp(nconf->nc_protofmly, NC_INET6) == 0) {
1751 		/* LINTED pointer alignment */
1752 		(void) memset(&((struct sockaddr_in6 *)mask->buf)->sin6_addr,
1753 			(uchar_t)~0, sizeof (struct in6_addr));
1754 		/* LINTED pointer alignment */
1755 		((struct sockaddr_in6 *)mask->buf)->sin6_family =
1756 								(ushort_t)~0;
1757 	} else {
1758 
1759 		/*
1760 		 * Set all mask bits.
1761 		 */
1762 		(void) memset(mask->buf, 0xFF, mask->len);
1763 	}
1764 	return (0);
1765 }
1766 
1767 /*
1768  * For listen fd's index is always less than end_listen_fds.
1769  * end_listen_fds is defined externally in the daemon that uses this library.
1770  * It's value is equal to the number of open file descriptors after the
1771  * last listen end point was opened but before any connection was accepted.
1772  */
1773 static int
1774 is_listen_fd_index(int index)
1775 {
1776 	return (index < end_listen_fds);
1777 }
1778