xref: /titanic_52/usr/src/lib/libnsl/rpc/clnt_vc.c (revision 55a3a0ef881a0e1c7da841d46bb5878758497dc1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32 /*
33  * Portions of this source code were derived from Berkeley
34  * 4.3 BSD under license from the Regents of the University of
35  * California.
36  */
37 
38 /*
39  * clnt_vc.c
40  *
41  * Implements a connectionful client side RPC.
42  *
43  * Connectionful RPC supports 'batched calls'.
 * A sequence of calls may be batched up in a send buffer. The rpc call
 * returns immediately to the client even though the call was not necessarily
 * sent. The batching occurs if the results' xdr routine is NULL (0) AND
 * the rpc timeout value is zero (see clnt.h, rpc).
48  *
49  * Clients should NOT casually batch calls that in fact return results; that
50  * is the server side should be aware that a call is batched and not produce
51  * any return message. Batched calls that produce many result messages can
52  * deadlock (netlock) the client and the server....
53  */
54 
55 
56 #include "mt.h"
57 #include "rpc_mt.h"
58 #include <assert.h>
59 #include <rpc/rpc.h>
60 #include <errno.h>
61 #include <sys/byteorder.h>
62 #include <sys/mkdev.h>
63 #include <sys/poll.h>
64 #include <syslog.h>
65 #include <stdlib.h>
66 #include <unistd.h>
67 #include <netinet/tcp.h>
68 
/* Size in bytes of ct_mcall, the buffer holding the pre-marshalled call header */
#define	MCALL_MSG_SIZE 24
/* Multiplier converting seconds to milliseconds */
#define	SECS_TO_MS 1000
/*
 * Converts microseconds to milliseconds when used as "x * USECS_TO_MS".
 * Deliberately left unparenthesized: the expansion "x * 1/1000" evaluates
 * as (x * 1) / 1000.  Parenthesizing the body would make it the integer
 * constant 0, so it must only ever appear as the right operand of '*'.
 */
#define	USECS_TO_MS 1/1000
#ifndef MIN
/* Smaller of two values; note that each argument may be evaluated twice */
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif

/* Prototypes for external-linkage routines referenced in this file */
extern int __rpc_timeval_to_msec(struct timeval *);
extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
								caddr_t);
extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
		rpcvers_t, uint_t, uint_t, const struct timeval *);

/* Forward declarations of file-local helpers */
static struct clnt_ops	*clnt_vc_ops(void);
static int		read_vc(void *, caddr_t, int);
static int		write_vc(void *, caddr_t, int);
static int		t_rcvall(int, char *, int);
static bool_t		time_not_ok(struct timeval *);

/* ct_data is defined further down; declared here so prototypes can use it */
struct ct_data;
static bool_t		set_up_connection(int, struct netbuf *,
				struct ct_data *, const struct timeval *);
static bool_t		set_io_mode(struct ct_data *, int);
95 
/*
 * Lock table handle used by various MT sync. routines.
 *
 * vctbl is created on first use with rpc_fd_init() inside
 * _clnt_vc_create_timed() and is afterwards passed, together with a
 * connection's fd, to rpc_fd_lock()/rpc_fd_unlock().  vctbl_lock only
 * serializes that one-time creation.
 */
static mutex_t	vctbl_lock = DEFAULTMUTEX;
static void	*vctbl = NULL;

/*
 * Message fragments handed to syslog().  clnt_vc_errstr is the format
 * string; it is used with a routine name and a reason string.
 */
static const char clnt_vc_errstr[] = "%s : %s";
static const char clnt_vc_str[] = "clnt_vc_create";
static const char clnt_read_vc_str[] = "read_vc";
static const char __no_mem_str[] = "out of memory";
static const char no_fcntl_getfl_str[] = "could not get status flags and modes";
static const char no_nonblock_str[] = "could not set transport blocking mode";
108 
/*
 * Private data structure
 *
 * One instance is allocated per client handle by _clnt_vc_create_timed()
 * and stored in the handle's cl_private member.
 */
struct ct_data {
	int		ct_fd;		/* connection's fd */
	bool_t		ct_closeit;	/* close it on destroy */
	/* tsdu value reported by t_getinfo() when the handle was created */
	int		ct_tsdu;	/* size of tsdu */
	int		ct_wait;	/* wait interval in milliseconds */
	bool_t		ct_waitset;	/* wait set by clnt_control? */
	struct netbuf	ct_addr;	/* remote addr */
	/* NOTE(review): not referenced in the visible part of the file */
	struct rpc_err	ct_error;
	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg */
	uint_t		ct_mpos;	/* pos after marshal */
	XDR		ct_xdrs;	/* XDR stream */

	/* NON STANDARD INFO - 00-08-31 */
	bool_t		ct_is_oneway; /* True if the current call is oneway. */
	/* TRUE while ct_fd does not have O_NONBLOCK set */
	bool_t		ct_is_blocking;
	/* I/O mode; initialized to RPC_CL_BLOCKING */
	ushort_t	ct_io_mode;
	/* flush mode; initialized to RPC_CL_BLOCKING_FLUSH */
	ushort_t	ct_blocking_mode;
	uint_t		ct_bufferSize; /* Total size of the buffer. */
	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
	char 		*ct_buffer; /* Pointer to the buffer. */
	char 		*ct_bufferWritePtr; /* Ptr to the first free byte. */
	char 		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
};
135 
/*
 * Node of a singly linked list of ct_data pointers.
 *
 * The list heads below are initialized to point at themselves (cast to a
 * node), so a head variable doubles as the terminating sentinel.  The
 * macros dereference that sentinel through `next', which appears to rely
 * on `next' being the first member of the node.
 */
struct nb_reg_node {
	struct nb_reg_node *next;
	struct ct_data *ct;
};

/*
 * List heads, both initially empty.
 * NOTE(review): judging by the names, nb_first holds registered
 * connections and nb_free holds recycled nodes - confirm against
 * register_nb()/unregister_nb().
 */
static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;
static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;

/* NOTE(review): presumably records that an exit handler was installed */
static bool_t exit_handler_set = FALSE;

/* NOTE(review): presumably guards the lists above - verify in the users */
static mutex_t nb_list_mutex = DEFAULTMUTEX;


/* Define some macros to manage the linked list. */
/* True when the head still points at itself, i.e. the list has no nodes */
#define	LIST_ISEMPTY(l) (l == (struct nb_reg_node *)&l)
/* Reset the head to the empty (self-pointing) state */
#define	LIST_CLR(l) (l = (struct nb_reg_node *)&l)
/*
 * Make `node' the new head.
 * NOTE(review): node->next is set to l->next rather than l.  For an empty
 * list these are the same, but for a non-empty list this looks like it
 * unlinks the current first node - confirm against the callers.
 */
#define	LIST_ADD(l, node) (node->next = l->next, l = node)
/* Pop the head into `node' and advance the head to the following entry */
#define	LIST_EXTRACT(l, node) (node = l, l = l->next)
/* Visit every node until the sentinel (the head's own address) is reached */
#define	LIST_FOR_EACH(l, node) \
	for (node = l; node != (struct nb_reg_node *)&l; node = node->next)
156 
157 
/*
 * Default size of the IO buffer used in non blocking mode.
 * This is the initial value of ct_bufferSize; the buffer itself is not
 * allocated when the handle is created.
 */
#define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)

/* Forward declarations of the non-blocking I/O helpers */
static int nb_send(struct ct_data *, void *, unsigned int);
static int do_flush(struct ct_data *, uint_t);
static bool_t set_flush_mode(struct ct_data *, int);
static bool_t set_blocking_connection(struct ct_data *, bool_t);

/* Forward declarations of the list registration helpers */
static int register_nb(struct ct_data *);
static int unregister_nb(struct ct_data *);
168 
169 
170 /*
171  * Change the mode of the underlying fd.
172  */
173 static bool_t
174 set_blocking_connection(struct ct_data *ct, bool_t blocking)
175 {
176 	int flag;
177 
178 	/*
179 	 * If the underlying fd is already in the required mode,
180 	 * avoid the syscall.
181 	 */
182 	if (ct->ct_is_blocking == blocking)
183 		return (TRUE);
184 
185 	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
186 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
187 		    no_fcntl_getfl_str);
188 		return (FALSE);
189 	}
190 
191 	flag = blocking? flag&~O_NONBLOCK : flag|O_NONBLOCK;
192 	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
193 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
194 		    no_nonblock_str);
195 		return (FALSE);
196 	}
197 	ct->ct_is_blocking = blocking;
198 	return (TRUE);
199 }
200 
201 /*
202  * Create a client handle for a connection.
203  * Default options are set, which the user can change using clnt_control()'s.
204  * The rpc/vc package does buffering similar to stdio, so the client
205  * must pick send and receive buffer sizes, 0 => use the default.
206  * NB: fd is copied into a private area.
207  * NB: The rpch->cl_auth is set null authentication. Caller may wish to
208  * set this something more useful.
209  *
210  * fd should be open and bound.
211  */
212 CLIENT *
213 clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
214 	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
215 {
216 	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
217 	    recvsz, NULL));
218 }
219 
220 /*
221  * This has the same definition as clnt_vc_create(), except it
222  * takes an additional parameter - a pointer to a timeval structure.
223  *
224  * Not a public interface. This is for clnt_create_timed,
225  * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
226  * value to control a tcp connection attempt.
227  * (for bug 4049792: clnt_create_timed does not time out)
228  *
229  * If tp is NULL, use default timeout to set up the connection.
230  */
231 CLIENT *
232 _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
233 	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
234 {
235 	CLIENT *cl;			/* client handle */
236 	struct ct_data *ct;		/* private data */
237 	struct timeval now;
238 	struct rpc_msg call_msg;
239 	struct t_info tinfo;
240 	int flag;
241 
242 	cl = malloc(sizeof (*cl));
243 	if ((ct = malloc(sizeof (*ct))) != NULL)
244 		ct->ct_addr.buf = NULL;
245 
246 	if ((cl == NULL) || (ct == NULL)) {
247 		(void) syslog(LOG_ERR, clnt_vc_errstr,
248 		    clnt_vc_str, __no_mem_str);
249 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
250 		rpc_createerr.cf_error.re_errno = errno;
251 		rpc_createerr.cf_error.re_terrno = 0;
252 		goto err;
253 	}
254 
255 	/*
256 	 * The only use of vctbl_lock is for serializing the creation of
257 	 * vctbl. Once created the lock needs to be released so we don't
258 	 * hold it across the set_up_connection() call and end up with a
259 	 * bunch of threads stuck waiting for the mutex.
260 	 */
261 	sig_mutex_lock(&vctbl_lock);
262 
263 	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
264 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
265 		rpc_createerr.cf_error.re_errno = errno;
266 		rpc_createerr.cf_error.re_terrno = 0;
267 		sig_mutex_unlock(&vctbl_lock);
268 		goto err;
269 	}
270 
271 	sig_mutex_unlock(&vctbl_lock);
272 
273 	ct->ct_io_mode = RPC_CL_BLOCKING;
274 	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;
275 
276 	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
277 	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
278 	ct->ct_bufferPendingSize = 0;
279 	ct->ct_bufferWritePtr = NULL;
280 	ct->ct_bufferReadPtr = NULL;
281 
282 	/* Check the current state of the fd. */
283 	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
284 		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
285 		    no_fcntl_getfl_str);
286 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
287 		rpc_createerr.cf_error.re_terrno = errno;
288 		rpc_createerr.cf_error.re_errno = 0;
289 		goto err;
290 	}
291 	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;
292 
293 	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
294 		goto err;
295 	}
296 
297 	/*
298 	 * Set up other members of private data struct
299 	 */
300 	ct->ct_fd = fd;
301 	/*
302 	 * The actual value will be set by clnt_call or clnt_control
303 	 */
304 	ct->ct_wait = 30000;
305 	ct->ct_waitset = FALSE;
306 	/*
307 	 * By default, closeit is always FALSE. It is users responsibility
308 	 * to do a t_close on it, else the user may use clnt_control
309 	 * to let clnt_destroy do it for him/her.
310 	 */
311 	ct->ct_closeit = FALSE;
312 
313 	/*
314 	 * Initialize call message
315 	 */
316 	(void) gettimeofday(&now, (struct timezone *)0);
317 	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
318 	call_msg.rm_call.cb_prog = prog;
319 	call_msg.rm_call.cb_vers = vers;
320 
321 	/*
322 	 * pre-serialize the static part of the call msg and stash it away
323 	 */
324 	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
325 	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
326 		goto err;
327 	}
328 	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
329 	XDR_DESTROY(&(ct->ct_xdrs));
330 
331 	if (t_getinfo(fd, &tinfo) == -1) {
332 		rpc_createerr.cf_stat = RPC_TLIERROR;
333 		rpc_createerr.cf_error.re_terrno = t_errno;
334 		rpc_createerr.cf_error.re_errno = 0;
335 		goto err;
336 	}
337 	/*
338 	 * Find the receive and the send size
339 	 */
340 	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
341 	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
342 	if ((sendsz == 0) || (recvsz == 0)) {
343 		rpc_createerr.cf_stat = RPC_TLIERROR;
344 		rpc_createerr.cf_error.re_terrno = 0;
345 		rpc_createerr.cf_error.re_errno = 0;
346 		goto err;
347 	}
348 	ct->ct_tsdu = tinfo.tsdu;
349 	/*
350 	 * Create a client handle which uses xdrrec for serialization
351 	 * and authnone for authentication.
352 	 */
353 	ct->ct_xdrs.x_ops = NULL;
354 	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
355 	    read_vc, write_vc);
356 	if (ct->ct_xdrs.x_ops == NULL) {
357 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
358 		rpc_createerr.cf_error.re_terrno = 0;
359 		rpc_createerr.cf_error.re_errno = ENOMEM;
360 		goto err;
361 	}
362 	cl->cl_ops = clnt_vc_ops();
363 	cl->cl_private = (caddr_t)ct;
364 	cl->cl_auth = authnone_create();
365 	cl->cl_tp = NULL;
366 	cl->cl_netid = NULL;
367 	return (cl);
368 
369 err:
370 	if (ct) {
371 		free(ct->ct_addr.buf);
372 		free(ct);
373 	}
374 	free(cl);
375 
376 	return (NULL);
377 }
378 
379 #define	TCPOPT_BUFSIZE 128
380 
381 /*
382  * Set tcp connection timeout value.
383  * Retun 0 for success, -1 for failure.
384  */
385 static int
386 _set_tcp_conntime(int fd, int optval)
387 {
388 	struct t_optmgmt req, res;
389 	struct opthdr *opt;
390 	int *ip;
391 	char buf[TCPOPT_BUFSIZE];
392 
393 	/* LINTED pointer cast */
394 	opt = (struct opthdr *)buf;
395 	opt->level =  IPPROTO_TCP;
396 	opt->name = TCP_CONN_ABORT_THRESHOLD;
397 	opt->len = sizeof (int);
398 
399 	req.flags = T_NEGOTIATE;
400 	req.opt.len = sizeof (struct opthdr) + opt->len;
401 	req.opt.buf = (char *)opt;
402 	/* LINTED pointer cast */
403 	ip = (int *)((char *)buf + sizeof (struct opthdr));
404 	*ip = optval;
405 
406 	res.flags = 0;
407 	res.opt.buf = (char *)buf;
408 	res.opt.maxlen = sizeof (buf);
409 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
410 		return (-1);
411 	}
412 	return (0);
413 }
414 
415 /*
416  * Get current tcp connection timeout value.
417  * Retun 0 for success, -1 for failure.
418  */
419 static int
420 _get_tcp_conntime(int fd)
421 {
422 	struct t_optmgmt req, res;
423 	struct opthdr *opt;
424 	int *ip, retval;
425 	char buf[TCPOPT_BUFSIZE];
426 
427 	/* LINTED pointer cast */
428 	opt = (struct opthdr *)buf;
429 	opt->level =  IPPROTO_TCP;
430 	opt->name = TCP_CONN_ABORT_THRESHOLD;
431 	opt->len = sizeof (int);
432 
433 	req.flags = T_CURRENT;
434 	req.opt.len = sizeof (struct opthdr) + opt->len;
435 	req.opt.buf = (char *)opt;
436 	/* LINTED pointer cast */
437 	ip = (int *)((char *)buf + sizeof (struct opthdr));
438 	*ip = 0;
439 
440 	res.flags = 0;
441 	res.opt.buf = (char *)buf;
442 	res.opt.maxlen = sizeof (buf);
443 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
444 		return (-1);
445 	}
446 
447 	/* LINTED pointer cast */
448 	ip = (int *)((char *)buf + sizeof (struct opthdr));
449 	retval = *ip;
450 	return (retval);
451 }
452 
453 static bool_t
454 set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
455 						const struct timeval *tp)
456 {
457 	int state;
458 	struct t_call sndcallstr, *rcvcall;
459 	int nconnect;
460 	bool_t connected, do_rcv_connect;
461 	int curr_time = 0;
462 
463 	ct->ct_addr.len = 0;
464 	state = t_getstate(fd);
465 	if (state == -1) {
466 		rpc_createerr.cf_stat = RPC_TLIERROR;
467 		rpc_createerr.cf_error.re_errno = 0;
468 		rpc_createerr.cf_error.re_terrno = t_errno;
469 		return (FALSE);
470 	}
471 
472 #ifdef DEBUG
473 	fprintf(stderr, "set_up_connection: state = %d\n", state);
474 #endif
475 	switch (state) {
476 	case T_IDLE:
477 		if (svcaddr == NULL) {
478 			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
479 			return (FALSE);
480 		}
481 		/*
482 		 * Connect only if state is IDLE and svcaddr known
483 		 */
484 /* LINTED pointer alignment */
485 		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
486 		if (rcvcall == NULL) {
487 			rpc_createerr.cf_stat = RPC_TLIERROR;
488 			rpc_createerr.cf_error.re_terrno = t_errno;
489 			rpc_createerr.cf_error.re_errno = errno;
490 			return (FALSE);
491 		}
492 		rcvcall->udata.maxlen = 0;
493 		sndcallstr.addr = *svcaddr;
494 		sndcallstr.opt.len = 0;
495 		sndcallstr.udata.len = 0;
496 		/*
497 		 * Even NULL could have sufficed for rcvcall, because
498 		 * the address returned is same for all cases except
499 		 * for the gateway case, and hence required.
500 		 */
501 		connected = FALSE;
502 		do_rcv_connect = FALSE;
503 
504 		/*
505 		 * If there is a timeout value specified, we will try to
506 		 * reset the tcp connection timeout. If the transport does
507 		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
508 		 * for other reason, default timeout will be used.
509 		 */
510 		if (tp != NULL) {
511 			int ms;
512 
513 			/*
514 			 * TCP_CONN_ABORT_THRESHOLD takes int value in millisecs
515 			 */
516 			ms = tp->tv_sec * SECS_TO_MS +
517 			    tp->tv_usec * USECS_TO_MS;
518 			if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
519 			    (_set_tcp_conntime(fd, ms) == 0)) {
520 				/* EMPTY */
521 #ifdef DEBUG
522 				fprintf(stderr, "set_up_connection: set tcp ");
523 				fprintf(stderr, "connection timeout to %d ms\n",
524 				    ms);
525 #endif
526 			}
527 		}
528 
529 		for (nconnect = 0; nconnect < 3; nconnect++) {
530 			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
531 				connected = TRUE;
532 				break;
533 			}
534 			if (t_errno == TLOOK) {
535 				switch (t_look(fd)) {
536 				case T_DISCONNECT:
537 					(void) t_rcvdis(fd, (struct
538 					    t_discon *) NULL);
539 					break;
540 				default:
541 					break;
542 				}
543 			} else if (!(t_errno == TSYSERR && errno == EINTR)) {
544 				break;
545 			}
546 			if ((state = t_getstate(fd)) == T_OUTCON) {
547 				do_rcv_connect = TRUE;
548 				break;
549 			}
550 			if (state != T_IDLE) {
551 				break;
552 			}
553 		}
554 		if (do_rcv_connect) {
555 			do {
556 				if (t_rcvconnect(fd, rcvcall) != -1) {
557 					connected = TRUE;
558 					break;
559 				}
560 			} while (t_errno == TSYSERR && errno == EINTR);
561 		}
562 
563 		/*
564 		 * Set the connection timeout back to its old value.
565 		 */
566 		if (curr_time) {
567 			(void) _set_tcp_conntime(fd, curr_time);
568 		}
569 
570 		if (!connected) {
571 			rpc_createerr.cf_stat = RPC_TLIERROR;
572 			rpc_createerr.cf_error.re_terrno = t_errno;
573 			rpc_createerr.cf_error.re_errno = errno;
574 			(void) t_free((char *)rcvcall, T_CALL);
575 #ifdef DEBUG
576 			fprintf(stderr, "clnt_vc: t_connect error %d\n",
577 			    rpc_createerr.cf_error.re_terrno);
578 #endif
579 			return (FALSE);
580 		}
581 
582 		/* Free old area if allocated */
583 		if (ct->ct_addr.buf)
584 			free(ct->ct_addr.buf);
585 		ct->ct_addr = rcvcall->addr;	/* To get the new address */
586 		/* So that address buf does not get freed */
587 		rcvcall->addr.buf = NULL;
588 		(void) t_free((char *)rcvcall, T_CALL);
589 		break;
590 	case T_DATAXFER:
591 	case T_OUTCON:
592 		if (svcaddr == NULL) {
593 			/*
594 			 * svcaddr could also be NULL in cases where the
595 			 * client is already bound and connected.
596 			 */
597 			ct->ct_addr.len = 0;
598 		} else {
599 			ct->ct_addr.buf = malloc(svcaddr->len);
600 			if (ct->ct_addr.buf == NULL) {
601 				(void) syslog(LOG_ERR, clnt_vc_errstr,
602 				    clnt_vc_str, __no_mem_str);
603 				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
604 				rpc_createerr.cf_error.re_errno = errno;
605 				rpc_createerr.cf_error.re_terrno = 0;
606 				return (FALSE);
607 			}
608 			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
609 			    (size_t)svcaddr->len);
610 			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
611 		}
612 		break;
613 	default:
614 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
615 		return (FALSE);
616 	}
617 	return (TRUE);
618 }
619 
/*
 * Perform one RPC over the connection: marshal the request, send it and,
 * unless the call is batched, wait for the matching reply and decode the
 * results.
 *
 *	cl		client handle; cl_private is the ct_data
 *	proc		procedure number
 *	xdr_args	encoder for the arguments at args_ptr
 *	xdr_results	decoder for the results into results_ptr
 *	timeout		caller's timeout; replaced by ct_wait when a timeout
 *			was fixed earlier (ct_waitset)
 *
 * The call is only queued (batched) when xdr_results is NULL and the
 * effective timeout is zero; RPC_SUCCESS is then returned without waiting
 * for a reply.
 *
 * Returns the resulting status, which in most paths is also left in
 * rpc_callerr.re_status.  The per-fd lock taken at the top is released on
 * every return.
 */
static enum clnt_stat
clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
{
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;
	XDR *xdrs = &(ct->ct_xdrs);
	struct rpc_msg reply_msg;
	uint32_t x_id;
/* LINTED pointer alignment */
	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
	bool_t shipnow;
	int refreshes = 2;	/* credential refresh attempts allowed */

	/*
	 * NOTE(review): rpc_fd_unlock() is called even though the lock
	 * call reported failure - confirm that this is what it expects.
	 */
	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
		rpc_callerr.re_status = RPC_FAILED;
		rpc_callerr.re_errno = errno;
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (RPC_FAILED);
	}

	ct->ct_is_oneway = FALSE;
	/* In non-blocking mode, flush first with RPC_CL_BLOCKING_FLUSH. */
	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (RPC_FAILED);  /* XXX */
		}
	}

	if (!ct->ct_waitset) {
		/* If time is not within limits, we ignore it. */
		if (time_not_ok(&timeout) == FALSE)
			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
	} else {
		/* A timeout fixed earlier overrides the caller's value. */
		timeout.tv_sec = (ct->ct_wait / 1000);
		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
	}

	/* Batch only when no results are expected and the timeout is zero. */
	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
call_again:
	xdrs->x_op = XDR_ENCODE;
	rpc_callerr.re_status = RPC_SUCCESS;
	/*
	 * Due to little endian byte order, it is necessary to convert to host
	 * format before decrementing xid.
	 */
	x_id = ntohl(*msg_x_id) - 1;
	*msg_x_id = htonl(x_id);

	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		/* Stored header, then procedure, credentials and arguments. */
		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
		    (!xdr_args(xdrs, args_ptr))) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	} else {
		/*
		 * RPCSEC_GSS: the procedure number is written into ct_mcall
		 * at ct_mpos, then the header bytes up to the position the
		 * macro leaves u at go to __rpc_gss_wrap() along with the
		 * arguments.
		 */
/* LINTED pointer alignment */
		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
		IXDR_PUT_U_INT32(u, proc);
		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	}
	/* Close the record; shipnow is passed as the second argument. */
	if (!xdrrec_endofrecord(xdrs, shipnow)) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (rpc_callerr.re_status = RPC_CANTSEND);
	}
	if (!shipnow) {
		/* Batched call: there is no reply to wait for. */
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (RPC_SUCCESS);
	}
	/*
	 * Hack to provide rpc-based message passing
	 */
	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (rpc_callerr.re_status = RPC_TIMEDOUT);
	}


	/*
	 * Keep receiving until we get a valid transaction id
	 */
	xdrs->x_op = XDR_DECODE;
	for (;;) {
		/* Reset the reply body before each decode attempt. */
		reply_msg.acpted_rply.ar_verf = _null_auth;
		reply_msg.acpted_rply.ar_results.where = NULL;
		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
		if (!xdrrec_skiprecord(xdrs)) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
		/* now decode and validate the response header */
		if (!xdr_replymsg(xdrs, &reply_msg)) {
			/* No error was recorded, so try the next record. */
			if (rpc_callerr.re_status == RPC_SUCCESS)
				continue;
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
		if (reply_msg.rm_xid == x_id)
			break;
	}

	/*
	 * process header
	 */
	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
		rpc_callerr.re_status = RPC_SUCCESS;
	else
		__seterr_reply(&reply_msg, &(rpc_callerr));

	if (rpc_callerr.re_status == RPC_SUCCESS) {
		/* Check the verifier first, then decode the results. */
		if (!AUTH_VALIDATE(cl->cl_auth,
		    &reply_msg.acpted_rply.ar_verf)) {
			rpc_callerr.re_status = RPC_AUTHERROR;
			rpc_callerr.re_why = AUTH_INVALIDRESP;
		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
			if (!(*xdr_results)(xdrs, results_ptr)) {
				if (rpc_callerr.re_status == RPC_SUCCESS)
					rpc_callerr.re_status =
					    RPC_CANTDECODERES;
			}
		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
		    results_ptr)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTDECODERES;
		}
	}	/* end successful completion */
	/*
	 * If unsuccessful AND error is an authentication error
	 * then refresh credentials and try again, else break
	 */
	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
		/* maybe our credentials need to be refreshed ... */
		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
			goto call_again;
		else
			/*
			 * We are setting rpc_callerr here given that libnsl
			 * is not reentrant thereby reinitializing the TSD.
			 * If not set here then success could be returned even
			 * though refresh failed.
			 */
			rpc_callerr.re_status = RPC_AUTHERROR;
	} /* end of unsuccessful completion */
	/* free verifier ... */
	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
	    reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
		xdrs->x_op = XDR_FREE;
		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
	}
	rpc_fd_unlock(vctbl, ct->ct_fd);
	return (rpc_callerr.re_status);
}
786 
787 static enum clnt_stat
788 clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
789 {
790 /* LINTED pointer alignment */
791 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
792 	XDR *xdrs = &(ct->ct_xdrs);
793 	uint32_t x_id;
794 /* LINTED pointer alignment */
795 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
796 
797 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
798 		rpc_callerr.re_status = RPC_FAILED;
799 		rpc_callerr.re_errno = errno;
800 		rpc_fd_unlock(vctbl, ct->ct_fd);
801 		return (RPC_FAILED);
802 	}
803 
804 	ct->ct_is_oneway = TRUE;
805 
806 	xdrs->x_op = XDR_ENCODE;
807 	rpc_callerr.re_status = RPC_SUCCESS;
808 	/*
809 	 * Due to little endian byte order, it is necessary to convert to host
810 	 * format before decrementing xid.
811 	 */
812 	x_id = ntohl(*msg_x_id) - 1;
813 	*msg_x_id = htonl(x_id);
814 
815 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
816 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
817 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
818 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
819 		    (!xdr_args(xdrs, args_ptr))) {
820 			if (rpc_callerr.re_status == RPC_SUCCESS)
821 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
822 			(void) xdrrec_endofrecord(xdrs, TRUE);
823 			rpc_fd_unlock(vctbl, ct->ct_fd);
824 			return (rpc_callerr.re_status);
825 		}
826 	} else {
827 /* LINTED pointer alignment */
828 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
829 		IXDR_PUT_U_INT32(u, proc);
830 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
831 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
832 			if (rpc_callerr.re_status == RPC_SUCCESS)
833 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
834 			(void) xdrrec_endofrecord(xdrs, TRUE);
835 			rpc_fd_unlock(vctbl, ct->ct_fd);
836 			return (rpc_callerr.re_status);
837 		}
838 	}
839 
840 	/*
841 	 * Do not need to check errors, as the following code does
842 	 * not depend on the successful completion of the call.
843 	 * An error, if any occurs, is reported through
844 	 * rpc_callerr.re_status.
845 	 */
846 	(void) xdrrec_endofrecord(xdrs, TRUE);
847 
848 	rpc_fd_unlock(vctbl, ct->ct_fd);
849 	return (rpc_callerr.re_status);
850 }
851 
852 /* ARGSUSED */
853 static void
854 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
855 {
856 	*errp = rpc_callerr;
857 }
858 
859 static bool_t
860 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
861 {
862 /* LINTED pointer alignment */
863 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
864 	XDR *xdrs = &(ct->ct_xdrs);
865 	bool_t stat;
866 
867 	(void) rpc_fd_lock(vctbl, ct->ct_fd);
868 	xdrs->x_op = XDR_FREE;
869 	stat = (*xdr_res)(xdrs, res_ptr);
870 	rpc_fd_unlock(vctbl, ct->ct_fd);
871 	return (stat);
872 }
873 
/*
 * Intentionally empty: takes no arguments and performs no action.
 * NOTE(review): presumably installed as the abort entry of the ops vector
 * built by clnt_vc_ops() - verify there.
 */
static void
clnt_vc_abort(void)
{
}
878 
879 /*ARGSUSED*/
880 static bool_t
881 clnt_vc_control(CLIENT *cl, int request, char *info)
882 {
883 	bool_t ret;
884 /* LINTED pointer alignment */
885 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
886 
887 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
888 		rpc_fd_unlock(vctbl, ct->ct_fd);
889 		return (FALSE);
890 	}
891 
892 	switch (request) {
893 	case CLSET_FD_CLOSE:
894 		ct->ct_closeit = TRUE;
895 		rpc_fd_unlock(vctbl, ct->ct_fd);
896 		return (TRUE);
897 	case CLSET_FD_NCLOSE:
898 		ct->ct_closeit = FALSE;
899 		rpc_fd_unlock(vctbl, ct->ct_fd);
900 		return (TRUE);
901 	case CLFLUSH:
902 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
903 			int res;
904 			res = do_flush(ct, (info == NULL ||
905 			    /* LINTED pointer cast */
906 			    *(int *)info == RPC_CL_DEFAULT_FLUSH)?
907 			    /* LINTED pointer cast */
908 			    ct->ct_blocking_mode: *(int *)info);
909 			ret = (0 == res);
910 		}
911 		rpc_fd_unlock(vctbl, ct->ct_fd);
912 		return (ret);
913 	}
914 
915 	/* for other requests which use info */
916 	if (info == NULL) {
917 		rpc_fd_unlock(vctbl, ct->ct_fd);
918 		return (FALSE);
919 	}
920 	switch (request) {
921 	case CLSET_TIMEOUT:
922 /* LINTED pointer alignment */
923 		if (time_not_ok((struct timeval *)info)) {
924 			rpc_fd_unlock(vctbl, ct->ct_fd);
925 			return (FALSE);
926 		}
927 /* LINTED pointer alignment */
928 		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
929 		ct->ct_waitset = TRUE;
930 		break;
931 	case CLGET_TIMEOUT:
932 /* LINTED pointer alignment */
933 		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
934 /* LINTED pointer alignment */
935 		((struct timeval *)info)->tv_usec = (ct->ct_wait % 1000) * 1000;
936 		break;
937 	case CLGET_SERVER_ADDR:	/* For compatibility only */
938 		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
939 		break;
940 	case CLGET_FD:
941 /* LINTED pointer alignment */
942 		*(int *)info = ct->ct_fd;
943 		break;
944 	case CLGET_SVC_ADDR:
945 		/* The caller should not free this memory area */
946 /* LINTED pointer alignment */
947 		*(struct netbuf *)info = ct->ct_addr;
948 		break;
949 	case CLSET_SVC_ADDR:		/* set to new address */
950 #ifdef undef
951 		/*
952 		 * XXX: once the t_snddis(), followed by t_connect() starts to
953 		 * work, this ifdef should be removed.  CLIENT handle reuse
954 		 * would then be possible for COTS as well.
955 		 */
956 		if (t_snddis(ct->ct_fd, NULL) == -1) {
957 			rpc_createerr.cf_stat = RPC_TLIERROR;
958 			rpc_createerr.cf_error.re_terrno = t_errno;
959 			rpc_createerr.cf_error.re_errno = errno;
960 			rpc_fd_unlock(vctbl, ct->ct_fd);
961 			return (FALSE);
962 		}
963 		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
964 		    ct, NULL);
965 		rpc_fd_unlock(vctbl, ct->ct_fd);
966 		return (ret);
967 #else
968 		rpc_fd_unlock(vctbl, ct->ct_fd);
969 		return (FALSE);
970 #endif
971 	case CLGET_XID:
972 		/*
973 		 * use the knowledge that xid is the
974 		 * first element in the call structure
975 		 * This will get the xid of the PREVIOUS call
976 		 */
977 /* LINTED pointer alignment */
978 		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
979 		break;
980 	case CLSET_XID:
981 		/* This will set the xid of the NEXT call */
982 /* LINTED pointer alignment */
983 		*(uint32_t *)ct->ct_mcall =  htonl(*(uint32_t *)info + 1);
984 		/* increment by 1 as clnt_vc_call() decrements once */
985 		break;
986 	case CLGET_VERS:
987 		/*
988 		 * This RELIES on the information that, in the call body,
989 		 * the version number field is the fifth field from the
990 		 * begining of the RPC header. MUST be changed if the
991 		 * call_struct is changed
992 		 */
993 /* LINTED pointer alignment */
994 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
995 		    4 * BYTES_PER_XDR_UNIT));
996 		break;
997 
998 	case CLSET_VERS:
999 /* LINTED pointer alignment */
1000 		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
1001 /* LINTED pointer alignment */
1002 		    htonl(*(uint32_t *)info);
1003 		break;
1004 
1005 	case CLGET_PROG:
1006 		/*
1007 		 * This RELIES on the information that, in the call body,
1008 		 * the program number field is the fourth field from the
1009 		 * begining of the RPC header. MUST be changed if the
1010 		 * call_struct is changed
1011 		 */
1012 /* LINTED pointer alignment */
1013 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
1014 		    3 * BYTES_PER_XDR_UNIT));
1015 		break;
1016 
1017 	case CLSET_PROG:
1018 /* LINTED pointer alignment */
1019 		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
1020 /* LINTED pointer alignment */
1021 		    htonl(*(uint32_t *)info);
1022 		break;
1023 
1024 	case CLSET_IO_MODE:
1025 		/* LINTED pointer cast */
1026 		if (!set_io_mode(ct, *(int *)info)) {
1027 			rpc_fd_unlock(vctbl, ct->ct_fd);
1028 			return (FALSE);
1029 		}
1030 		break;
1031 	case CLSET_FLUSH_MODE:
1032 		/* Set a specific FLUSH_MODE */
1033 		/* LINTED pointer cast */
1034 		if (!set_flush_mode(ct, *(int *)info)) {
1035 			rpc_fd_unlock(vctbl, ct->ct_fd);
1036 			return (FALSE);
1037 		}
1038 		break;
1039 	case CLGET_FLUSH_MODE:
1040 		/* LINTED pointer cast */
1041 		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
1042 		break;
1043 
1044 	case CLGET_IO_MODE:
1045 		/* LINTED pointer cast */
1046 		*(rpciomode_t *)info = ct->ct_io_mode;
1047 		break;
1048 
1049 	case CLGET_CURRENT_REC_SIZE:
1050 		/*
1051 		 * Returns the current amount of memory allocated
1052 		 * to pending requests
1053 		 */
1054 		/* LINTED pointer cast */
1055 		*(int *)info = ct->ct_bufferPendingSize;
1056 		break;
1057 
1058 	case CLSET_CONNMAXREC_SIZE:
1059 		/* Cannot resize the buffer if it is used. */
1060 		if (ct->ct_bufferPendingSize != 0) {
1061 			rpc_fd_unlock(vctbl, ct->ct_fd);
1062 			return (FALSE);
1063 		}
1064 		/*
1065 		 * If the new size is equal to the current size,
1066 		 * there is nothing to do.
1067 		 */
1068 		/* LINTED pointer cast */
1069 		if (ct->ct_bufferSize == *(uint_t *)info)
1070 			break;
1071 
1072 		/* LINTED pointer cast */
1073 		ct->ct_bufferSize = *(uint_t *)info;
1074 		if (ct->ct_buffer) {
1075 			free(ct->ct_buffer);
1076 			ct->ct_buffer = NULL;
1077 			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
1078 		}
1079 		break;
1080 
1081 	case CLGET_CONNMAXREC_SIZE:
1082 		/*
1083 		 * Returns the size of buffer allocated
1084 		 * to pending requests
1085 		 */
1086 		/* LINTED pointer cast */
1087 		*(uint_t *)info = ct->ct_bufferSize;
1088 		break;
1089 
1090 	default:
1091 		rpc_fd_unlock(vctbl, ct->ct_fd);
1092 		return (FALSE);
1093 	}
1094 	rpc_fd_unlock(vctbl, ct->ct_fd);
1095 	return (TRUE);
1096 }
1097 
1098 static void
1099 clnt_vc_destroy(CLIENT *cl)
1100 {
1101 /* LINTED pointer alignment */
1102 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
1103 	int ct_fd = ct->ct_fd;
1104 
1105 	(void) rpc_fd_lock(vctbl, ct_fd);
1106 
1107 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1108 		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1109 		(void) unregister_nb(ct);
1110 	}
1111 
1112 	if (ct->ct_closeit)
1113 		(void) t_close(ct_fd);
1114 	XDR_DESTROY(&(ct->ct_xdrs));
1115 	if (ct->ct_addr.buf)
1116 		free(ct->ct_addr.buf);
1117 	free(ct);
1118 	if (cl->cl_netid && cl->cl_netid[0])
1119 		free(cl->cl_netid);
1120 	if (cl->cl_tp && cl->cl_tp[0])
1121 		free(cl->cl_tp);
1122 	free(cl);
1123 	rpc_fd_unlock(vctbl, ct_fd);
1124 }
1125 
1126 /*
1127  * Interface between xdr serializer and vc connection.
1128  * Behaves like the system calls, read & write, but keeps some error state
1129  * around for the rpc level.
1130  */
1131 static int
1132 read_vc(void *ct_tmp, caddr_t buf, int len)
1133 {
1134 	static pthread_key_t pfdp_key = PTHREAD_ONCE_KEY_NP;
1135 	struct pollfd *pfdp;
1136 	int npfd;		/* total number of pfdp allocated */
1137 	struct ct_data *ct = ct_tmp;
1138 	struct timeval starttime;
1139 	struct timeval curtime;
1140 	int poll_time;
1141 	int delta;
1142 
1143 	if (len == 0)
1144 		return (0);
1145 
1146 	/*
1147 	 * Allocate just one the first time.  thr_get_storage() may
1148 	 * return a larger buffer, left over from the last time we were
1149 	 * here, but that's OK.  realloc() will deal with it properly.
1150 	 */
1151 	npfd = 1;
1152 	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
1153 	if (pfdp == NULL) {
1154 		(void) syslog(LOG_ERR, clnt_vc_errstr,
1155 		    clnt_read_vc_str, __no_mem_str);
1156 		rpc_callerr.re_status = RPC_SYSTEMERROR;
1157 		rpc_callerr.re_errno = errno;
1158 		rpc_callerr.re_terrno = 0;
1159 		return (-1);
1160 	}
1161 
1162 	/*
1163 	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
1164 	 *	descriptor we're really interested in (as opposed to the
1165 	 *	callback descriptors).
1166 	 */
1167 	pfdp[0].fd = ct->ct_fd;
1168 	pfdp[0].events = MASKVAL;
1169 	pfdp[0].revents = 0;
1170 	poll_time = ct->ct_wait;
1171 	if (gettimeofday(&starttime, NULL) == -1) {
1172 		syslog(LOG_ERR, "Unable to get time of day: %m");
1173 		return (-1);
1174 	}
1175 
1176 	for (;;) {
1177 		extern void (*_svc_getreqset_proc)();
1178 		extern pollfd_t *svc_pollfd;
1179 		extern int svc_max_pollfd;
1180 		int fds;
1181 
1182 		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */
1183 
1184 		if (_svc_getreqset_proc) {
1185 			sig_rw_rdlock(&svc_fd_lock);
1186 
1187 			/* reallocate pfdp to svc_max_pollfd +1 */
1188 			if (npfd != (svc_max_pollfd + 1)) {
1189 				struct pollfd *tmp_pfdp = realloc(pfdp,
1190 				    sizeof (struct pollfd) *
1191 				    (svc_max_pollfd + 1));
1192 				if (tmp_pfdp == NULL) {
1193 					sig_rw_unlock(&svc_fd_lock);
1194 					(void) syslog(LOG_ERR, clnt_vc_errstr,
1195 					    clnt_read_vc_str, __no_mem_str);
1196 					rpc_callerr.re_status = RPC_SYSTEMERROR;
1197 					rpc_callerr.re_errno = errno;
1198 					rpc_callerr.re_terrno = 0;
1199 					return (-1);
1200 				}
1201 
1202 				pfdp = tmp_pfdp;
1203 				npfd = svc_max_pollfd + 1;
1204 				(void) pthread_setspecific(pfdp_key, pfdp);
1205 			}
1206 			if (npfd > 1)
1207 				(void) memcpy(&pfdp[1], svc_pollfd,
1208 				    sizeof (struct pollfd) * (npfd - 1));
1209 
1210 			sig_rw_unlock(&svc_fd_lock);
1211 		} else {
1212 			npfd = 1;	/* don't forget about pfdp[0] */
1213 		}
1214 
1215 		switch (fds = poll(pfdp, npfd, poll_time)) {
1216 		case 0:
1217 			rpc_callerr.re_status = RPC_TIMEDOUT;
1218 			return (-1);
1219 
1220 		case -1:
1221 			if (errno != EINTR)
1222 				continue;
1223 			else {
1224 				/*
1225 				 * interrupted by another signal,
1226 				 * update time_waited
1227 				 */
1228 
1229 				if (gettimeofday(&curtime, NULL) == -1) {
1230 					syslog(LOG_ERR,
1231 					    "Unable to get time of day:  %m");
1232 					errno = 0;
1233 					continue;
1234 				};
1235 				delta = (curtime.tv_sec -
1236 				    starttime.tv_sec) * 1000 +
1237 				    (curtime.tv_usec -
1238 				    starttime.tv_usec) / 1000;
1239 				poll_time -= delta;
1240 				if (poll_time < 0) {
1241 					rpc_callerr.re_status = RPC_TIMEDOUT;
1242 					errno = 0;
1243 					return (-1);
1244 				} else {
1245 					errno = 0; /* reset it */
1246 					continue;
1247 				}
1248 			}
1249 		}
1250 
1251 		if (pfdp[0].revents == 0) {
1252 			/* must be for server side of the house */
1253 			(*_svc_getreqset_proc)(&pfdp[1], fds);
1254 			continue;	/* do poll again */
1255 		}
1256 
1257 		if (pfdp[0].revents & POLLNVAL) {
1258 			rpc_callerr.re_status = RPC_CANTRECV;
1259 			/*
1260 			 *	Note:  we're faking errno here because we
1261 			 *	previously would have expected select() to
1262 			 *	return -1 with errno EBADF.  Poll(BA_OS)
1263 			 *	returns 0 and sets the POLLNVAL revents flag
1264 			 *	instead.
1265 			 */
1266 			rpc_callerr.re_errno = errno = EBADF;
1267 			return (-1);
1268 		}
1269 
1270 		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
1271 			rpc_callerr.re_status = RPC_CANTRECV;
1272 			rpc_callerr.re_errno = errno = EPIPE;
1273 			return (-1);
1274 		}
1275 		break;
1276 	}
1277 
1278 	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
1279 	case 0:
1280 		/* premature eof */
1281 		rpc_callerr.re_errno = ENOLINK;
1282 		rpc_callerr.re_terrno = 0;
1283 		rpc_callerr.re_status = RPC_CANTRECV;
1284 		len = -1;	/* it's really an error */
1285 		break;
1286 
1287 	case -1:
1288 		rpc_callerr.re_terrno = t_errno;
1289 		rpc_callerr.re_errno = 0;
1290 		rpc_callerr.re_status = RPC_CANTRECV;
1291 		break;
1292 	}
1293 	return (len);
1294 }
1295 
1296 static int
1297 write_vc(void *ct_tmp, caddr_t buf, int len)
1298 {
1299 	int i, cnt;
1300 	struct ct_data *ct = ct_tmp;
1301 	int flag;
1302 	int maxsz;
1303 
1304 	maxsz = ct->ct_tsdu;
1305 
1306 	/* Handle the non-blocking mode */
1307 	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1308 		/*
1309 		 * Test a special case here. If the length of the current
1310 		 * write is greater than the transport data unit, and the
1311 		 * mode is non blocking, we return RPC_CANTSEND.
1312 		 * XXX  this is not very clean.
1313 		 */
1314 		if (maxsz > 0 && len > maxsz) {
1315 			rpc_callerr.re_terrno = errno;
1316 			rpc_callerr.re_errno = 0;
1317 			rpc_callerr.re_status = RPC_CANTSEND;
1318 			return (-1);
1319 		}
1320 
1321 		len = nb_send(ct, buf, (unsigned)len);
1322 		if (len == -1) {
1323 			rpc_callerr.re_terrno = errno;
1324 			rpc_callerr.re_errno = 0;
1325 			rpc_callerr.re_status = RPC_CANTSEND;
1326 		} else if (len == -2) {
1327 			rpc_callerr.re_terrno = 0;
1328 			rpc_callerr.re_errno = 0;
1329 			rpc_callerr.re_status = RPC_CANTSTORE;
1330 		}
1331 		return (len);
1332 	}
1333 
1334 	if ((maxsz == 0) || (maxsz == -1)) {
1335 		/*
1336 		 * T_snd may return -1 for error on connection (connection
1337 		 * needs to be repaired/closed, and -2 for flow-control
1338 		 * handling error (no operation to do, just wait and call
1339 		 * T_Flush()).
1340 		 */
1341 		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
1342 			rpc_callerr.re_terrno = t_errno;
1343 			rpc_callerr.re_errno = 0;
1344 			rpc_callerr.re_status = RPC_CANTSEND;
1345 		}
1346 		return (len);
1347 	}
1348 
1349 	/*
1350 	 * This for those transports which have a max size for data.
1351 	 */
1352 	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
1353 		flag = cnt > maxsz ? T_MORE : 0;
1354 		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
1355 		    flag)) == -1) {
1356 			rpc_callerr.re_terrno = t_errno;
1357 			rpc_callerr.re_errno = 0;
1358 			rpc_callerr.re_status = RPC_CANTSEND;
1359 			return (-1);
1360 		}
1361 	}
1362 	return (len);
1363 }
1364 
1365 /*
1366  * Receive the required bytes of data, even if it is fragmented.
1367  */
1368 static int
1369 t_rcvall(int fd, char *buf, int len)
1370 {
1371 	int moreflag;
1372 	int final = 0;
1373 	int res;
1374 
1375 	do {
1376 		moreflag = 0;
1377 		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
1378 		if (res == -1) {
1379 			if (t_errno == TLOOK)
1380 				switch (t_look(fd)) {
1381 				case T_DISCONNECT:
1382 					(void) t_rcvdis(fd, NULL);
1383 					(void) t_snddis(fd, NULL);
1384 					return (-1);
1385 				case T_ORDREL:
1386 				/* Received orderly release indication */
1387 					(void) t_rcvrel(fd);
1388 				/* Send orderly release indicator */
1389 					(void) t_sndrel(fd);
1390 					return (-1);
1391 				default:
1392 					return (-1);
1393 				}
1394 		} else if (res == 0) {
1395 			return (0);
1396 		}
1397 		final += res;
1398 		buf += res;
1399 		len -= res;
1400 	} while ((len > 0) && (moreflag & T_MORE));
1401 	return (final);
1402 }
1403 
1404 static struct clnt_ops *
1405 clnt_vc_ops(void)
1406 {
1407 	static struct clnt_ops ops;
1408 	extern mutex_t	ops_lock;
1409 
1410 	/* VARIABLES PROTECTED BY ops_lock: ops */
1411 
1412 	sig_mutex_lock(&ops_lock);
1413 	if (ops.cl_call == NULL) {
1414 		ops.cl_call = clnt_vc_call;
1415 		ops.cl_send = clnt_vc_send;
1416 		ops.cl_abort = clnt_vc_abort;
1417 		ops.cl_geterr = clnt_vc_geterr;
1418 		ops.cl_freeres = clnt_vc_freeres;
1419 		ops.cl_destroy = clnt_vc_destroy;
1420 		ops.cl_control = clnt_vc_control;
1421 	}
1422 	sig_mutex_unlock(&ops_lock);
1423 	return (&ops);
1424 }
1425 
1426 /*
1427  * Make sure that the time is not garbage.   -1 value is disallowed.
1428  * Note this is different from time_not_ok in clnt_dg.c
1429  */
1430 static bool_t
1431 time_not_ok(struct timeval *t)
1432 {
1433 	return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
1434 	    t->tv_usec <= -1 || t->tv_usec > 1000000);
1435 }
1436 
1437 
/*
 * Compute the number of bytes remaining between the pointer ct->ct_<p>
 * and the end of the buffer.  Relies on a variable named 'ct' in scope.
 */
1439 #define	REMAIN_BYTES(p) (ct->ct_bufferSize-(ct->ct_##p - ct->ct_buffer))
1440 
1441 static int
1442 addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
1443 {
1444 	if (NULL == ct->ct_buffer) {
1445 		/* Buffer not allocated yet. */
1446 		char *buffer;
1447 
1448 		buffer = malloc(ct->ct_bufferSize);
1449 		if (NULL == buffer) {
1450 			errno = ENOMEM;
1451 			return (-1);
1452 		}
1453 		(void) memcpy(buffer, dataToAdd, nBytes);
1454 
1455 		ct->ct_buffer = buffer;
1456 		ct->ct_bufferReadPtr = buffer;
1457 		ct->ct_bufferWritePtr = buffer + nBytes;
1458 		ct->ct_bufferPendingSize = nBytes;
1459 	} else {
1460 		/*
1461 		 * For an already allocated buffer, two mem copies
1462 		 * might be needed, depending on the current
1463 		 * writing position.
1464 		 */
1465 
1466 		/* Compute the length of the first copy. */
1467 		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));
1468 
1469 		ct->ct_bufferPendingSize += nBytes;
1470 
1471 		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
1472 		ct->ct_bufferWritePtr += len;
1473 		nBytes -= len;
1474 		if (0 == nBytes) {
1475 			/* One memcopy needed. */
1476 
1477 			/*
1478 			 * If the write pointer is at the end of the buffer,
1479 			 * wrap it now.
1480 			 */
1481 			if (ct->ct_bufferWritePtr ==
1482 			    (ct->ct_buffer + ct->ct_bufferSize)) {
1483 				ct->ct_bufferWritePtr = ct->ct_buffer;
1484 			}
1485 		} else {
1486 			/* Two memcopy needed. */
1487 			dataToAdd += len;
1488 
1489 			/*
1490 			 * Copy the remaining data to the beginning of the
1491 			 * buffer
1492 			 */
1493 			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
1494 			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
1495 		}
1496 	}
1497 	return (0);
1498 }
1499 
1500 static void
1501 consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
1502 {
1503 	ct->ct_bufferPendingSize -= nBytes;
1504 	if (ct->ct_bufferPendingSize == 0) {
1505 		/*
1506 		 * If the buffer contains no data, we set the two pointers at
1507 		 * the beginning of the buffer (to miminize buffer wraps).
1508 		 */
1509 		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
1510 	} else {
1511 		ct->ct_bufferReadPtr += nBytes;
1512 		if (ct->ct_bufferReadPtr >
1513 		    ct->ct_buffer + ct->ct_bufferSize) {
1514 			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
1515 		}
1516 	}
1517 }
1518 
1519 static int
1520 iovFromBuffer(struct ct_data *ct, struct iovec *iov)
1521 {
1522 	int l;
1523 
1524 	if (ct->ct_bufferPendingSize == 0)
1525 		return (0);
1526 
1527 	l = REMAIN_BYTES(bufferReadPtr);
1528 	if (l < ct->ct_bufferPendingSize) {
1529 		/* Buffer in two fragments. */
1530 		iov[0].iov_base = ct->ct_bufferReadPtr;
1531 		iov[0].iov_len  = l;
1532 
1533 		iov[1].iov_base = ct->ct_buffer;
1534 		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
1535 		return (2);
1536 	} else {
1537 		/* Buffer in one fragment. */
1538 		iov[0].iov_base = ct->ct_bufferReadPtr;
1539 		iov[0].iov_len  = ct->ct_bufferPendingSize;
1540 		return (1);
1541 	}
1542 }
1543 
1544 static bool_t
1545 set_flush_mode(struct ct_data *ct, int mode)
1546 {
1547 	switch (mode) {
1548 	case RPC_CL_BLOCKING_FLUSH:
1549 		/* flush as most as possible without blocking */
1550 	case RPC_CL_BESTEFFORT_FLUSH:
1551 		/* flush the buffer completely (possibly blocking) */
1552 	case RPC_CL_DEFAULT_FLUSH:
1553 		/* flush according to the currently defined policy */
1554 		ct->ct_blocking_mode = mode;
1555 		return (TRUE);
1556 	default:
1557 		return (FALSE);
1558 	}
1559 }
1560 
1561 static bool_t
1562 set_io_mode(struct ct_data *ct, int ioMode)
1563 {
1564 	switch (ioMode) {
1565 	case RPC_CL_BLOCKING:
1566 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1567 			if (NULL != ct->ct_buffer) {
1568 				/*
1569 				 * If a buffer was allocated for this
1570 				 * connection, flush it now, and free it.
1571 				 */
1572 				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1573 				free(ct->ct_buffer);
1574 				ct->ct_buffer = NULL;
1575 			}
1576 			(void) unregister_nb(ct);
1577 			ct->ct_io_mode = ioMode;
1578 		}
1579 		break;
1580 	case RPC_CL_NONBLOCKING:
1581 		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
1582 			if (-1 == register_nb(ct)) {
1583 				return (FALSE);
1584 			}
1585 			ct->ct_io_mode = ioMode;
1586 		}
1587 		break;
1588 	default:
1589 		return (FALSE);
1590 	}
1591 	return (TRUE);
1592 }
1593 
1594 static int
1595 do_flush(struct ct_data *ct, uint_t flush_mode)
1596 {
1597 	int result;
1598 	if (ct->ct_bufferPendingSize == 0) {
1599 		return (0);
1600 	}
1601 
1602 	switch (flush_mode) {
1603 	case RPC_CL_BLOCKING_FLUSH:
1604 		if (!set_blocking_connection(ct, TRUE)) {
1605 			return (-1);
1606 		}
1607 		while (ct->ct_bufferPendingSize > 0) {
1608 			if (REMAIN_BYTES(bufferReadPtr) <
1609 			    ct->ct_bufferPendingSize) {
1610 				struct iovec iov[2];
1611 				(void) iovFromBuffer(ct, iov);
1612 				result = writev(ct->ct_fd, iov, 2);
1613 			} else {
1614 				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1615 				    ct->ct_bufferPendingSize, 0);
1616 			}
1617 			if (result < 0) {
1618 				return (-1);
1619 			}
1620 			consumeFromBuffer(ct, result);
1621 		}
1622 
1623 		break;
1624 
1625 	case RPC_CL_BESTEFFORT_FLUSH:
1626 		(void) set_blocking_connection(ct, FALSE);
1627 		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
1628 			struct iovec iov[2];
1629 			(void) iovFromBuffer(ct, iov);
1630 			result = writev(ct->ct_fd, iov, 2);
1631 		} else {
1632 			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1633 			    ct->ct_bufferPendingSize, 0);
1634 		}
1635 		if (result < 0) {
1636 			if (errno != EWOULDBLOCK) {
1637 				perror("flush");
1638 				return (-1);
1639 			}
1640 			return (0);
1641 		}
1642 		if (result > 0)
1643 			consumeFromBuffer(ct, result);
1644 		break;
1645 	}
1646 	return (0);
1647 }
1648 
1649 /*
1650  * Non blocking send.
1651  */
1652 
1653 static int
1654 nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
1655 {
1656 	int result;
1657 
1658 	if (!(ntohl(*(uint32_t *)buff) & 2^31)) {
1659 		return (-1);
1660 	}
1661 
1662 	/*
1663 	 * Check to see if the current message can be stored fully in the
1664 	 * buffer. We have to check this now because it may be impossible
1665 	 * to send any data, so the message must be stored in the buffer.
1666 	 */
1667 	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
1668 		/* Try to flush  (to free some space). */
1669 		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);
1670 
1671 		/* Can we store the message now ? */
1672 		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
1673 			return (-2);
1674 	}
1675 
1676 	(void) set_blocking_connection(ct, FALSE);
1677 
1678 	/*
1679 	 * If there is no data pending, we can simply try
1680 	 * to send our data.
1681 	 */
1682 	if (ct->ct_bufferPendingSize == 0) {
1683 		result = t_snd(ct->ct_fd, buff, nBytes, 0);
1684 		if (result == -1) {
1685 			if (errno == EWOULDBLOCK) {
1686 				result = 0;
1687 			} else {
1688 				perror("send");
1689 				return (-1);
1690 			}
1691 		}
1692 		/*
1693 		 * If we have not sent all data, we must store them
1694 		 * in the buffer.
1695 		 */
1696 		if (result != nBytes) {
1697 			if (addInBuffer(ct, (char *)buff + result,
1698 			    nBytes - result) == -1) {
1699 				return (-1);
1700 			}
1701 		}
1702 	} else {
1703 		/*
1704 		 * Some data pending in the buffer.  We try to send
1705 		 * both buffer data and current message in one shot.
1706 		 */
1707 		struct iovec iov[3];
1708 		int i = iovFromBuffer(ct, &iov[0]);
1709 
1710 		iov[i].iov_base = buff;
1711 		iov[i].iov_len  = nBytes;
1712 
1713 		result = writev(ct->ct_fd, iov, i+1);
1714 		if (result == -1) {
1715 			if (errno == EWOULDBLOCK) {
1716 				/* No bytes sent */
1717 				result = 0;
1718 			} else {
1719 				return (-1);
1720 			}
1721 		}
1722 
1723 		/*
1724 		 * Add the bytes from the message
1725 		 * that we have not sent.
1726 		 */
1727 		if (result <= ct->ct_bufferPendingSize) {
1728 			/* No bytes from the message sent */
1729 			consumeFromBuffer(ct, result);
1730 			if (addInBuffer(ct, buff, nBytes) == -1) {
1731 				return (-1);
1732 			}
1733 		} else {
1734 			/*
1735 			 * Some bytes of the message are sent.
1736 			 * Compute the length of the message that has
1737 			 * been sent.
1738 			 */
1739 			int len = result - ct->ct_bufferPendingSize;
1740 
1741 			/* So, empty the buffer. */
1742 			ct->ct_bufferReadPtr = ct->ct_buffer;
1743 			ct->ct_bufferWritePtr = ct->ct_buffer;
1744 			ct->ct_bufferPendingSize = 0;
1745 
1746 			/* And add the remaining part of the message. */
1747 			if (len != nBytes) {
1748 				if (addInBuffer(ct, (char *)buff + len,
1749 				    nBytes-len) == -1) {
1750 					return (-1);
1751 				}
1752 			}
1753 		}
1754 	}
1755 	return (nBytes);
1756 }
1757 
1758 static void
1759 flush_registered_clients(void)
1760 {
1761 	struct nb_reg_node *node;
1762 
1763 	if (LIST_ISEMPTY(nb_first)) {
1764 		return;
1765 	}
1766 
1767 	LIST_FOR_EACH(nb_first, node) {
1768 		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
1769 	}
1770 }
1771 
1772 static int
1773 allocate_chunk(void)
1774 {
1775 #define	CHUNK_SIZE 16
1776 	struct nb_reg_node *chk =
1777 	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
1778 	struct nb_reg_node *n;
1779 	int i;
1780 
1781 	if (NULL == chk) {
1782 		return (-1);
1783 	}
1784 
1785 	n = chk;
1786 	for (i = 0; i < CHUNK_SIZE-1; ++i) {
1787 		n[i].next = &(n[i+1]);
1788 	}
1789 	n[CHUNK_SIZE-1].next = (struct nb_reg_node *)&nb_free;
1790 	nb_free = chk;
1791 	return (0);
1792 }
1793 
1794 static int
1795 register_nb(struct ct_data *ct)
1796 {
1797 	struct nb_reg_node *node;
1798 
1799 	(void) mutex_lock(&nb_list_mutex);
1800 
1801 	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
1802 		(void) mutex_unlock(&nb_list_mutex);
1803 		errno = ENOMEM;
1804 		return (-1);
1805 	}
1806 
1807 	if (!exit_handler_set) {
1808 		(void) atexit(flush_registered_clients);
1809 		exit_handler_set = TRUE;
1810 	}
1811 	/* Get the first free node */
1812 	LIST_EXTRACT(nb_free, node);
1813 
1814 	node->ct = ct;
1815 
1816 	LIST_ADD(nb_first, node);
1817 	(void) mutex_unlock(&nb_list_mutex);
1818 
1819 	return (0);
1820 }
1821 
1822 static int
1823 unregister_nb(struct ct_data *ct)
1824 {
1825 	struct nb_reg_node *node;
1826 
1827 	(void) mutex_lock(&nb_list_mutex);
1828 	assert(!LIST_ISEMPTY(nb_first));
1829 
1830 	node = nb_first;
1831 	LIST_FOR_EACH(nb_first, node) {
1832 		if (node->next->ct == ct) {
1833 			/* Get the node to unregister. */
1834 			struct nb_reg_node *n = node->next;
1835 			node->next = n->next;
1836 
1837 			n->ct = NULL;
1838 			LIST_ADD(nb_free, n);
1839 			break;
1840 		}
1841 	}
1842 	(void) mutex_unlock(&nb_list_mutex);
1843 	return (0);
1844 }
1845