xref: /titanic_51/usr/src/lib/libnsl/rpc/clnt_vc.c (revision c029eafbb040b81649027c5ae5a38e92d214461b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32 /*
33  * Portions of this source code were derived from Berkeley
34  * 4.3 BSD under license from the Regents of the University of
35  * California.
36  */
37 
38 /*
39  * clnt_vc.c
40  *
41  * Implements a connectionful client side RPC.
42  *
43  * Connectionful RPC supports 'batched calls'.
 * A sequence of calls may be batched up in a send buffer. The RPC call
 * returns immediately to the client even though the call was not necessarily
 * sent. Batching occurs if the results' xdr routine is NULL (0) AND
 * the rpc timeout value is zero (see clnt.h, rpc).
48  *
49  * Clients should NOT casually batch calls that in fact return results; that
50  * is the server side should be aware that a call is batched and not produce
51  * any return message. Batched calls that produce many result messages can
52  * deadlock (netlock) the client and the server....
53  */
54 
55 
56 #include "mt.h"
57 #include "rpc_mt.h"
58 #include <assert.h>
59 #include <rpc/rpc.h>
60 #include <errno.h>
61 #include <sys/byteorder.h>
62 #include <sys/mkdev.h>
63 #include <sys/poll.h>
64 #include <syslog.h>
65 #include <stdlib.h>
66 #include <unistd.h>
67 #include <netinet/tcp.h>
68 
/* Size, in bytes, of the ct_mcall array holding the marshalled call header. */
#define	MCALL_MSG_SIZE	24
/* Multiplier converting seconds to milliseconds. */
#define	SECS_TO_MS	1000
/*
 * Textual factor converting microseconds to milliseconds. The expansion
 * is intentionally left unparenthesized: "x * USECS_TO_MS" must expand to
 * "x * 1/1000"; a parenthesized (1/1000) would evaluate to zero.
 */
#define	USECS_TO_MS	1/1000
#ifndef MIN
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#endif
75 
76 extern int __rpc_timeval_to_msec(struct timeval *);
77 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
78 extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
79 extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
80 								caddr_t);
81 extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
82 extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
83 		rpcvers_t, uint_t, uint_t, const struct timeval *);
84 
85 static struct clnt_ops	*clnt_vc_ops(void);
86 static int		read_vc(void *, caddr_t, int);
87 static int		write_vc(void *, caddr_t, int);
88 static int		t_rcvall(int, char *, int);
89 static bool_t		time_not_ok(struct timeval *);
90 
91 struct ct_data;
92 static bool_t		set_up_connection(int, struct netbuf *,
93 				struct ct_data *, const struct timeval *);
94 static bool_t		set_io_mode(struct ct_data *, int);
95 
96 /*
97  * Lock table handle used by various MT sync. routines
98  */
99 static mutex_t	vctbl_lock = DEFAULTMUTEX;
100 static void	*vctbl = NULL;
101 
/* Format string and message fragments used when reporting errors. */
static const char	clnt_vc_errstr[] = "%s : %s";
static const char	clnt_vc_str[] = "clnt_vc_create";
static const char	clnt_read_vc_str[] = "read_vc";
static const char	__no_mem_str[] = "out of memory";
static const char	no_fcntl_getfl_str[] =
	"could not get status flags and modes";
static const char	no_nonblock_str[] =
	"could not set transport blocking mode";
108 
109 /*
110  * Private data structure
111  */
112 struct ct_data {
113 	int		ct_fd;		/* connection's fd */
114 	bool_t		ct_closeit;	/* close it on destroy */
115 	int		ct_tsdu;	/* size of tsdu */
116 	int		ct_wait;	/* wait interval in milliseconds */
117 	bool_t		ct_waitset;	/* wait set by clnt_control? */
118 	struct netbuf	ct_addr;	/* remote addr */
119 	struct rpc_err	ct_error;
120 	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg */
121 	uint_t		ct_mpos;	/* pos after marshal */
122 	XDR		ct_xdrs;	/* XDR stream */
123 
124 	/* NON STANDARD INFO - 00-08-31 */
125 	bool_t		ct_is_oneway; /* True if the current call is oneway. */
126 	bool_t		ct_is_blocking;
127 	ushort_t	ct_io_mode;
128 	ushort_t	ct_blocking_mode;
129 	uint_t		ct_bufferSize; /* Total size of the buffer. */
130 	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
131 	char 		*ct_buffer; /* Pointer to the buffer. */
132 	char 		*ct_bufferWritePtr; /* Ptr to the first free byte. */
133 	char 		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
134 };
135 
136 struct nb_reg_node {
137 	struct nb_reg_node *next;
138 	struct ct_data *ct;
139 };
140 
141 static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;
142 static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;
143 
144 static bool_t exit_handler_set = FALSE;
145 
146 static mutex_t nb_list_mutex = DEFAULTMUTEX;
147 
148 
/*
 * Macros to manage the singly linked lists of struct nb_reg_node.
 *
 * `l' must be the list head VARIABLE itself (its address is taken): the
 * end-of-list marker is the address of the head variable cast to a node
 * pointer, so an empty list is a head that points at itself.
 *
 * LIST_ISEMPTY(l)	 true when the list holds no node
 * LIST_CLR(l)		 reset the list to empty
 * LIST_ADD(l, node)	 push `node' at the front of the list
 * LIST_EXTRACT(l, node) pop the first node into `node'; list must not
 *			 be empty
 * LIST_FOR_EACH(l, node) iterate over every node, front to back
 *
 * LIST_ADD links the new node to the current head (`l'), not to
 * `l->next': linking to `l->next' drops the previous first node whenever
 * the list is not empty.
 */
#define	LIST_ISEMPTY(l)	((l) == (struct nb_reg_node *)&(l))
#define	LIST_CLR(l)	((l) = (struct nb_reg_node *)&(l))
#define	LIST_ADD(l, node)	((node)->next = (l), (l) = (node))
#define	LIST_EXTRACT(l, node)	((node) = (l), (l) = (l)->next)
#define	LIST_FOR_EACH(l, node) \
	for ((node) = (l); (node) != (struct nb_reg_node *)&(l); \
	    (node) = (node)->next)
156 
157 
158 /* Default size of the IO buffer used in non blocking mode */
159 #define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)
160 
161 static int nb_send(struct ct_data *, void *, unsigned int);
162 static int do_flush(struct ct_data *, uint_t);
163 static bool_t set_flush_mode(struct ct_data *, int);
164 static bool_t set_blocking_connection(struct ct_data *, bool_t);
165 
166 static int register_nb(struct ct_data *);
167 static int unregister_nb(struct ct_data *);
168 
169 
170 /*
171  * Change the mode of the underlying fd.
172  */
173 static bool_t
174 set_blocking_connection(struct ct_data *ct, bool_t blocking)
175 {
176 	int flag;
177 
178 	/*
179 	 * If the underlying fd is already in the required mode,
180 	 * avoid the syscall.
181 	 */
182 	if (ct->ct_is_blocking == blocking)
183 		return (TRUE);
184 
185 	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
186 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
187 		    no_fcntl_getfl_str);
188 		return (FALSE);
189 	}
190 
191 	flag = blocking? flag&~O_NONBLOCK : flag|O_NONBLOCK;
192 	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
193 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
194 		    no_nonblock_str);
195 		return (FALSE);
196 	}
197 	ct->ct_is_blocking = blocking;
198 	return (TRUE);
199 }
200 
201 /*
202  * Create a client handle for a connection.
203  * Default options are set, which the user can change using clnt_control()'s.
204  * The rpc/vc package does buffering similar to stdio, so the client
205  * must pick send and receive buffer sizes, 0 => use the default.
206  * NB: fd is copied into a private area.
207  * NB: The rpch->cl_auth is set null authentication. Caller may wish to
208  * set this something more useful.
209  *
210  * fd should be open and bound.
211  */
212 CLIENT *
213 clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
214 	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
215 {
216 	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
217 	    recvsz, NULL));
218 }
219 
220 /*
221  * This has the same definition as clnt_vc_create(), except it
222  * takes an additional parameter - a pointer to a timeval structure.
223  *
224  * Not a public interface. This is for clnt_create_timed,
225  * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
226  * value to control a tcp connection attempt.
227  * (for bug 4049792: clnt_create_timed does not time out)
228  *
229  * If tp is NULL, use default timeout to set up the connection.
230  */
231 CLIENT *
232 _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
233 	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
234 {
235 	CLIENT *cl;			/* client handle */
236 	struct ct_data *ct;		/* private data */
237 	struct timeval now;
238 	struct rpc_msg call_msg;
239 	struct t_info tinfo;
240 	int flag;
241 
242 	cl = malloc(sizeof (*cl));
243 	if ((ct = malloc(sizeof (*ct))) != NULL)
244 		ct->ct_addr.buf = NULL;
245 
246 	if ((cl == NULL) || (ct == NULL)) {
247 		(void) syslog(LOG_ERR, clnt_vc_errstr,
248 		    clnt_vc_str, __no_mem_str);
249 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
250 		rpc_createerr.cf_error.re_errno = errno;
251 		rpc_createerr.cf_error.re_terrno = 0;
252 		goto err;
253 	}
254 
255 	/*
256 	 * The only use of vctbl_lock is for serializing the creation of
257 	 * vctbl. Once created the lock needs to be released so we don't
258 	 * hold it across the set_up_connection() call and end up with a
259 	 * bunch of threads stuck waiting for the mutex.
260 	 */
261 	sig_mutex_lock(&vctbl_lock);
262 
263 	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
264 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
265 		rpc_createerr.cf_error.re_errno = errno;
266 		rpc_createerr.cf_error.re_terrno = 0;
267 		sig_mutex_unlock(&vctbl_lock);
268 		goto err;
269 	}
270 
271 	sig_mutex_unlock(&vctbl_lock);
272 
273 	ct->ct_io_mode = RPC_CL_BLOCKING;
274 	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;
275 
276 	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
277 	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
278 	ct->ct_bufferPendingSize = 0;
279 	ct->ct_bufferWritePtr = NULL;
280 	ct->ct_bufferReadPtr = NULL;
281 
282 	/* Check the current state of the fd. */
283 	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
284 		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
285 		    no_fcntl_getfl_str);
286 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
287 		rpc_createerr.cf_error.re_terrno = errno;
288 		rpc_createerr.cf_error.re_errno = 0;
289 		goto err;
290 	}
291 	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;
292 
293 	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
294 		goto err;
295 	}
296 
297 	/*
298 	 * Set up other members of private data struct
299 	 */
300 	ct->ct_fd = fd;
301 	/*
302 	 * The actual value will be set by clnt_call or clnt_control
303 	 */
304 	ct->ct_wait = 30000;
305 	ct->ct_waitset = FALSE;
306 	/*
307 	 * By default, closeit is always FALSE. It is users responsibility
308 	 * to do a t_close on it, else the user may use clnt_control
309 	 * to let clnt_destroy do it for him/her.
310 	 */
311 	ct->ct_closeit = FALSE;
312 
313 	/*
314 	 * Initialize call message
315 	 */
316 	(void) gettimeofday(&now, (struct timezone *)0);
317 	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
318 	call_msg.rm_call.cb_prog = prog;
319 	call_msg.rm_call.cb_vers = vers;
320 
321 	/*
322 	 * pre-serialize the static part of the call msg and stash it away
323 	 */
324 	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
325 	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
326 		goto err;
327 	}
328 	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
329 	XDR_DESTROY(&(ct->ct_xdrs));
330 
331 	if (t_getinfo(fd, &tinfo) == -1) {
332 		rpc_createerr.cf_stat = RPC_TLIERROR;
333 		rpc_createerr.cf_error.re_terrno = t_errno;
334 		rpc_createerr.cf_error.re_errno = 0;
335 		goto err;
336 	}
337 	/*
338 	 * Find the receive and the send size
339 	 */
340 	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
341 	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
342 	if ((sendsz == 0) || (recvsz == 0)) {
343 		rpc_createerr.cf_stat = RPC_TLIERROR;
344 		rpc_createerr.cf_error.re_terrno = 0;
345 		rpc_createerr.cf_error.re_errno = 0;
346 		goto err;
347 	}
348 	ct->ct_tsdu = tinfo.tsdu;
349 	/*
350 	 * Create a client handle which uses xdrrec for serialization
351 	 * and authnone for authentication.
352 	 */
353 	ct->ct_xdrs.x_ops = NULL;
354 	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
355 	    read_vc, write_vc);
356 	if (ct->ct_xdrs.x_ops == NULL) {
357 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
358 		rpc_createerr.cf_error.re_terrno = 0;
359 		rpc_createerr.cf_error.re_errno = ENOMEM;
360 		goto err;
361 	}
362 	cl->cl_ops = clnt_vc_ops();
363 	cl->cl_private = (caddr_t)ct;
364 	cl->cl_auth = authnone_create();
365 	cl->cl_tp = NULL;
366 	cl->cl_netid = NULL;
367 	return (cl);
368 
369 err:
370 	if (ct) {
371 		free(ct->ct_addr.buf);
372 		free(ct);
373 	}
374 	free(cl);
375 
376 	return (NULL);
377 }
378 
379 #define	TCPOPT_BUFSIZE 128
380 
381 /*
382  * Set tcp connection timeout value.
383  * Retun 0 for success, -1 for failure.
384  */
385 static int
386 _set_tcp_conntime(int fd, int optval)
387 {
388 	struct t_optmgmt req, res;
389 	struct opthdr *opt;
390 	int *ip;
391 	char buf[TCPOPT_BUFSIZE];
392 
393 	/* LINTED pointer cast */
394 	opt = (struct opthdr *)buf;
395 	opt->level =  IPPROTO_TCP;
396 	opt->name = TCP_CONN_ABORT_THRESHOLD;
397 	opt->len = sizeof (int);
398 
399 	req.flags = T_NEGOTIATE;
400 	req.opt.len = sizeof (struct opthdr) + opt->len;
401 	req.opt.buf = (char *)opt;
402 	/* LINTED pointer cast */
403 	ip = (int *)((char *)buf + sizeof (struct opthdr));
404 	*ip = optval;
405 
406 	res.flags = 0;
407 	res.opt.buf = (char *)buf;
408 	res.opt.maxlen = sizeof (buf);
409 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
410 		return (-1);
411 	}
412 	return (0);
413 }
414 
415 /*
416  * Get current tcp connection timeout value.
417  * Retun 0 for success, -1 for failure.
418  */
419 static int
420 _get_tcp_conntime(int fd)
421 {
422 	struct t_optmgmt req, res;
423 	struct opthdr *opt;
424 	int *ip, retval;
425 	char buf[TCPOPT_BUFSIZE];
426 
427 	/* LINTED pointer cast */
428 	opt = (struct opthdr *)buf;
429 	opt->level =  IPPROTO_TCP;
430 	opt->name = TCP_CONN_ABORT_THRESHOLD;
431 	opt->len = sizeof (int);
432 
433 	req.flags = T_CURRENT;
434 	req.opt.len = sizeof (struct opthdr) + opt->len;
435 	req.opt.buf = (char *)opt;
436 	/* LINTED pointer cast */
437 	ip = (int *)((char *)buf + sizeof (struct opthdr));
438 	*ip = 0;
439 
440 	res.flags = 0;
441 	res.opt.buf = (char *)buf;
442 	res.opt.maxlen = sizeof (buf);
443 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
444 		return (-1);
445 	}
446 
447 	/* LINTED pointer cast */
448 	ip = (int *)((char *)buf + sizeof (struct opthdr));
449 	retval = *ip;
450 	return (retval);
451 }
452 
453 static bool_t
454 set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
455 						const struct timeval *tp)
456 {
457 	int state;
458 	struct t_call sndcallstr, *rcvcall;
459 	int nconnect;
460 	bool_t connected, do_rcv_connect;
461 	int curr_time = 0;
462 
463 	ct->ct_addr.len = 0;
464 	state = t_getstate(fd);
465 	if (state == -1) {
466 		rpc_createerr.cf_stat = RPC_TLIERROR;
467 		rpc_createerr.cf_error.re_errno = 0;
468 		rpc_createerr.cf_error.re_terrno = t_errno;
469 		return (FALSE);
470 	}
471 
472 #ifdef DEBUG
473 	fprintf(stderr, "set_up_connection: state = %d\n", state);
474 #endif
475 	switch (state) {
476 	case T_IDLE:
477 		if (svcaddr == NULL) {
478 			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
479 			return (FALSE);
480 		}
481 		/*
482 		 * Connect only if state is IDLE and svcaddr known
483 		 */
484 /* LINTED pointer alignment */
485 		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
486 		if (rcvcall == NULL) {
487 			rpc_createerr.cf_stat = RPC_TLIERROR;
488 			rpc_createerr.cf_error.re_terrno = t_errno;
489 			rpc_createerr.cf_error.re_errno = errno;
490 			return (FALSE);
491 		}
492 		rcvcall->udata.maxlen = 0;
493 		sndcallstr.addr = *svcaddr;
494 		sndcallstr.opt.len = 0;
495 		sndcallstr.udata.len = 0;
496 		/*
497 		 * Even NULL could have sufficed for rcvcall, because
498 		 * the address returned is same for all cases except
499 		 * for the gateway case, and hence required.
500 		 */
501 		connected = FALSE;
502 		do_rcv_connect = FALSE;
503 
504 		/*
505 		 * If there is a timeout value specified, we will try to
506 		 * reset the tcp connection timeout. If the transport does
507 		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
508 		 * for other reason, default timeout will be used.
509 		 */
510 		if (tp != NULL) {
511 			int ms;
512 
513 			/*
514 			 * TCP_CONN_ABORT_THRESHOLD takes int value in millisecs
515 			 */
516 			ms = tp->tv_sec * SECS_TO_MS +
517 			    tp->tv_usec * USECS_TO_MS;
518 			if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
519 			    (_set_tcp_conntime(fd, ms) == 0)) {
520 				/* EMPTY */
521 #ifdef DEBUG
522 				fprintf(stderr, "set_up_connection: set tcp ");
523 				fprintf(stderr, "connection timeout to %d ms\n",
524 				    ms);
525 #endif
526 			}
527 		}
528 
529 		for (nconnect = 0; nconnect < 3; nconnect++) {
530 			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
531 				connected = TRUE;
532 				break;
533 			}
534 			if (t_errno == TLOOK) {
535 				switch (t_look(fd)) {
536 				case T_DISCONNECT:
537 					(void) t_rcvdis(fd, (struct
538 					    t_discon *) NULL);
539 					break;
540 				default:
541 					break;
542 				}
543 			} else if (!(t_errno == TSYSERR && errno == EINTR)) {
544 				break;
545 			}
546 			if ((state = t_getstate(fd)) == T_OUTCON) {
547 				do_rcv_connect = TRUE;
548 				break;
549 			}
550 			if (state != T_IDLE) {
551 				break;
552 			}
553 		}
554 		if (do_rcv_connect) {
555 			do {
556 				if (t_rcvconnect(fd, rcvcall) != -1) {
557 					connected = TRUE;
558 					break;
559 				}
560 			} while (t_errno == TSYSERR && errno == EINTR);
561 		}
562 
563 		/*
564 		 * Set the connection timeout back to its old value.
565 		 */
566 		if (curr_time) {
567 			(void) _set_tcp_conntime(fd, curr_time);
568 		}
569 
570 		if (!connected) {
571 			rpc_createerr.cf_stat = RPC_TLIERROR;
572 			rpc_createerr.cf_error.re_terrno = t_errno;
573 			rpc_createerr.cf_error.re_errno = errno;
574 			(void) t_free((char *)rcvcall, T_CALL);
575 #ifdef DEBUG
576 			fprintf(stderr, "clnt_vc: t_connect error %d\n",
577 			    rpc_createerr.cf_error.re_terrno);
578 #endif
579 			return (FALSE);
580 		}
581 
582 		/* Free old area if allocated */
583 		if (ct->ct_addr.buf)
584 			free(ct->ct_addr.buf);
585 		ct->ct_addr = rcvcall->addr;	/* To get the new address */
586 		/* So that address buf does not get freed */
587 		rcvcall->addr.buf = NULL;
588 		(void) t_free((char *)rcvcall, T_CALL);
589 		break;
590 	case T_DATAXFER:
591 	case T_OUTCON:
592 		if (svcaddr == NULL) {
593 			/*
594 			 * svcaddr could also be NULL in cases where the
595 			 * client is already bound and connected.
596 			 */
597 			ct->ct_addr.len = 0;
598 		} else {
599 			ct->ct_addr.buf = malloc(svcaddr->len);
600 			if (ct->ct_addr.buf == NULL) {
601 				(void) syslog(LOG_ERR, clnt_vc_errstr,
602 				    clnt_vc_str, __no_mem_str);
603 				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
604 				rpc_createerr.cf_error.re_errno = errno;
605 				rpc_createerr.cf_error.re_terrno = 0;
606 				return (FALSE);
607 			}
608 			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
609 			    (size_t)svcaddr->len);
610 			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
611 		}
612 		break;
613 	default:
614 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
615 		return (FALSE);
616 	}
617 	return (TRUE);
618 }
619 
620 static enum clnt_stat
621 clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
622 	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
623 {
624 /* LINTED pointer alignment */
625 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
626 	XDR *xdrs = &(ct->ct_xdrs);
627 	struct rpc_msg reply_msg;
628 	uint32_t x_id;
629 /* LINTED pointer alignment */
630 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
631 	bool_t shipnow;
632 	int refreshes = 2;
633 
634 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
635 		rpc_callerr.re_status = RPC_FAILED;
636 		rpc_callerr.re_errno = errno;
637 		rpc_fd_unlock(vctbl, ct->ct_fd);
638 		return (RPC_FAILED);
639 	}
640 
641 	ct->ct_is_oneway = FALSE;
642 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
643 		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
644 			rpc_fd_unlock(vctbl, ct->ct_fd);
645 			return (RPC_FAILED);  /* XXX */
646 		}
647 	}
648 
649 	if (!ct->ct_waitset) {
650 		/* If time is not within limits, we ignore it. */
651 		if (time_not_ok(&timeout) == FALSE)
652 			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
653 	} else {
654 		timeout.tv_sec = (ct->ct_wait / 1000);
655 		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
656 	}
657 
658 	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
659 	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
660 call_again:
661 	xdrs->x_op = XDR_ENCODE;
662 	rpc_callerr.re_status = RPC_SUCCESS;
663 	/*
664 	 * Due to little endian byte order, it is necessary to convert to host
665 	 * format before decrementing xid.
666 	 */
667 	x_id = ntohl(*msg_x_id) - 1;
668 	*msg_x_id = htonl(x_id);
669 
670 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
671 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
672 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
673 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
674 		    (!xdr_args(xdrs, args_ptr))) {
675 			if (rpc_callerr.re_status == RPC_SUCCESS)
676 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
677 			(void) xdrrec_endofrecord(xdrs, TRUE);
678 			rpc_fd_unlock(vctbl, ct->ct_fd);
679 			return (rpc_callerr.re_status);
680 		}
681 	} else {
682 /* LINTED pointer alignment */
683 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
684 		IXDR_PUT_U_INT32(u, proc);
685 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
686 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
687 			if (rpc_callerr.re_status == RPC_SUCCESS)
688 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
689 			(void) xdrrec_endofrecord(xdrs, TRUE);
690 			rpc_fd_unlock(vctbl, ct->ct_fd);
691 			return (rpc_callerr.re_status);
692 		}
693 	}
694 	if (!xdrrec_endofrecord(xdrs, shipnow)) {
695 		rpc_fd_unlock(vctbl, ct->ct_fd);
696 		return (rpc_callerr.re_status = RPC_CANTSEND);
697 	}
698 	if (!shipnow) {
699 		rpc_fd_unlock(vctbl, ct->ct_fd);
700 		return (RPC_SUCCESS);
701 	}
702 	/*
703 	 * Hack to provide rpc-based message passing
704 	 */
705 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
706 		rpc_fd_unlock(vctbl, ct->ct_fd);
707 		return (rpc_callerr.re_status = RPC_TIMEDOUT);
708 	}
709 
710 
711 	/*
712 	 * Keep receiving until we get a valid transaction id
713 	 */
714 	xdrs->x_op = XDR_DECODE;
715 	for (;;) {
716 		reply_msg.acpted_rply.ar_verf = _null_auth;
717 		reply_msg.acpted_rply.ar_results.where = NULL;
718 		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
719 		if (!xdrrec_skiprecord(xdrs)) {
720 			rpc_fd_unlock(vctbl, ct->ct_fd);
721 			return (rpc_callerr.re_status);
722 		}
723 		/* now decode and validate the response header */
724 		if (!xdr_replymsg(xdrs, &reply_msg)) {
725 			if (rpc_callerr.re_status == RPC_SUCCESS)
726 				continue;
727 			rpc_fd_unlock(vctbl, ct->ct_fd);
728 			return (rpc_callerr.re_status);
729 		}
730 		if (reply_msg.rm_xid == x_id)
731 			break;
732 	}
733 
734 	/*
735 	 * process header
736 	 */
737 	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
738 	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
739 		rpc_callerr.re_status = RPC_SUCCESS;
740 	else
741 		__seterr_reply(&reply_msg, &(rpc_callerr));
742 
743 	if (rpc_callerr.re_status == RPC_SUCCESS) {
744 		if (!AUTH_VALIDATE(cl->cl_auth,
745 		    &reply_msg.acpted_rply.ar_verf)) {
746 			rpc_callerr.re_status = RPC_AUTHERROR;
747 			rpc_callerr.re_why = AUTH_INVALIDRESP;
748 		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
749 			if (!(*xdr_results)(xdrs, results_ptr)) {
750 				if (rpc_callerr.re_status == RPC_SUCCESS)
751 					rpc_callerr.re_status =
752 					    RPC_CANTDECODERES;
753 			}
754 		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
755 		    results_ptr)) {
756 			if (rpc_callerr.re_status == RPC_SUCCESS)
757 				rpc_callerr.re_status = RPC_CANTDECODERES;
758 		}
759 	}	/* end successful completion */
760 	/*
761 	 * If unsuccesful AND error is an authentication error
762 	 * then refresh credentials and try again, else break
763 	 */
764 	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
765 		/* maybe our credentials need to be refreshed ... */
766 		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
767 			goto call_again;
768 		else
769 			/*
770 			 * We are setting rpc_callerr here given that libnsl
771 			 * is not reentrant thereby reinitializing the TSD.
772 			 * If not set here then success could be returned even
773 			 * though refresh failed.
774 			 */
775 			rpc_callerr.re_status = RPC_AUTHERROR;
776 	} /* end of unsuccessful completion */
777 	/* free verifier ... */
778 	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
779 	    reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
780 		xdrs->x_op = XDR_FREE;
781 		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
782 	}
783 	rpc_fd_unlock(vctbl, ct->ct_fd);
784 	return (rpc_callerr.re_status);
785 }
786 
787 static enum clnt_stat
788 clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
789 {
790 /* LINTED pointer alignment */
791 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
792 	XDR *xdrs = &(ct->ct_xdrs);
793 	uint32_t x_id;
794 /* LINTED pointer alignment */
795 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
796 
797 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
798 		rpc_callerr.re_status = RPC_FAILED;
799 		rpc_callerr.re_errno = errno;
800 		rpc_fd_unlock(vctbl, ct->ct_fd);
801 		return (RPC_FAILED);
802 	}
803 
804 	ct->ct_is_oneway = TRUE;
805 
806 	xdrs->x_op = XDR_ENCODE;
807 	rpc_callerr.re_status = RPC_SUCCESS;
808 	/*
809 	 * Due to little endian byte order, it is necessary to convert to host
810 	 * format before decrementing xid.
811 	 */
812 	x_id = ntohl(*msg_x_id) - 1;
813 	*msg_x_id = htonl(x_id);
814 
815 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
816 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
817 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
818 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
819 		    (!xdr_args(xdrs, args_ptr))) {
820 			if (rpc_callerr.re_status == RPC_SUCCESS)
821 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
822 			(void) xdrrec_endofrecord(xdrs, TRUE);
823 			rpc_fd_unlock(vctbl, ct->ct_fd);
824 			return (rpc_callerr.re_status);
825 		}
826 	} else {
827 /* LINTED pointer alignment */
828 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
829 		IXDR_PUT_U_INT32(u, proc);
830 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
831 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
832 			if (rpc_callerr.re_status == RPC_SUCCESS)
833 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
834 			(void) xdrrec_endofrecord(xdrs, TRUE);
835 			rpc_fd_unlock(vctbl, ct->ct_fd);
836 			return (rpc_callerr.re_status);
837 		}
838 	}
839 
840 	/*
841 	 * Do not need to check errors, as the following code does
842 	 * not depend on the successful completion of the call.
843 	 * An error, if any occurs, is reported through
844 	 * rpc_callerr.re_status.
845 	 */
846 	(void) xdrrec_endofrecord(xdrs, TRUE);
847 
848 	rpc_fd_unlock(vctbl, ct->ct_fd);
849 	return (rpc_callerr.re_status);
850 }
851 
852 /* ARGSUSED */
853 static void
854 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
855 {
856 	*errp = rpc_callerr;
857 }
858 
859 static bool_t
860 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
861 {
862 /* LINTED pointer alignment */
863 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
864 	XDR *xdrs = &(ct->ct_xdrs);
865 	bool_t stat;
866 
867 	(void) rpc_fd_lock(vctbl, ct->ct_fd);
868 	xdrs->x_op = XDR_FREE;
869 	stat = (*xdr_res)(xdrs, res_ptr);
870 	rpc_fd_unlock(vctbl, ct->ct_fd);
871 	return (stat);
872 }
873 
/*
 * Abort operation of the handle: nothing to do for this transport.
 */
static void
clnt_vc_abort(void)
{
	/* Intentionally empty. */
}
878 
879 /*ARGSUSED*/
880 static bool_t
881 clnt_vc_control(CLIENT *cl, int request, char *info)
882 {
883 	bool_t ret;
884 /* LINTED pointer alignment */
885 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
886 
887 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
888 		rpc_fd_unlock(vctbl, ct->ct_fd);
889 		return (FALSE);
890 	}
891 
892 	switch (request) {
893 	case CLSET_FD_CLOSE:
894 		ct->ct_closeit = TRUE;
895 		rpc_fd_unlock(vctbl, ct->ct_fd);
896 		return (TRUE);
897 	case CLSET_FD_NCLOSE:
898 		ct->ct_closeit = FALSE;
899 		rpc_fd_unlock(vctbl, ct->ct_fd);
900 		return (TRUE);
901 	case CLFLUSH:
902 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
903 			int res;
904 			res = do_flush(ct, (info == NULL ||
905 			    /* LINTED pointer cast */
906 			    *(int *)info == RPC_CL_DEFAULT_FLUSH)?
907 			    /* LINTED pointer cast */
908 			    ct->ct_blocking_mode: *(int *)info);
909 			ret = (0 == res);
910 		} else {
911 			ret = FALSE;
912 		}
913 		rpc_fd_unlock(vctbl, ct->ct_fd);
914 		return (ret);
915 	}
916 
917 	/* for other requests which use info */
918 	if (info == NULL) {
919 		rpc_fd_unlock(vctbl, ct->ct_fd);
920 		return (FALSE);
921 	}
922 	switch (request) {
923 	case CLSET_TIMEOUT:
924 /* LINTED pointer alignment */
925 		if (time_not_ok((struct timeval *)info)) {
926 			rpc_fd_unlock(vctbl, ct->ct_fd);
927 			return (FALSE);
928 		}
929 /* LINTED pointer alignment */
930 		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
931 		ct->ct_waitset = TRUE;
932 		break;
933 	case CLGET_TIMEOUT:
934 /* LINTED pointer alignment */
935 		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
936 /* LINTED pointer alignment */
937 		((struct timeval *)info)->tv_usec = (ct->ct_wait % 1000) * 1000;
938 		break;
939 	case CLGET_SERVER_ADDR:	/* For compatibility only */
940 		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
941 		break;
942 	case CLGET_FD:
943 /* LINTED pointer alignment */
944 		*(int *)info = ct->ct_fd;
945 		break;
946 	case CLGET_SVC_ADDR:
947 		/* The caller should not free this memory area */
948 /* LINTED pointer alignment */
949 		*(struct netbuf *)info = ct->ct_addr;
950 		break;
951 	case CLSET_SVC_ADDR:		/* set to new address */
952 #ifdef undef
953 		/*
954 		 * XXX: once the t_snddis(), followed by t_connect() starts to
955 		 * work, this ifdef should be removed.  CLIENT handle reuse
956 		 * would then be possible for COTS as well.
957 		 */
958 		if (t_snddis(ct->ct_fd, NULL) == -1) {
959 			rpc_createerr.cf_stat = RPC_TLIERROR;
960 			rpc_createerr.cf_error.re_terrno = t_errno;
961 			rpc_createerr.cf_error.re_errno = errno;
962 			rpc_fd_unlock(vctbl, ct->ct_fd);
963 			return (FALSE);
964 		}
965 		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
966 		    ct, NULL);
967 		rpc_fd_unlock(vctbl, ct->ct_fd);
968 		return (ret);
969 #else
970 		rpc_fd_unlock(vctbl, ct->ct_fd);
971 		return (FALSE);
972 #endif
973 	case CLGET_XID:
974 		/*
975 		 * use the knowledge that xid is the
976 		 * first element in the call structure
977 		 * This will get the xid of the PREVIOUS call
978 		 */
979 /* LINTED pointer alignment */
980 		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
981 		break;
982 	case CLSET_XID:
983 		/* This will set the xid of the NEXT call */
984 /* LINTED pointer alignment */
985 		*(uint32_t *)ct->ct_mcall =  htonl(*(uint32_t *)info + 1);
986 		/* increment by 1 as clnt_vc_call() decrements once */
987 		break;
988 	case CLGET_VERS:
989 		/*
990 		 * This RELIES on the information that, in the call body,
991 		 * the version number field is the fifth field from the
992 		 * begining of the RPC header. MUST be changed if the
993 		 * call_struct is changed
994 		 */
995 /* LINTED pointer alignment */
996 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
997 		    4 * BYTES_PER_XDR_UNIT));
998 		break;
999 
1000 	case CLSET_VERS:
1001 /* LINTED pointer alignment */
1002 		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
1003 /* LINTED pointer alignment */
1004 		    htonl(*(uint32_t *)info);
1005 		break;
1006 
1007 	case CLGET_PROG:
1008 		/*
1009 		 * This RELIES on the information that, in the call body,
1010 		 * the program number field is the fourth field from the
1011 		 * begining of the RPC header. MUST be changed if the
1012 		 * call_struct is changed
1013 		 */
1014 /* LINTED pointer alignment */
1015 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
1016 		    3 * BYTES_PER_XDR_UNIT));
1017 		break;
1018 
1019 	case CLSET_PROG:
1020 /* LINTED pointer alignment */
1021 		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
1022 /* LINTED pointer alignment */
1023 		    htonl(*(uint32_t *)info);
1024 		break;
1025 
1026 	case CLSET_IO_MODE:
1027 		/* LINTED pointer cast */
1028 		if (!set_io_mode(ct, *(int *)info)) {
1029 			rpc_fd_unlock(vctbl, ct->ct_fd);
1030 			return (FALSE);
1031 		}
1032 		break;
1033 	case CLSET_FLUSH_MODE:
1034 		/* Set a specific FLUSH_MODE */
1035 		/* LINTED pointer cast */
1036 		if (!set_flush_mode(ct, *(int *)info)) {
1037 			rpc_fd_unlock(vctbl, ct->ct_fd);
1038 			return (FALSE);
1039 		}
1040 		break;
1041 	case CLGET_FLUSH_MODE:
1042 		/* LINTED pointer cast */
1043 		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
1044 		break;
1045 
1046 	case CLGET_IO_MODE:
1047 		/* LINTED pointer cast */
1048 		*(rpciomode_t *)info = ct->ct_io_mode;
1049 		break;
1050 
1051 	case CLGET_CURRENT_REC_SIZE:
1052 		/*
1053 		 * Returns the current amount of memory allocated
1054 		 * to pending requests
1055 		 */
1056 		/* LINTED pointer cast */
1057 		*(int *)info = ct->ct_bufferPendingSize;
1058 		break;
1059 
1060 	case CLSET_CONNMAXREC_SIZE:
1061 		/* Cannot resize the buffer if it is used. */
1062 		if (ct->ct_bufferPendingSize != 0) {
1063 			rpc_fd_unlock(vctbl, ct->ct_fd);
1064 			return (FALSE);
1065 		}
1066 		/*
1067 		 * If the new size is equal to the current size,
1068 		 * there is nothing to do.
1069 		 */
1070 		/* LINTED pointer cast */
1071 		if (ct->ct_bufferSize == *(uint_t *)info)
1072 			break;
1073 
1074 		/* LINTED pointer cast */
1075 		ct->ct_bufferSize = *(uint_t *)info;
1076 		if (ct->ct_buffer) {
1077 			free(ct->ct_buffer);
1078 			ct->ct_buffer = NULL;
1079 			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
1080 		}
1081 		break;
1082 
1083 	case CLGET_CONNMAXREC_SIZE:
1084 		/*
1085 		 * Returns the size of buffer allocated
1086 		 * to pending requests
1087 		 */
1088 		/* LINTED pointer cast */
1089 		*(uint_t *)info = ct->ct_bufferSize;
1090 		break;
1091 
1092 	default:
1093 		rpc_fd_unlock(vctbl, ct->ct_fd);
1094 		return (FALSE);
1095 	}
1096 	rpc_fd_unlock(vctbl, ct->ct_fd);
1097 	return (TRUE);
1098 }
1099 
1100 static void
1101 clnt_vc_destroy(CLIENT *cl)
1102 {
1103 /* LINTED pointer alignment */
1104 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
1105 	int ct_fd = ct->ct_fd;
1106 
1107 	(void) rpc_fd_lock(vctbl, ct_fd);
1108 
1109 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1110 		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1111 		(void) unregister_nb(ct);
1112 	}
1113 
1114 	if (ct->ct_closeit)
1115 		(void) t_close(ct_fd);
1116 	XDR_DESTROY(&(ct->ct_xdrs));
1117 	if (ct->ct_addr.buf)
1118 		free(ct->ct_addr.buf);
1119 	free(ct);
1120 	if (cl->cl_netid && cl->cl_netid[0])
1121 		free(cl->cl_netid);
1122 	if (cl->cl_tp && cl->cl_tp[0])
1123 		free(cl->cl_tp);
1124 	free(cl);
1125 	rpc_fd_unlock(vctbl, ct_fd);
1126 }
1127 
1128 /*
1129  * Interface between xdr serializer and vc connection.
1130  * Behaves like the system calls, read & write, but keeps some error state
1131  * around for the rpc level.
1132  */
1133 static int
1134 read_vc(void *ct_tmp, caddr_t buf, int len)
1135 {
1136 	static pthread_key_t pfdp_key = PTHREAD_ONCE_KEY_NP;
1137 	struct pollfd *pfdp;
1138 	int npfd;		/* total number of pfdp allocated */
1139 	struct ct_data *ct = ct_tmp;
1140 	struct timeval starttime;
1141 	struct timeval curtime;
1142 	int poll_time;
1143 	int delta;
1144 
1145 	if (len == 0)
1146 		return (0);
1147 
1148 	/*
1149 	 * Allocate just one the first time.  thr_get_storage() may
1150 	 * return a larger buffer, left over from the last time we were
1151 	 * here, but that's OK.  realloc() will deal with it properly.
1152 	 */
1153 	npfd = 1;
1154 	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
1155 	if (pfdp == NULL) {
1156 		(void) syslog(LOG_ERR, clnt_vc_errstr,
1157 		    clnt_read_vc_str, __no_mem_str);
1158 		rpc_callerr.re_status = RPC_SYSTEMERROR;
1159 		rpc_callerr.re_errno = errno;
1160 		rpc_callerr.re_terrno = 0;
1161 		return (-1);
1162 	}
1163 
1164 	/*
1165 	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
1166 	 *	descriptor we're really interested in (as opposed to the
1167 	 *	callback descriptors).
1168 	 */
1169 	pfdp[0].fd = ct->ct_fd;
1170 	pfdp[0].events = MASKVAL;
1171 	pfdp[0].revents = 0;
1172 	poll_time = ct->ct_wait;
1173 	if (gettimeofday(&starttime, NULL) == -1) {
1174 		syslog(LOG_ERR, "Unable to get time of day: %m");
1175 		return (-1);
1176 	}
1177 
1178 	for (;;) {
1179 		extern void (*_svc_getreqset_proc)();
1180 		extern pollfd_t *svc_pollfd;
1181 		extern int svc_max_pollfd;
1182 		int fds;
1183 
1184 		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */
1185 
1186 		if (_svc_getreqset_proc) {
1187 			sig_rw_rdlock(&svc_fd_lock);
1188 
1189 			/* reallocate pfdp to svc_max_pollfd +1 */
1190 			if (npfd != (svc_max_pollfd + 1)) {
1191 				struct pollfd *tmp_pfdp = realloc(pfdp,
1192 				    sizeof (struct pollfd) *
1193 				    (svc_max_pollfd + 1));
1194 				if (tmp_pfdp == NULL) {
1195 					sig_rw_unlock(&svc_fd_lock);
1196 					(void) syslog(LOG_ERR, clnt_vc_errstr,
1197 					    clnt_read_vc_str, __no_mem_str);
1198 					rpc_callerr.re_status = RPC_SYSTEMERROR;
1199 					rpc_callerr.re_errno = errno;
1200 					rpc_callerr.re_terrno = 0;
1201 					return (-1);
1202 				}
1203 
1204 				pfdp = tmp_pfdp;
1205 				npfd = svc_max_pollfd + 1;
1206 				(void) pthread_setspecific(pfdp_key, pfdp);
1207 			}
1208 			if (npfd > 1)
1209 				(void) memcpy(&pfdp[1], svc_pollfd,
1210 				    sizeof (struct pollfd) * (npfd - 1));
1211 
1212 			sig_rw_unlock(&svc_fd_lock);
1213 		} else {
1214 			npfd = 1;	/* don't forget about pfdp[0] */
1215 		}
1216 
1217 		switch (fds = poll(pfdp, npfd, poll_time)) {
1218 		case 0:
1219 			rpc_callerr.re_status = RPC_TIMEDOUT;
1220 			return (-1);
1221 
1222 		case -1:
1223 			if (errno != EINTR)
1224 				continue;
1225 			else {
1226 				/*
1227 				 * interrupted by another signal,
1228 				 * update time_waited
1229 				 */
1230 
1231 				if (gettimeofday(&curtime, NULL) == -1) {
1232 					syslog(LOG_ERR,
1233 					    "Unable to get time of day:  %m");
1234 					errno = 0;
1235 					continue;
1236 				};
1237 				delta = (curtime.tv_sec -
1238 				    starttime.tv_sec) * 1000 +
1239 				    (curtime.tv_usec -
1240 				    starttime.tv_usec) / 1000;
1241 				poll_time -= delta;
1242 				if (poll_time < 0) {
1243 					rpc_callerr.re_status = RPC_TIMEDOUT;
1244 					errno = 0;
1245 					return (-1);
1246 				} else {
1247 					errno = 0; /* reset it */
1248 					continue;
1249 				}
1250 			}
1251 		}
1252 
1253 		if (pfdp[0].revents == 0) {
1254 			/* must be for server side of the house */
1255 			(*_svc_getreqset_proc)(&pfdp[1], fds);
1256 			continue;	/* do poll again */
1257 		}
1258 
1259 		if (pfdp[0].revents & POLLNVAL) {
1260 			rpc_callerr.re_status = RPC_CANTRECV;
1261 			/*
1262 			 *	Note:  we're faking errno here because we
1263 			 *	previously would have expected select() to
1264 			 *	return -1 with errno EBADF.  Poll(BA_OS)
1265 			 *	returns 0 and sets the POLLNVAL revents flag
1266 			 *	instead.
1267 			 */
1268 			rpc_callerr.re_errno = errno = EBADF;
1269 			return (-1);
1270 		}
1271 
1272 		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
1273 			rpc_callerr.re_status = RPC_CANTRECV;
1274 			rpc_callerr.re_errno = errno = EPIPE;
1275 			return (-1);
1276 		}
1277 		break;
1278 	}
1279 
1280 	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
1281 	case 0:
1282 		/* premature eof */
1283 		rpc_callerr.re_errno = ENOLINK;
1284 		rpc_callerr.re_terrno = 0;
1285 		rpc_callerr.re_status = RPC_CANTRECV;
1286 		len = -1;	/* it's really an error */
1287 		break;
1288 
1289 	case -1:
1290 		rpc_callerr.re_terrno = t_errno;
1291 		rpc_callerr.re_errno = 0;
1292 		rpc_callerr.re_status = RPC_CANTRECV;
1293 		break;
1294 	}
1295 	return (len);
1296 }
1297 
1298 static int
1299 write_vc(void *ct_tmp, caddr_t buf, int len)
1300 {
1301 	int i, cnt;
1302 	struct ct_data *ct = ct_tmp;
1303 	int flag;
1304 	int maxsz;
1305 
1306 	maxsz = ct->ct_tsdu;
1307 
1308 	/* Handle the non-blocking mode */
1309 	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1310 		/*
1311 		 * Test a special case here. If the length of the current
1312 		 * write is greater than the transport data unit, and the
1313 		 * mode is non blocking, we return RPC_CANTSEND.
1314 		 * XXX  this is not very clean.
1315 		 */
1316 		if (maxsz > 0 && len > maxsz) {
1317 			rpc_callerr.re_terrno = errno;
1318 			rpc_callerr.re_errno = 0;
1319 			rpc_callerr.re_status = RPC_CANTSEND;
1320 			return (-1);
1321 		}
1322 
1323 		len = nb_send(ct, buf, (unsigned)len);
1324 		if (len == -1) {
1325 			rpc_callerr.re_terrno = errno;
1326 			rpc_callerr.re_errno = 0;
1327 			rpc_callerr.re_status = RPC_CANTSEND;
1328 		} else if (len == -2) {
1329 			rpc_callerr.re_terrno = 0;
1330 			rpc_callerr.re_errno = 0;
1331 			rpc_callerr.re_status = RPC_CANTSTORE;
1332 		}
1333 		return (len);
1334 	}
1335 
1336 	if ((maxsz == 0) || (maxsz == -1)) {
1337 		/*
1338 		 * T_snd may return -1 for error on connection (connection
1339 		 * needs to be repaired/closed, and -2 for flow-control
1340 		 * handling error (no operation to do, just wait and call
1341 		 * T_Flush()).
1342 		 */
1343 		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
1344 			rpc_callerr.re_terrno = t_errno;
1345 			rpc_callerr.re_errno = 0;
1346 			rpc_callerr.re_status = RPC_CANTSEND;
1347 		}
1348 		return (len);
1349 	}
1350 
1351 	/*
1352 	 * This for those transports which have a max size for data.
1353 	 */
1354 	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
1355 		flag = cnt > maxsz ? T_MORE : 0;
1356 		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
1357 		    flag)) == -1) {
1358 			rpc_callerr.re_terrno = t_errno;
1359 			rpc_callerr.re_errno = 0;
1360 			rpc_callerr.re_status = RPC_CANTSEND;
1361 			return (-1);
1362 		}
1363 	}
1364 	return (len);
1365 }
1366 
1367 /*
1368  * Receive the required bytes of data, even if it is fragmented.
1369  */
1370 static int
1371 t_rcvall(int fd, char *buf, int len)
1372 {
1373 	int moreflag;
1374 	int final = 0;
1375 	int res;
1376 
1377 	do {
1378 		moreflag = 0;
1379 		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
1380 		if (res == -1) {
1381 			if (t_errno == TLOOK)
1382 				switch (t_look(fd)) {
1383 				case T_DISCONNECT:
1384 					(void) t_rcvdis(fd, NULL);
1385 					(void) t_snddis(fd, NULL);
1386 					return (-1);
1387 				case T_ORDREL:
1388 				/* Received orderly release indication */
1389 					(void) t_rcvrel(fd);
1390 				/* Send orderly release indicator */
1391 					(void) t_sndrel(fd);
1392 					return (-1);
1393 				default:
1394 					return (-1);
1395 				}
1396 		} else if (res == 0) {
1397 			return (0);
1398 		}
1399 		final += res;
1400 		buf += res;
1401 		len -= res;
1402 	} while ((len > 0) && (moreflag & T_MORE));
1403 	return (final);
1404 }
1405 
1406 static struct clnt_ops *
1407 clnt_vc_ops(void)
1408 {
1409 	static struct clnt_ops ops;
1410 	extern mutex_t	ops_lock;
1411 
1412 	/* VARIABLES PROTECTED BY ops_lock: ops */
1413 
1414 	sig_mutex_lock(&ops_lock);
1415 	if (ops.cl_call == NULL) {
1416 		ops.cl_call = clnt_vc_call;
1417 		ops.cl_send = clnt_vc_send;
1418 		ops.cl_abort = clnt_vc_abort;
1419 		ops.cl_geterr = clnt_vc_geterr;
1420 		ops.cl_freeres = clnt_vc_freeres;
1421 		ops.cl_destroy = clnt_vc_destroy;
1422 		ops.cl_control = clnt_vc_control;
1423 	}
1424 	sig_mutex_unlock(&ops_lock);
1425 	return (&ops);
1426 }
1427 
1428 /*
1429  * Make sure that the time is not garbage.   -1 value is disallowed.
1430  * Note this is different from time_not_ok in clnt_dg.c
1431  */
1432 static bool_t
1433 time_not_ok(struct timeval *t)
1434 {
1435 	return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
1436 	    t->tv_usec <= -1 || t->tv_usec > 1000000);
1437 }
1438 
1439 
1440 /* Compute the # of bytes that remains until the end of the buffer */
1441 #define	REMAIN_BYTES(p) (ct->ct_bufferSize-(ct->ct_##p - ct->ct_buffer))
1442 
1443 static int
1444 addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
1445 {
1446 	if (NULL == ct->ct_buffer) {
1447 		/* Buffer not allocated yet. */
1448 		char *buffer;
1449 
1450 		buffer = malloc(ct->ct_bufferSize);
1451 		if (NULL == buffer) {
1452 			errno = ENOMEM;
1453 			return (-1);
1454 		}
1455 		(void) memcpy(buffer, dataToAdd, nBytes);
1456 
1457 		ct->ct_buffer = buffer;
1458 		ct->ct_bufferReadPtr = buffer;
1459 		ct->ct_bufferWritePtr = buffer + nBytes;
1460 		ct->ct_bufferPendingSize = nBytes;
1461 	} else {
1462 		/*
1463 		 * For an already allocated buffer, two mem copies
1464 		 * might be needed, depending on the current
1465 		 * writing position.
1466 		 */
1467 
1468 		/* Compute the length of the first copy. */
1469 		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));
1470 
1471 		ct->ct_bufferPendingSize += nBytes;
1472 
1473 		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
1474 		ct->ct_bufferWritePtr += len;
1475 		nBytes -= len;
1476 		if (0 == nBytes) {
1477 			/* One memcopy needed. */
1478 
1479 			/*
1480 			 * If the write pointer is at the end of the buffer,
1481 			 * wrap it now.
1482 			 */
1483 			if (ct->ct_bufferWritePtr ==
1484 			    (ct->ct_buffer + ct->ct_bufferSize)) {
1485 				ct->ct_bufferWritePtr = ct->ct_buffer;
1486 			}
1487 		} else {
1488 			/* Two memcopy needed. */
1489 			dataToAdd += len;
1490 
1491 			/*
1492 			 * Copy the remaining data to the beginning of the
1493 			 * buffer
1494 			 */
1495 			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
1496 			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
1497 		}
1498 	}
1499 	return (0);
1500 }
1501 
1502 static void
1503 consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
1504 {
1505 	ct->ct_bufferPendingSize -= nBytes;
1506 	if (ct->ct_bufferPendingSize == 0) {
1507 		/*
1508 		 * If the buffer contains no data, we set the two pointers at
1509 		 * the beginning of the buffer (to miminize buffer wraps).
1510 		 */
1511 		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
1512 	} else {
1513 		ct->ct_bufferReadPtr += nBytes;
1514 		if (ct->ct_bufferReadPtr >
1515 		    ct->ct_buffer + ct->ct_bufferSize) {
1516 			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
1517 		}
1518 	}
1519 }
1520 
1521 static int
1522 iovFromBuffer(struct ct_data *ct, struct iovec *iov)
1523 {
1524 	int l;
1525 
1526 	if (ct->ct_bufferPendingSize == 0)
1527 		return (0);
1528 
1529 	l = REMAIN_BYTES(bufferReadPtr);
1530 	if (l < ct->ct_bufferPendingSize) {
1531 		/* Buffer in two fragments. */
1532 		iov[0].iov_base = ct->ct_bufferReadPtr;
1533 		iov[0].iov_len  = l;
1534 
1535 		iov[1].iov_base = ct->ct_buffer;
1536 		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
1537 		return (2);
1538 	} else {
1539 		/* Buffer in one fragment. */
1540 		iov[0].iov_base = ct->ct_bufferReadPtr;
1541 		iov[0].iov_len  = ct->ct_bufferPendingSize;
1542 		return (1);
1543 	}
1544 }
1545 
1546 static bool_t
1547 set_flush_mode(struct ct_data *ct, int mode)
1548 {
1549 	switch (mode) {
1550 	case RPC_CL_BLOCKING_FLUSH:
1551 		/* flush as most as possible without blocking */
1552 	case RPC_CL_BESTEFFORT_FLUSH:
1553 		/* flush the buffer completely (possibly blocking) */
1554 	case RPC_CL_DEFAULT_FLUSH:
1555 		/* flush according to the currently defined policy */
1556 		ct->ct_blocking_mode = mode;
1557 		return (TRUE);
1558 	default:
1559 		return (FALSE);
1560 	}
1561 }
1562 
1563 static bool_t
1564 set_io_mode(struct ct_data *ct, int ioMode)
1565 {
1566 	switch (ioMode) {
1567 	case RPC_CL_BLOCKING:
1568 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1569 			if (NULL != ct->ct_buffer) {
1570 				/*
1571 				 * If a buffer was allocated for this
1572 				 * connection, flush it now, and free it.
1573 				 */
1574 				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1575 				free(ct->ct_buffer);
1576 				ct->ct_buffer = NULL;
1577 			}
1578 			(void) unregister_nb(ct);
1579 			ct->ct_io_mode = ioMode;
1580 		}
1581 		break;
1582 	case RPC_CL_NONBLOCKING:
1583 		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
1584 			if (-1 == register_nb(ct)) {
1585 				return (FALSE);
1586 			}
1587 			ct->ct_io_mode = ioMode;
1588 		}
1589 		break;
1590 	default:
1591 		return (FALSE);
1592 	}
1593 	return (TRUE);
1594 }
1595 
1596 static int
1597 do_flush(struct ct_data *ct, uint_t flush_mode)
1598 {
1599 	int result;
1600 	if (ct->ct_bufferPendingSize == 0) {
1601 		return (0);
1602 	}
1603 
1604 	switch (flush_mode) {
1605 	case RPC_CL_BLOCKING_FLUSH:
1606 		if (!set_blocking_connection(ct, TRUE)) {
1607 			return (-1);
1608 		}
1609 		while (ct->ct_bufferPendingSize > 0) {
1610 			if (REMAIN_BYTES(bufferReadPtr) <
1611 			    ct->ct_bufferPendingSize) {
1612 				struct iovec iov[2];
1613 				(void) iovFromBuffer(ct, iov);
1614 				result = writev(ct->ct_fd, iov, 2);
1615 			} else {
1616 				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1617 				    ct->ct_bufferPendingSize, 0);
1618 			}
1619 			if (result < 0) {
1620 				return (-1);
1621 			}
1622 			consumeFromBuffer(ct, result);
1623 		}
1624 
1625 		break;
1626 
1627 	case RPC_CL_BESTEFFORT_FLUSH:
1628 		(void) set_blocking_connection(ct, FALSE);
1629 		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
1630 			struct iovec iov[2];
1631 			(void) iovFromBuffer(ct, iov);
1632 			result = writev(ct->ct_fd, iov, 2);
1633 		} else {
1634 			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1635 			    ct->ct_bufferPendingSize, 0);
1636 		}
1637 		if (result < 0) {
1638 			if (errno != EWOULDBLOCK) {
1639 				perror("flush");
1640 				return (-1);
1641 			}
1642 			return (0);
1643 		}
1644 		if (result > 0)
1645 			consumeFromBuffer(ct, result);
1646 		break;
1647 	}
1648 	return (0);
1649 }
1650 
1651 /*
1652  * Non blocking send.
1653  */
1654 
1655 static int
1656 nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
1657 {
1658 	int result;
1659 
1660 	if (!(ntohl(*(uint32_t *)buff) & 2^31)) {
1661 		return (-1);
1662 	}
1663 
1664 	/*
1665 	 * Check to see if the current message can be stored fully in the
1666 	 * buffer. We have to check this now because it may be impossible
1667 	 * to send any data, so the message must be stored in the buffer.
1668 	 */
1669 	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
1670 		/* Try to flush  (to free some space). */
1671 		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);
1672 
1673 		/* Can we store the message now ? */
1674 		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
1675 			return (-2);
1676 	}
1677 
1678 	(void) set_blocking_connection(ct, FALSE);
1679 
1680 	/*
1681 	 * If there is no data pending, we can simply try
1682 	 * to send our data.
1683 	 */
1684 	if (ct->ct_bufferPendingSize == 0) {
1685 		result = t_snd(ct->ct_fd, buff, nBytes, 0);
1686 		if (result == -1) {
1687 			if (errno == EWOULDBLOCK) {
1688 				result = 0;
1689 			} else {
1690 				perror("send");
1691 				return (-1);
1692 			}
1693 		}
1694 		/*
1695 		 * If we have not sent all data, we must store them
1696 		 * in the buffer.
1697 		 */
1698 		if (result != nBytes) {
1699 			if (addInBuffer(ct, (char *)buff + result,
1700 			    nBytes - result) == -1) {
1701 				return (-1);
1702 			}
1703 		}
1704 	} else {
1705 		/*
1706 		 * Some data pending in the buffer.  We try to send
1707 		 * both buffer data and current message in one shot.
1708 		 */
1709 		struct iovec iov[3];
1710 		int i = iovFromBuffer(ct, &iov[0]);
1711 
1712 		iov[i].iov_base = buff;
1713 		iov[i].iov_len  = nBytes;
1714 
1715 		result = writev(ct->ct_fd, iov, i+1);
1716 		if (result == -1) {
1717 			if (errno == EWOULDBLOCK) {
1718 				/* No bytes sent */
1719 				result = 0;
1720 			} else {
1721 				return (-1);
1722 			}
1723 		}
1724 
1725 		/*
1726 		 * Add the bytes from the message
1727 		 * that we have not sent.
1728 		 */
1729 		if (result <= ct->ct_bufferPendingSize) {
1730 			/* No bytes from the message sent */
1731 			consumeFromBuffer(ct, result);
1732 			if (addInBuffer(ct, buff, nBytes) == -1) {
1733 				return (-1);
1734 			}
1735 		} else {
1736 			/*
1737 			 * Some bytes of the message are sent.
1738 			 * Compute the length of the message that has
1739 			 * been sent.
1740 			 */
1741 			int len = result - ct->ct_bufferPendingSize;
1742 
1743 			/* So, empty the buffer. */
1744 			ct->ct_bufferReadPtr = ct->ct_buffer;
1745 			ct->ct_bufferWritePtr = ct->ct_buffer;
1746 			ct->ct_bufferPendingSize = 0;
1747 
1748 			/* And add the remaining part of the message. */
1749 			if (len != nBytes) {
1750 				if (addInBuffer(ct, (char *)buff + len,
1751 				    nBytes-len) == -1) {
1752 					return (-1);
1753 				}
1754 			}
1755 		}
1756 	}
1757 	return (nBytes);
1758 }
1759 
1760 static void
1761 flush_registered_clients(void)
1762 {
1763 	struct nb_reg_node *node;
1764 
1765 	if (LIST_ISEMPTY(nb_first)) {
1766 		return;
1767 	}
1768 
1769 	LIST_FOR_EACH(nb_first, node) {
1770 		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
1771 	}
1772 }
1773 
1774 static int
1775 allocate_chunk(void)
1776 {
1777 #define	CHUNK_SIZE 16
1778 	struct nb_reg_node *chk =
1779 	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
1780 	struct nb_reg_node *n;
1781 	int i;
1782 
1783 	if (NULL == chk) {
1784 		return (-1);
1785 	}
1786 
1787 	n = chk;
1788 	for (i = 0; i < CHUNK_SIZE-1; ++i) {
1789 		n[i].next = &(n[i+1]);
1790 	}
1791 	n[CHUNK_SIZE-1].next = (struct nb_reg_node *)&nb_free;
1792 	nb_free = chk;
1793 	return (0);
1794 }
1795 
1796 static int
1797 register_nb(struct ct_data *ct)
1798 {
1799 	struct nb_reg_node *node;
1800 
1801 	(void) mutex_lock(&nb_list_mutex);
1802 
1803 	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
1804 		(void) mutex_unlock(&nb_list_mutex);
1805 		errno = ENOMEM;
1806 		return (-1);
1807 	}
1808 
1809 	if (!exit_handler_set) {
1810 		(void) atexit(flush_registered_clients);
1811 		exit_handler_set = TRUE;
1812 	}
1813 	/* Get the first free node */
1814 	LIST_EXTRACT(nb_free, node);
1815 
1816 	node->ct = ct;
1817 
1818 	LIST_ADD(nb_first, node);
1819 	(void) mutex_unlock(&nb_list_mutex);
1820 
1821 	return (0);
1822 }
1823 
1824 static int
1825 unregister_nb(struct ct_data *ct)
1826 {
1827 	struct nb_reg_node *node;
1828 
1829 	(void) mutex_lock(&nb_list_mutex);
1830 	assert(!LIST_ISEMPTY(nb_first));
1831 
1832 	node = nb_first;
1833 	LIST_FOR_EACH(nb_first, node) {
1834 		if (node->next->ct == ct) {
1835 			/* Get the node to unregister. */
1836 			struct nb_reg_node *n = node->next;
1837 			node->next = n->next;
1838 
1839 			n->ct = NULL;
1840 			LIST_ADD(nb_free, n);
1841 			break;
1842 		}
1843 	}
1844 	(void) mutex_unlock(&nb_list_mutex);
1845 	return (0);
1846 }
1847