xref: /titanic_41/usr/src/lib/libnsl/rpc/clnt_vc.c (revision c7e508bc3bedac165992c478e823999c95f7447a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
28  */
29 
30 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
31 /* All Rights Reserved */
32 /*
33  * Portions of this source code were derived from Berkeley
34  * 4.3 BSD under license from the Regents of the University of
35  * California.
36  */
37 
38 /*
39  * clnt_vc.c
40  *
41  * Implements a connectionful client side RPC.
42  *
43  * Connectionful RPC supports 'batched calls'.
 * A sequence of calls may be batched up in a send buffer. The rpc call
 * returns immediately to the client even though the call was not necessarily
 * sent. The batching occurs if the results' xdr routine is NULL (0) AND
 * the rpc timeout value is zero (see clnt.h, rpc).
48  *
49  * Clients should NOT casually batch calls that in fact return results; that
50  * is the server side should be aware that a call is batched and not produce
51  * any return message. Batched calls that produce many result messages can
52  * deadlock (netlock) the client and the server....
53  */
54 
55 
56 #include "mt.h"
57 #include "rpc_mt.h"
58 #include <assert.h>
59 #include <rpc/rpc.h>
60 #include <errno.h>
61 #include <sys/byteorder.h>
62 #include <sys/mkdev.h>
63 #include <sys/poll.h>
64 #include <syslog.h>
65 #include <stdlib.h>
66 #include <unistd.h>
67 #include <netinet/tcp.h>
68 
/* Size in bytes of ct_mcall, the buffer holding the pre-marshalled header. */
#define	MCALL_MSG_SIZE 24
/* Multiplier converting seconds to milliseconds. */
#define	SECS_TO_MS 1000
/*
 * Converts microseconds to milliseconds when written as a trailing
 * multiplier: "usec * USECS_TO_MS" expands to "usec * 1/1000", which
 * evaluates as (usec * 1) / 1000.  The expansion must stay unparenthesized:
 * written as (1/1000) it would be 0 in integer arithmetic.
 */
#define	USECS_TO_MS 1/1000
/* Smaller of two values; each argument may be evaluated more than once. */
#ifndef MIN
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif
75 
/*
 * Routines declared here but not defined in this part of the file.
 */
extern int __rpc_timeval_to_msec(struct timeval *);
extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
								caddr_t);
extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
		rpcvers_t, uint_t, uint_t, const struct timeval *);

/*
 * File-local helpers.  read_vc() and write_vc() are the I/O callbacks
 * handed to xdrrec_create() when a client handle is built.
 */
static struct clnt_ops	*clnt_vc_ops(void);
static int		read_vc(void *, caddr_t, int);
static int		write_vc(void *, caddr_t, int);
static int		t_rcvall(int, char *, int);
static bool_t		time_not_ok(struct timeval *);

/* Forward declaration; the structure is defined further down. */
struct ct_data;
static bool_t		set_up_connection(int, struct netbuf *,
				struct ct_data *, const struct timeval *);
static bool_t		set_io_mode(struct ct_data *, int);
95 
/*
 * Lock table handle used by various MT sync. routines.
 *
 * vctbl is created on demand with rpc_fd_init() and is the first argument
 * of every rpc_fd_lock()/rpc_fd_unlock() call in this file; vctbl_lock
 * serializes that one-time creation.
 */
static mutex_t	vctbl_lock = DEFAULTMUTEX;
static void	*vctbl = NULL;

/* Message fragments shared by the syslog() calls in this file. */
static const char clnt_vc_errstr[] = "%s : %s";
static const char clnt_vc_str[] = "clnt_vc_create";
static const char clnt_read_vc_str[] = "read_vc";
static const char __no_mem_str[] = "out of memory";
static const char no_fcntl_getfl_str[] = "could not get status flags and modes";
static const char no_nonblock_str[] = "could not set transport blocking mode";
108 
/*
 * Private data structure.
 *
 * One instance hangs off CLIENT.cl_private for every handle created by
 * this file; it carries the connection state, the pre-marshalled call
 * header and the bookkeeping for non-blocking (buffered) output.
 */
struct ct_data {
	int		ct_fd;		/* connection's fd */
	bool_t		ct_closeit;	/* close it on destroy */
	int		ct_tsdu;	/* size of tsdu */
	int		ct_wait;	/* wait interval in milliseconds */
	bool_t		ct_waitset;	/* wait set by clnt_control? */
	struct netbuf	ct_addr;	/* remote addr */
	struct rpc_err	ct_error;
	/* call header as serialized by xdr_callhdr(); the xid comes first */
	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg */
	uint_t		ct_mpos;	/* pos after marshal */
	XDR		ct_xdrs;	/* XDR stream */

	/* NON STANDARD INFO - 00-08-31 */
	bool_t		ct_is_oneway; /* True if the current call is oneway. */
	/* cached blocking state of ct_fd (FALSE when O_NONBLOCK is set) */
	bool_t		ct_is_blocking;
	/* I/O mode, e.g. RPC_CL_BLOCKING or RPC_CL_NONBLOCKING */
	ushort_t	ct_io_mode;
	/* flush mode, e.g. RPC_CL_BLOCKING_FLUSH */
	ushort_t	ct_blocking_mode;
	uint_t		ct_bufferSize; /* Total size of the buffer. */
	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
	char 		*ct_buffer; /* Pointer to the buffer. */
	char 		*ct_bufferWritePtr; /* Ptr to the first free byte. */
	char 		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
};
135 
/*
 * Node of a singly linked list; each node refers to one ct_data.
 */
struct nb_reg_node {
	struct nb_reg_node *next;	/* following node, or the list sentinel */
	struct ct_data *ct;		/* client data this node refers to */
};

/*
 * List heads.  A head that holds its own address (cast to a node pointer)
 * denotes the empty list, which is also the value that terminates a list
 * walk; both heads therefore start out empty.
 */
static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;
static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;

/* Starts out FALSE; presumably records that an exit handler was installed. */
static bool_t exit_handler_set = FALSE;

/* NOTE(review): by its name this guards the lists above -- confirm at users. */
static mutex_t nb_list_mutex = DEFAULTMUTEX;
147 
148 
/*
 * Macros to manage a singly linked list of struct nb_reg_node.
 *
 * `l' must be the list head variable itself (an lvalue of type
 * struct nb_reg_node *): the address of that variable, cast to a node
 * pointer, is the end-of-list sentinel and also the value of an empty head.
 * The sentinel is only ever compared against, never dereferenced.
 *
 *	LIST_ISEMPTY(l)		true when the list holds no node
 *	LIST_CLR(l)		make the list empty
 *	LIST_ADD(l, node)	push node at the front of the list
 *	LIST_EXTRACT(l, node)	pop the front node into node; the list
 *				must not be empty
 *	LIST_FOR_EACH(l, node)	visit every node, front to back
 *
 * LIST_ADD links the new node to the current head (l).  Linking it to
 * l->next instead would silently unlink the previous first node whenever
 * the list is not empty.
 */
#define	LIST_ISEMPTY(l) ((l) == (struct nb_reg_node *)&(l))
#define	LIST_CLR(l) ((l) = (struct nb_reg_node *)&(l))
#define	LIST_ADD(l, node) ((node)->next = (l), (l) = (node))
#define	LIST_EXTRACT(l, node) ((node) = (l), (l) = (l)->next)
#define	LIST_FOR_EACH(l, node) \
	for ((node) = (l); (node) != (struct nb_reg_node *)&(l); \
	    (node) = (node)->next)
156 
157 
/*
 * Default size of the IO buffer used in non blocking mode (16 KB).
 * It is the initial value of ct_bufferSize for every new handle.
 */
#define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)

/* File-local routines dealing with buffered output, flushing and modes. */
static int nb_send(struct ct_data *, void *, unsigned int);
static int do_flush(struct ct_data *, uint_t);
static bool_t set_flush_mode(struct ct_data *, int);
static bool_t set_blocking_connection(struct ct_data *, bool_t);

/* File-local routines taking a ct_data; see struct nb_reg_node above. */
static int register_nb(struct ct_data *);
static int unregister_nb(struct ct_data *);
168 
169 
170 /*
171  * Change the mode of the underlying fd.
172  */
173 static bool_t
174 set_blocking_connection(struct ct_data *ct, bool_t blocking)
175 {
176 	int flag;
177 
178 	/*
179 	 * If the underlying fd is already in the required mode,
180 	 * avoid the syscall.
181 	 */
182 	if (ct->ct_is_blocking == blocking)
183 		return (TRUE);
184 
185 	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
186 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
187 		    no_fcntl_getfl_str);
188 		return (FALSE);
189 	}
190 
191 	flag = blocking? flag&~O_NONBLOCK : flag|O_NONBLOCK;
192 	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
193 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
194 		    no_nonblock_str);
195 		return (FALSE);
196 	}
197 	ct->ct_is_blocking = blocking;
198 	return (TRUE);
199 }
200 
201 /*
202  * Create a client handle for a connection.
203  * Default options are set, which the user can change using clnt_control()'s.
204  * The rpc/vc package does buffering similar to stdio, so the client
205  * must pick send and receive buffer sizes, 0 => use the default.
206  * NB: fd is copied into a private area.
207  * NB: The rpch->cl_auth is set null authentication. Caller may wish to
208  * set this something more useful.
209  *
210  * fd should be open and bound.
211  */
212 CLIENT *
213 clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
214 	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
215 {
216 	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
217 	    recvsz, NULL));
218 }
219 
220 /*
221  * This has the same definition as clnt_vc_create(), except it
222  * takes an additional parameter - a pointer to a timeval structure.
223  *
224  * Not a public interface. This is for clnt_create_timed,
225  * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
226  * value to control a tcp connection attempt.
227  * (for bug 4049792: clnt_create_timed does not time out)
228  *
229  * If tp is NULL, use default timeout to set up the connection.
230  */
231 CLIENT *
232 _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
233 	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
234 {
235 	CLIENT *cl;			/* client handle */
236 	struct ct_data *ct;		/* private data */
237 	struct timeval now;
238 	struct rpc_msg call_msg;
239 	struct t_info tinfo;
240 	int flag;
241 
242 	cl = malloc(sizeof (*cl));
243 	ct = malloc(sizeof (*ct));
244 	if ((cl == NULL) || (ct == NULL)) {
245 		(void) syslog(LOG_ERR, clnt_vc_errstr,
246 		    clnt_vc_str, __no_mem_str);
247 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
248 		rpc_createerr.cf_error.re_errno = errno;
249 		rpc_createerr.cf_error.re_terrno = 0;
250 		goto err;
251 	}
252 	ct->ct_addr.buf = NULL;
253 
254 	/*
255 	 * The only use of vctbl_lock is for serializing the creation of
256 	 * vctbl. Once created the lock needs to be released so we don't
257 	 * hold it across the set_up_connection() call and end up with a
258 	 * bunch of threads stuck waiting for the mutex.
259 	 */
260 	sig_mutex_lock(&vctbl_lock);
261 
262 	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
263 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
264 		rpc_createerr.cf_error.re_errno = errno;
265 		rpc_createerr.cf_error.re_terrno = 0;
266 		sig_mutex_unlock(&vctbl_lock);
267 		goto err;
268 	}
269 
270 	sig_mutex_unlock(&vctbl_lock);
271 
272 	ct->ct_io_mode = RPC_CL_BLOCKING;
273 	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;
274 
275 	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
276 	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
277 	ct->ct_bufferPendingSize = 0;
278 	ct->ct_bufferWritePtr = NULL;
279 	ct->ct_bufferReadPtr = NULL;
280 
281 	/* Check the current state of the fd. */
282 	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
283 		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
284 		    no_fcntl_getfl_str);
285 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
286 		rpc_createerr.cf_error.re_terrno = errno;
287 		rpc_createerr.cf_error.re_errno = 0;
288 		goto err;
289 	}
290 	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;
291 
292 	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
293 		goto err;
294 	}
295 
296 	/*
297 	 * Set up other members of private data struct
298 	 */
299 	ct->ct_fd = fd;
300 	/*
301 	 * The actual value will be set by clnt_call or clnt_control
302 	 */
303 	ct->ct_wait = 30000;
304 	ct->ct_waitset = FALSE;
305 	/*
306 	 * By default, closeit is always FALSE. It is users responsibility
307 	 * to do a t_close on it, else the user may use clnt_control
308 	 * to let clnt_destroy do it for him/her.
309 	 */
310 	ct->ct_closeit = FALSE;
311 
312 	/*
313 	 * Initialize call message
314 	 */
315 	(void) gettimeofday(&now, (struct timezone *)0);
316 	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
317 	call_msg.rm_call.cb_prog = prog;
318 	call_msg.rm_call.cb_vers = vers;
319 
320 	/*
321 	 * pre-serialize the static part of the call msg and stash it away
322 	 */
323 	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
324 	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
325 		goto err;
326 	}
327 	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
328 	XDR_DESTROY(&(ct->ct_xdrs));
329 
330 	if (t_getinfo(fd, &tinfo) == -1) {
331 		rpc_createerr.cf_stat = RPC_TLIERROR;
332 		rpc_createerr.cf_error.re_terrno = t_errno;
333 		rpc_createerr.cf_error.re_errno = 0;
334 		goto err;
335 	}
336 	/*
337 	 * Find the receive and the send size
338 	 */
339 	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
340 	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
341 	if ((sendsz == 0) || (recvsz == 0)) {
342 		rpc_createerr.cf_stat = RPC_TLIERROR;
343 		rpc_createerr.cf_error.re_terrno = 0;
344 		rpc_createerr.cf_error.re_errno = 0;
345 		goto err;
346 	}
347 	ct->ct_tsdu = tinfo.tsdu;
348 	/*
349 	 * Create a client handle which uses xdrrec for serialization
350 	 * and authnone for authentication.
351 	 */
352 	ct->ct_xdrs.x_ops = NULL;
353 	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
354 	    read_vc, write_vc);
355 	if (ct->ct_xdrs.x_ops == NULL) {
356 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
357 		rpc_createerr.cf_error.re_terrno = 0;
358 		rpc_createerr.cf_error.re_errno = ENOMEM;
359 		goto err;
360 	}
361 	cl->cl_ops = clnt_vc_ops();
362 	cl->cl_private = (caddr_t)ct;
363 	cl->cl_auth = authnone_create();
364 	cl->cl_tp = NULL;
365 	cl->cl_netid = NULL;
366 	return (cl);
367 
368 err:
369 	if (cl) {
370 		if (ct) {
371 			if (ct->ct_addr.len)
372 				free(ct->ct_addr.buf);
373 			free(ct);
374 		}
375 		free(cl);
376 	}
377 	return (NULL);
378 }
379 
380 #define	TCPOPT_BUFSIZE 128
381 
382 /*
383  * Set tcp connection timeout value.
384  * Retun 0 for success, -1 for failure.
385  */
386 static int
387 _set_tcp_conntime(int fd, int optval)
388 {
389 	struct t_optmgmt req, res;
390 	struct opthdr *opt;
391 	int *ip;
392 	char buf[TCPOPT_BUFSIZE];
393 
394 	/* LINTED pointer cast */
395 	opt = (struct opthdr *)buf;
396 	opt->level =  IPPROTO_TCP;
397 	opt->name = TCP_CONN_ABORT_THRESHOLD;
398 	opt->len = sizeof (int);
399 
400 	req.flags = T_NEGOTIATE;
401 	req.opt.len = sizeof (struct opthdr) + opt->len;
402 	req.opt.buf = (char *)opt;
403 	/* LINTED pointer cast */
404 	ip = (int *)((char *)buf + sizeof (struct opthdr));
405 	*ip = optval;
406 
407 	res.flags = 0;
408 	res.opt.buf = (char *)buf;
409 	res.opt.maxlen = sizeof (buf);
410 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
411 		return (-1);
412 	}
413 	return (0);
414 }
415 
416 /*
417  * Get current tcp connection timeout value.
418  * Retun 0 for success, -1 for failure.
419  */
420 static int
421 _get_tcp_conntime(int fd)
422 {
423 	struct t_optmgmt req, res;
424 	struct opthdr *opt;
425 	int *ip, retval;
426 	char buf[TCPOPT_BUFSIZE];
427 
428 	/* LINTED pointer cast */
429 	opt = (struct opthdr *)buf;
430 	opt->level =  IPPROTO_TCP;
431 	opt->name = TCP_CONN_ABORT_THRESHOLD;
432 	opt->len = sizeof (int);
433 
434 	req.flags = T_CURRENT;
435 	req.opt.len = sizeof (struct opthdr) + opt->len;
436 	req.opt.buf = (char *)opt;
437 	/* LINTED pointer cast */
438 	ip = (int *)((char *)buf + sizeof (struct opthdr));
439 	*ip = 0;
440 
441 	res.flags = 0;
442 	res.opt.buf = (char *)buf;
443 	res.opt.maxlen = sizeof (buf);
444 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
445 		return (-1);
446 	}
447 
448 	/* LINTED pointer cast */
449 	ip = (int *)((char *)buf + sizeof (struct opthdr));
450 	retval = *ip;
451 	return (retval);
452 }
453 
454 static bool_t
455 set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
456 						const struct timeval *tp)
457 {
458 	int state;
459 	struct t_call sndcallstr, *rcvcall;
460 	int nconnect;
461 	bool_t connected, do_rcv_connect;
462 	int curr_time = 0;
463 
464 	ct->ct_addr.len = 0;
465 	state = t_getstate(fd);
466 	if (state == -1) {
467 		rpc_createerr.cf_stat = RPC_TLIERROR;
468 		rpc_createerr.cf_error.re_errno = 0;
469 		rpc_createerr.cf_error.re_terrno = t_errno;
470 		return (FALSE);
471 	}
472 
473 #ifdef DEBUG
474 	fprintf(stderr, "set_up_connection: state = %d\n", state);
475 #endif
476 	switch (state) {
477 	case T_IDLE:
478 		if (svcaddr == NULL) {
479 			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
480 			return (FALSE);
481 		}
482 		/*
483 		 * Connect only if state is IDLE and svcaddr known
484 		 */
485 /* LINTED pointer alignment */
486 		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
487 		if (rcvcall == NULL) {
488 			rpc_createerr.cf_stat = RPC_TLIERROR;
489 			rpc_createerr.cf_error.re_terrno = t_errno;
490 			rpc_createerr.cf_error.re_errno = errno;
491 			return (FALSE);
492 		}
493 		rcvcall->udata.maxlen = 0;
494 		sndcallstr.addr = *svcaddr;
495 		sndcallstr.opt.len = 0;
496 		sndcallstr.udata.len = 0;
497 		/*
498 		 * Even NULL could have sufficed for rcvcall, because
499 		 * the address returned is same for all cases except
500 		 * for the gateway case, and hence required.
501 		 */
502 		connected = FALSE;
503 		do_rcv_connect = FALSE;
504 
505 		/*
506 		 * If there is a timeout value specified, we will try to
507 		 * reset the tcp connection timeout. If the transport does
508 		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
509 		 * for other reason, default timeout will be used.
510 		 */
511 		if (tp != NULL) {
512 			int ms;
513 
514 			/*
515 			 * TCP_CONN_ABORT_THRESHOLD takes int value in millisecs
516 			 */
517 			ms = tp->tv_sec * SECS_TO_MS +
518 			    tp->tv_usec * USECS_TO_MS;
519 			if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
520 			    (_set_tcp_conntime(fd, ms) == 0)) {
521 				/* EMPTY */
522 #ifdef DEBUG
523 				fprintf(stderr, "set_up_connection: set tcp ");
524 				fprintf(stderr, "connection timeout to %d ms\n",
525 				    ms);
526 #endif
527 			}
528 		}
529 
530 		for (nconnect = 0; nconnect < 3; nconnect++) {
531 			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
532 				connected = TRUE;
533 				break;
534 			}
535 			if (t_errno == TLOOK) {
536 				switch (t_look(fd)) {
537 				case T_DISCONNECT:
538 					(void) t_rcvdis(fd, (struct
539 					    t_discon *) NULL);
540 					break;
541 				default:
542 					break;
543 				}
544 			} else if (!(t_errno == TSYSERR && errno == EINTR)) {
545 				break;
546 			}
547 			if ((state = t_getstate(fd)) == T_OUTCON) {
548 				do_rcv_connect = TRUE;
549 				break;
550 			}
551 			if (state != T_IDLE) {
552 				break;
553 			}
554 		}
555 		if (do_rcv_connect) {
556 			do {
557 				if (t_rcvconnect(fd, rcvcall) != -1) {
558 					connected = TRUE;
559 					break;
560 				}
561 			} while (t_errno == TSYSERR && errno == EINTR);
562 		}
563 
564 		/*
565 		 * Set the connection timeout back to its old value.
566 		 */
567 		if (curr_time) {
568 			(void) _set_tcp_conntime(fd, curr_time);
569 		}
570 
571 		if (!connected) {
572 			rpc_createerr.cf_stat = RPC_TLIERROR;
573 			rpc_createerr.cf_error.re_terrno = t_errno;
574 			rpc_createerr.cf_error.re_errno = errno;
575 			(void) t_free((char *)rcvcall, T_CALL);
576 #ifdef DEBUG
577 			fprintf(stderr, "clnt_vc: t_connect error %d\n",
578 			    rpc_createerr.cf_error.re_terrno);
579 #endif
580 			return (FALSE);
581 		}
582 
583 		/* Free old area if allocated */
584 		if (ct->ct_addr.buf)
585 			free(ct->ct_addr.buf);
586 		ct->ct_addr = rcvcall->addr;	/* To get the new address */
587 		/* So that address buf does not get freed */
588 		rcvcall->addr.buf = NULL;
589 		(void) t_free((char *)rcvcall, T_CALL);
590 		break;
591 	case T_DATAXFER:
592 	case T_OUTCON:
593 		if (svcaddr == NULL) {
594 			/*
595 			 * svcaddr could also be NULL in cases where the
596 			 * client is already bound and connected.
597 			 */
598 			ct->ct_addr.len = 0;
599 		} else {
600 			ct->ct_addr.buf = malloc(svcaddr->len);
601 			if (ct->ct_addr.buf == NULL) {
602 				(void) syslog(LOG_ERR, clnt_vc_errstr,
603 				    clnt_vc_str, __no_mem_str);
604 				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
605 				rpc_createerr.cf_error.re_errno = errno;
606 				rpc_createerr.cf_error.re_terrno = 0;
607 				return (FALSE);
608 			}
609 			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
610 			    (size_t)svcaddr->len);
611 			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
612 		}
613 		break;
614 	default:
615 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
616 		return (FALSE);
617 	}
618 	return (TRUE);
619 }
620 
621 static enum clnt_stat
622 clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
623 	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
624 {
625 /* LINTED pointer alignment */
626 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
627 	XDR *xdrs = &(ct->ct_xdrs);
628 	struct rpc_msg reply_msg;
629 	uint32_t x_id;
630 /* LINTED pointer alignment */
631 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
632 	bool_t shipnow;
633 	int refreshes = 2;
634 
635 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
636 		rpc_callerr.re_status = RPC_FAILED;
637 		rpc_callerr.re_errno = errno;
638 		rpc_fd_unlock(vctbl, ct->ct_fd);
639 		return (RPC_FAILED);
640 	}
641 
642 	ct->ct_is_oneway = FALSE;
643 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
644 		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
645 			rpc_fd_unlock(vctbl, ct->ct_fd);
646 			return (RPC_FAILED);  /* XXX */
647 		}
648 	}
649 
650 	if (!ct->ct_waitset) {
651 		/* If time is not within limits, we ignore it. */
652 		if (time_not_ok(&timeout) == FALSE)
653 			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
654 	} else {
655 		timeout.tv_sec = (ct->ct_wait / 1000);
656 		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
657 	}
658 
659 	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
660 	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
661 call_again:
662 	xdrs->x_op = XDR_ENCODE;
663 	rpc_callerr.re_status = RPC_SUCCESS;
664 	/*
665 	 * Due to little endian byte order, it is necessary to convert to host
666 	 * format before decrementing xid.
667 	 */
668 	x_id = ntohl(*msg_x_id) - 1;
669 	*msg_x_id = htonl(x_id);
670 
671 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
672 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
673 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
674 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
675 		    (!xdr_args(xdrs, args_ptr))) {
676 			if (rpc_callerr.re_status == RPC_SUCCESS)
677 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
678 			(void) xdrrec_endofrecord(xdrs, TRUE);
679 			rpc_fd_unlock(vctbl, ct->ct_fd);
680 			return (rpc_callerr.re_status);
681 		}
682 	} else {
683 /* LINTED pointer alignment */
684 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
685 		IXDR_PUT_U_INT32(u, proc);
686 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
687 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
688 			if (rpc_callerr.re_status == RPC_SUCCESS)
689 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
690 			(void) xdrrec_endofrecord(xdrs, TRUE);
691 			rpc_fd_unlock(vctbl, ct->ct_fd);
692 			return (rpc_callerr.re_status);
693 		}
694 	}
695 	if (!xdrrec_endofrecord(xdrs, shipnow)) {
696 		rpc_fd_unlock(vctbl, ct->ct_fd);
697 		return (rpc_callerr.re_status = RPC_CANTSEND);
698 	}
699 	if (!shipnow) {
700 		rpc_fd_unlock(vctbl, ct->ct_fd);
701 		return (RPC_SUCCESS);
702 	}
703 	/*
704 	 * Hack to provide rpc-based message passing
705 	 */
706 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
707 		rpc_fd_unlock(vctbl, ct->ct_fd);
708 		return (rpc_callerr.re_status = RPC_TIMEDOUT);
709 	}
710 
711 
712 	/*
713 	 * Keep receiving until we get a valid transaction id
714 	 */
715 	xdrs->x_op = XDR_DECODE;
716 	for (;;) {
717 		reply_msg.acpted_rply.ar_verf = _null_auth;
718 		reply_msg.acpted_rply.ar_results.where = NULL;
719 		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
720 		if (!xdrrec_skiprecord(xdrs)) {
721 			rpc_fd_unlock(vctbl, ct->ct_fd);
722 			return (rpc_callerr.re_status);
723 		}
724 		/* now decode and validate the response header */
725 		if (!xdr_replymsg(xdrs, &reply_msg)) {
726 			if (rpc_callerr.re_status == RPC_SUCCESS)
727 				continue;
728 			rpc_fd_unlock(vctbl, ct->ct_fd);
729 			return (rpc_callerr.re_status);
730 		}
731 		if (reply_msg.rm_xid == x_id)
732 			break;
733 	}
734 
735 	/*
736 	 * process header
737 	 */
738 	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
739 	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
740 		rpc_callerr.re_status = RPC_SUCCESS;
741 	else
742 		__seterr_reply(&reply_msg, &(rpc_callerr));
743 
744 	if (rpc_callerr.re_status == RPC_SUCCESS) {
745 		if (!AUTH_VALIDATE(cl->cl_auth,
746 		    &reply_msg.acpted_rply.ar_verf)) {
747 			rpc_callerr.re_status = RPC_AUTHERROR;
748 			rpc_callerr.re_why = AUTH_INVALIDRESP;
749 		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
750 			if (!(*xdr_results)(xdrs, results_ptr)) {
751 				if (rpc_callerr.re_status == RPC_SUCCESS)
752 					rpc_callerr.re_status =
753 					    RPC_CANTDECODERES;
754 			}
755 		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
756 		    results_ptr)) {
757 			if (rpc_callerr.re_status == RPC_SUCCESS)
758 				rpc_callerr.re_status = RPC_CANTDECODERES;
759 		}
760 	}	/* end successful completion */
761 	/*
762 	 * If unsuccesful AND error is an authentication error
763 	 * then refresh credentials and try again, else break
764 	 */
765 	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
766 		/* maybe our credentials need to be refreshed ... */
767 		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
768 			goto call_again;
769 		else
770 			/*
771 			 * We are setting rpc_callerr here given that libnsl
772 			 * is not reentrant thereby reinitializing the TSD.
773 			 * If not set here then success could be returned even
774 			 * though refresh failed.
775 			 */
776 			rpc_callerr.re_status = RPC_AUTHERROR;
777 	} /* end of unsuccessful completion */
778 	/* free verifier ... */
779 	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
780 	    reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
781 		xdrs->x_op = XDR_FREE;
782 		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
783 	}
784 	rpc_fd_unlock(vctbl, ct->ct_fd);
785 	return (rpc_callerr.re_status);
786 }
787 
788 static enum clnt_stat
789 clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
790 {
791 /* LINTED pointer alignment */
792 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
793 	XDR *xdrs = &(ct->ct_xdrs);
794 	uint32_t x_id;
795 /* LINTED pointer alignment */
796 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
797 
798 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
799 		rpc_callerr.re_status = RPC_FAILED;
800 		rpc_callerr.re_errno = errno;
801 		rpc_fd_unlock(vctbl, ct->ct_fd);
802 		return (RPC_FAILED);
803 	}
804 
805 	ct->ct_is_oneway = TRUE;
806 
807 	xdrs->x_op = XDR_ENCODE;
808 	rpc_callerr.re_status = RPC_SUCCESS;
809 	/*
810 	 * Due to little endian byte order, it is necessary to convert to host
811 	 * format before decrementing xid.
812 	 */
813 	x_id = ntohl(*msg_x_id) - 1;
814 	*msg_x_id = htonl(x_id);
815 
816 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
817 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
818 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
819 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
820 		    (!xdr_args(xdrs, args_ptr))) {
821 			if (rpc_callerr.re_status == RPC_SUCCESS)
822 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
823 			(void) xdrrec_endofrecord(xdrs, TRUE);
824 			rpc_fd_unlock(vctbl, ct->ct_fd);
825 			return (rpc_callerr.re_status);
826 		}
827 	} else {
828 /* LINTED pointer alignment */
829 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
830 		IXDR_PUT_U_INT32(u, proc);
831 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
832 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
833 			if (rpc_callerr.re_status == RPC_SUCCESS)
834 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
835 			(void) xdrrec_endofrecord(xdrs, TRUE);
836 			rpc_fd_unlock(vctbl, ct->ct_fd);
837 			return (rpc_callerr.re_status);
838 		}
839 	}
840 
841 	/*
842 	 * Do not need to check errors, as the following code does
843 	 * not depend on the successful completion of the call.
844 	 * An error, if any occurs, is reported through
845 	 * rpc_callerr.re_status.
846 	 */
847 	(void) xdrrec_endofrecord(xdrs, TRUE);
848 
849 	rpc_fd_unlock(vctbl, ct->ct_fd);
850 	return (rpc_callerr.re_status);
851 }
852 
853 /* ARGSUSED */
854 static void
855 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
856 {
857 	*errp = rpc_callerr;
858 }
859 
860 static bool_t
861 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
862 {
863 /* LINTED pointer alignment */
864 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
865 	XDR *xdrs = &(ct->ct_xdrs);
866 	bool_t stat;
867 
868 	(void) rpc_fd_lock(vctbl, ct->ct_fd);
869 	xdrs->x_op = XDR_FREE;
870 	stat = (*xdr_res)(xdrs, res_ptr);
871 	rpc_fd_unlock(vctbl, ct->ct_fd);
872 	return (stat);
873 }
874 
/*
 * Abort operation for this client type: intentionally a no-op.
 */
static void
clnt_vc_abort(void)
{
	/* nothing to do */
}
879 
880 /*ARGSUSED*/
881 static bool_t
882 clnt_vc_control(CLIENT *cl, int request, char *info)
883 {
884 	bool_t ret;
885 /* LINTED pointer alignment */
886 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
887 
888 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
889 		rpc_fd_unlock(vctbl, ct->ct_fd);
890 		return (FALSE);
891 	}
892 
893 	switch (request) {
894 	case CLSET_FD_CLOSE:
895 		ct->ct_closeit = TRUE;
896 		rpc_fd_unlock(vctbl, ct->ct_fd);
897 		return (TRUE);
898 	case CLSET_FD_NCLOSE:
899 		ct->ct_closeit = FALSE;
900 		rpc_fd_unlock(vctbl, ct->ct_fd);
901 		return (TRUE);
902 	case CLFLUSH:
903 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
904 			int res;
905 			res = do_flush(ct, (info == NULL ||
906 			    /* LINTED pointer cast */
907 			    *(int *)info == RPC_CL_DEFAULT_FLUSH)?
908 			    /* LINTED pointer cast */
909 			    ct->ct_blocking_mode: *(int *)info);
910 			ret = (0 == res);
911 		}
912 		rpc_fd_unlock(vctbl, ct->ct_fd);
913 		return (ret);
914 	}
915 
916 	/* for other requests which use info */
917 	if (info == NULL) {
918 		rpc_fd_unlock(vctbl, ct->ct_fd);
919 		return (FALSE);
920 	}
921 	switch (request) {
922 	case CLSET_TIMEOUT:
923 /* LINTED pointer alignment */
924 		if (time_not_ok((struct timeval *)info)) {
925 			rpc_fd_unlock(vctbl, ct->ct_fd);
926 			return (FALSE);
927 		}
928 /* LINTED pointer alignment */
929 		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
930 		ct->ct_waitset = TRUE;
931 		break;
932 	case CLGET_TIMEOUT:
933 /* LINTED pointer alignment */
934 		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
935 /* LINTED pointer alignment */
936 		((struct timeval *)info)->tv_usec = (ct->ct_wait % 1000) * 1000;
937 		break;
938 	case CLGET_SERVER_ADDR:	/* For compatibility only */
939 		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
940 		break;
941 	case CLGET_FD:
942 /* LINTED pointer alignment */
943 		*(int *)info = ct->ct_fd;
944 		break;
945 	case CLGET_SVC_ADDR:
946 		/* The caller should not free this memory area */
947 /* LINTED pointer alignment */
948 		*(struct netbuf *)info = ct->ct_addr;
949 		break;
950 	case CLSET_SVC_ADDR:		/* set to new address */
951 #ifdef undef
952 		/*
953 		 * XXX: once the t_snddis(), followed by t_connect() starts to
954 		 * work, this ifdef should be removed.  CLIENT handle reuse
955 		 * would then be possible for COTS as well.
956 		 */
957 		if (t_snddis(ct->ct_fd, NULL) == -1) {
958 			rpc_createerr.cf_stat = RPC_TLIERROR;
959 			rpc_createerr.cf_error.re_terrno = t_errno;
960 			rpc_createerr.cf_error.re_errno = errno;
961 			rpc_fd_unlock(vctbl, ct->ct_fd);
962 			return (FALSE);
963 		}
964 		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
965 		    ct, NULL);
966 		rpc_fd_unlock(vctbl, ct->ct_fd);
967 		return (ret);
968 #else
969 		rpc_fd_unlock(vctbl, ct->ct_fd);
970 		return (FALSE);
971 #endif
972 	case CLGET_XID:
973 		/*
974 		 * use the knowledge that xid is the
975 		 * first element in the call structure
976 		 * This will get the xid of the PREVIOUS call
977 		 */
978 /* LINTED pointer alignment */
979 		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
980 		break;
981 	case CLSET_XID:
982 		/* This will set the xid of the NEXT call */
983 /* LINTED pointer alignment */
984 		*(uint32_t *)ct->ct_mcall =  htonl(*(uint32_t *)info + 1);
985 		/* increment by 1 as clnt_vc_call() decrements once */
986 		break;
987 	case CLGET_VERS:
988 		/*
989 		 * This RELIES on the information that, in the call body,
990 		 * the version number field is the fifth field from the
991 		 * begining of the RPC header. MUST be changed if the
992 		 * call_struct is changed
993 		 */
994 /* LINTED pointer alignment */
995 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
996 		    4 * BYTES_PER_XDR_UNIT));
997 		break;
998 
999 	case CLSET_VERS:
1000 /* LINTED pointer alignment */
1001 		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
1002 /* LINTED pointer alignment */
1003 		    htonl(*(uint32_t *)info);
1004 		break;
1005 
1006 	case CLGET_PROG:
1007 		/*
1008 		 * This RELIES on the information that, in the call body,
1009 		 * the program number field is the fourth field from the
1010 		 * begining of the RPC header. MUST be changed if the
1011 		 * call_struct is changed
1012 		 */
1013 /* LINTED pointer alignment */
1014 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
1015 		    3 * BYTES_PER_XDR_UNIT));
1016 		break;
1017 
1018 	case CLSET_PROG:
1019 /* LINTED pointer alignment */
1020 		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
1021 /* LINTED pointer alignment */
1022 		    htonl(*(uint32_t *)info);
1023 		break;
1024 
1025 	case CLSET_IO_MODE:
1026 		/* LINTED pointer cast */
1027 		if (!set_io_mode(ct, *(int *)info)) {
1028 			rpc_fd_unlock(vctbl, ct->ct_fd);
1029 			return (FALSE);
1030 		}
1031 		break;
1032 	case CLSET_FLUSH_MODE:
1033 		/* Set a specific FLUSH_MODE */
1034 		/* LINTED pointer cast */
1035 		if (!set_flush_mode(ct, *(int *)info)) {
1036 			rpc_fd_unlock(vctbl, ct->ct_fd);
1037 			return (FALSE);
1038 		}
1039 		break;
1040 	case CLGET_FLUSH_MODE:
1041 		/* LINTED pointer cast */
1042 		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
1043 		break;
1044 
1045 	case CLGET_IO_MODE:
1046 		/* LINTED pointer cast */
1047 		*(rpciomode_t *)info = ct->ct_io_mode;
1048 		break;
1049 
1050 	case CLGET_CURRENT_REC_SIZE:
1051 		/*
1052 		 * Returns the current amount of memory allocated
1053 		 * to pending requests
1054 		 */
1055 		/* LINTED pointer cast */
1056 		*(int *)info = ct->ct_bufferPendingSize;
1057 		break;
1058 
1059 	case CLSET_CONNMAXREC_SIZE:
1060 		/* Cannot resize the buffer if it is used. */
1061 		if (ct->ct_bufferPendingSize != 0) {
1062 			rpc_fd_unlock(vctbl, ct->ct_fd);
1063 			return (FALSE);
1064 		}
1065 		/*
1066 		 * If the new size is equal to the current size,
1067 		 * there is nothing to do.
1068 		 */
1069 		/* LINTED pointer cast */
1070 		if (ct->ct_bufferSize == *(uint_t *)info)
1071 			break;
1072 
1073 		/* LINTED pointer cast */
1074 		ct->ct_bufferSize = *(uint_t *)info;
1075 		if (ct->ct_buffer) {
1076 			free(ct->ct_buffer);
1077 			ct->ct_buffer = NULL;
1078 			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
1079 		}
1080 		break;
1081 
1082 	case CLGET_CONNMAXREC_SIZE:
1083 		/*
1084 		 * Returns the size of buffer allocated
1085 		 * to pending requests
1086 		 */
1087 		/* LINTED pointer cast */
1088 		*(uint_t *)info = ct->ct_bufferSize;
1089 		break;
1090 
1091 	default:
1092 		rpc_fd_unlock(vctbl, ct->ct_fd);
1093 		return (FALSE);
1094 	}
1095 	rpc_fd_unlock(vctbl, ct->ct_fd);
1096 	return (TRUE);
1097 }
1098 
1099 static void
1100 clnt_vc_destroy(CLIENT *cl)
1101 {
1102 /* LINTED pointer alignment */
1103 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
1104 	int ct_fd = ct->ct_fd;
1105 
1106 	(void) rpc_fd_lock(vctbl, ct_fd);
1107 
1108 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1109 		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1110 		(void) unregister_nb(ct);
1111 	}
1112 
1113 	if (ct->ct_closeit)
1114 		(void) t_close(ct_fd);
1115 	XDR_DESTROY(&(ct->ct_xdrs));
1116 	if (ct->ct_addr.buf)
1117 		free(ct->ct_addr.buf);
1118 	free(ct);
1119 	if (cl->cl_netid && cl->cl_netid[0])
1120 		free(cl->cl_netid);
1121 	if (cl->cl_tp && cl->cl_tp[0])
1122 		free(cl->cl_tp);
1123 	free(cl);
1124 	rpc_fd_unlock(vctbl, ct_fd);
1125 }
1126 
1127 /*
1128  * Interface between xdr serializer and vc connection.
1129  * Behaves like the system calls, read & write, but keeps some error state
1130  * around for the rpc level.
1131  */
1132 static int
1133 read_vc(void *ct_tmp, caddr_t buf, int len)
1134 {
1135 	static pthread_key_t pfdp_key = PTHREAD_ONCE_KEY_NP;
1136 	struct pollfd *pfdp;
1137 	int npfd;		/* total number of pfdp allocated */
1138 	struct ct_data *ct = ct_tmp;
1139 	struct timeval starttime;
1140 	struct timeval curtime;
1141 	int poll_time;
1142 	int delta;
1143 
1144 	if (len == 0)
1145 		return (0);
1146 
1147 	/*
1148 	 * Allocate just one the first time.  thr_get_storage() may
1149 	 * return a larger buffer, left over from the last time we were
1150 	 * here, but that's OK.  realloc() will deal with it properly.
1151 	 */
1152 	npfd = 1;
1153 	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
1154 	if (pfdp == NULL) {
1155 		(void) syslog(LOG_ERR, clnt_vc_errstr,
1156 		    clnt_read_vc_str, __no_mem_str);
1157 		rpc_callerr.re_status = RPC_SYSTEMERROR;
1158 		rpc_callerr.re_errno = errno;
1159 		rpc_callerr.re_terrno = 0;
1160 		return (-1);
1161 	}
1162 
1163 	/*
1164 	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
1165 	 *	descriptor we're really interested in (as opposed to the
1166 	 *	callback descriptors).
1167 	 */
1168 	pfdp[0].fd = ct->ct_fd;
1169 	pfdp[0].events = MASKVAL;
1170 	pfdp[0].revents = 0;
1171 	poll_time = ct->ct_wait;
1172 	if (gettimeofday(&starttime, NULL) == -1) {
1173 		syslog(LOG_ERR, "Unable to get time of day: %m");
1174 		return (-1);
1175 	}
1176 
1177 	for (;;) {
1178 		extern void (*_svc_getreqset_proc)();
1179 		extern pollfd_t *svc_pollfd;
1180 		extern int svc_max_pollfd;
1181 		int fds;
1182 
1183 		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */
1184 
1185 		if (_svc_getreqset_proc) {
1186 			sig_rw_rdlock(&svc_fd_lock);
1187 
1188 			/* reallocate pfdp to svc_max_pollfd +1 */
1189 			if (npfd != (svc_max_pollfd + 1)) {
1190 				struct pollfd *tmp_pfdp = realloc(pfdp,
1191 				    sizeof (struct pollfd) *
1192 				    (svc_max_pollfd + 1));
1193 				if (tmp_pfdp == NULL) {
1194 					sig_rw_unlock(&svc_fd_lock);
1195 					(void) syslog(LOG_ERR, clnt_vc_errstr,
1196 					    clnt_read_vc_str, __no_mem_str);
1197 					rpc_callerr.re_status = RPC_SYSTEMERROR;
1198 					rpc_callerr.re_errno = errno;
1199 					rpc_callerr.re_terrno = 0;
1200 					return (-1);
1201 				}
1202 
1203 				pfdp = tmp_pfdp;
1204 				npfd = svc_max_pollfd + 1;
1205 				(void) pthread_setspecific(pfdp_key, pfdp);
1206 			}
1207 			if (npfd > 1)
1208 				(void) memcpy(&pfdp[1], svc_pollfd,
1209 				    sizeof (struct pollfd) * (npfd - 1));
1210 
1211 			sig_rw_unlock(&svc_fd_lock);
1212 		} else {
1213 			npfd = 1;	/* don't forget about pfdp[0] */
1214 		}
1215 
1216 		switch (fds = poll(pfdp, npfd, poll_time)) {
1217 		case 0:
1218 			rpc_callerr.re_status = RPC_TIMEDOUT;
1219 			return (-1);
1220 
1221 		case -1:
1222 			if (errno != EINTR)
1223 				continue;
1224 			else {
1225 				/*
1226 				 * interrupted by another signal,
1227 				 * update time_waited
1228 				 */
1229 
1230 				if (gettimeofday(&curtime, NULL) == -1) {
1231 					syslog(LOG_ERR,
1232 					    "Unable to get time of day:  %m");
1233 					errno = 0;
1234 					continue;
1235 				};
1236 				delta = (curtime.tv_sec -
1237 				    starttime.tv_sec) * 1000 +
1238 				    (curtime.tv_usec -
1239 				    starttime.tv_usec) / 1000;
1240 				poll_time -= delta;
1241 				if (poll_time < 0) {
1242 					rpc_callerr.re_status = RPC_TIMEDOUT;
1243 					errno = 0;
1244 					return (-1);
1245 				} else {
1246 					errno = 0; /* reset it */
1247 					continue;
1248 				}
1249 			}
1250 		}
1251 
1252 		if (pfdp[0].revents == 0) {
1253 			/* must be for server side of the house */
1254 			(*_svc_getreqset_proc)(&pfdp[1], fds);
1255 			continue;	/* do poll again */
1256 		}
1257 
1258 		if (pfdp[0].revents & POLLNVAL) {
1259 			rpc_callerr.re_status = RPC_CANTRECV;
1260 			/*
1261 			 *	Note:  we're faking errno here because we
1262 			 *	previously would have expected select() to
1263 			 *	return -1 with errno EBADF.  Poll(BA_OS)
1264 			 *	returns 0 and sets the POLLNVAL revents flag
1265 			 *	instead.
1266 			 */
1267 			rpc_callerr.re_errno = errno = EBADF;
1268 			return (-1);
1269 		}
1270 
1271 		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
1272 			rpc_callerr.re_status = RPC_CANTRECV;
1273 			rpc_callerr.re_errno = errno = EPIPE;
1274 			return (-1);
1275 		}
1276 		break;
1277 	}
1278 
1279 	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
1280 	case 0:
1281 		/* premature eof */
1282 		rpc_callerr.re_errno = ENOLINK;
1283 		rpc_callerr.re_terrno = 0;
1284 		rpc_callerr.re_status = RPC_CANTRECV;
1285 		len = -1;	/* it's really an error */
1286 		break;
1287 
1288 	case -1:
1289 		rpc_callerr.re_terrno = t_errno;
1290 		rpc_callerr.re_errno = 0;
1291 		rpc_callerr.re_status = RPC_CANTRECV;
1292 		break;
1293 	}
1294 	return (len);
1295 }
1296 
1297 static int
1298 write_vc(void *ct_tmp, caddr_t buf, int len)
1299 {
1300 	int i, cnt;
1301 	struct ct_data *ct = ct_tmp;
1302 	int flag;
1303 	int maxsz;
1304 
1305 	maxsz = ct->ct_tsdu;
1306 
1307 	/* Handle the non-blocking mode */
1308 	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1309 		/*
1310 		 * Test a special case here. If the length of the current
1311 		 * write is greater than the transport data unit, and the
1312 		 * mode is non blocking, we return RPC_CANTSEND.
1313 		 * XXX  this is not very clean.
1314 		 */
1315 		if (maxsz > 0 && len > maxsz) {
1316 			rpc_callerr.re_terrno = errno;
1317 			rpc_callerr.re_errno = 0;
1318 			rpc_callerr.re_status = RPC_CANTSEND;
1319 			return (-1);
1320 		}
1321 
1322 		len = nb_send(ct, buf, (unsigned)len);
1323 		if (len == -1) {
1324 			rpc_callerr.re_terrno = errno;
1325 			rpc_callerr.re_errno = 0;
1326 			rpc_callerr.re_status = RPC_CANTSEND;
1327 		} else if (len == -2) {
1328 			rpc_callerr.re_terrno = 0;
1329 			rpc_callerr.re_errno = 0;
1330 			rpc_callerr.re_status = RPC_CANTSTORE;
1331 		}
1332 		return (len);
1333 	}
1334 
1335 	if ((maxsz == 0) || (maxsz == -1)) {
1336 		/*
1337 		 * T_snd may return -1 for error on connection (connection
1338 		 * needs to be repaired/closed, and -2 for flow-control
1339 		 * handling error (no operation to do, just wait and call
1340 		 * T_Flush()).
1341 		 */
1342 		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
1343 			rpc_callerr.re_terrno = t_errno;
1344 			rpc_callerr.re_errno = 0;
1345 			rpc_callerr.re_status = RPC_CANTSEND;
1346 		}
1347 		return (len);
1348 	}
1349 
1350 	/*
1351 	 * This for those transports which have a max size for data.
1352 	 */
1353 	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
1354 		flag = cnt > maxsz ? T_MORE : 0;
1355 		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
1356 		    flag)) == -1) {
1357 			rpc_callerr.re_terrno = t_errno;
1358 			rpc_callerr.re_errno = 0;
1359 			rpc_callerr.re_status = RPC_CANTSEND;
1360 			return (-1);
1361 		}
1362 	}
1363 	return (len);
1364 }
1365 
1366 /*
1367  * Receive the required bytes of data, even if it is fragmented.
1368  */
1369 static int
1370 t_rcvall(int fd, char *buf, int len)
1371 {
1372 	int moreflag;
1373 	int final = 0;
1374 	int res;
1375 
1376 	do {
1377 		moreflag = 0;
1378 		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
1379 		if (res == -1) {
1380 			if (t_errno == TLOOK)
1381 				switch (t_look(fd)) {
1382 				case T_DISCONNECT:
1383 					(void) t_rcvdis(fd, NULL);
1384 					(void) t_snddis(fd, NULL);
1385 					return (-1);
1386 				case T_ORDREL:
1387 				/* Received orderly release indication */
1388 					(void) t_rcvrel(fd);
1389 				/* Send orderly release indicator */
1390 					(void) t_sndrel(fd);
1391 					return (-1);
1392 				default:
1393 					return (-1);
1394 				}
1395 		} else if (res == 0) {
1396 			return (0);
1397 		}
1398 		final += res;
1399 		buf += res;
1400 		len -= res;
1401 	} while ((len > 0) && (moreflag & T_MORE));
1402 	return (final);
1403 }
1404 
1405 static struct clnt_ops *
1406 clnt_vc_ops(void)
1407 {
1408 	static struct clnt_ops ops;
1409 	extern mutex_t	ops_lock;
1410 
1411 	/* VARIABLES PROTECTED BY ops_lock: ops */
1412 
1413 	sig_mutex_lock(&ops_lock);
1414 	if (ops.cl_call == NULL) {
1415 		ops.cl_call = clnt_vc_call;
1416 		ops.cl_send = clnt_vc_send;
1417 		ops.cl_abort = clnt_vc_abort;
1418 		ops.cl_geterr = clnt_vc_geterr;
1419 		ops.cl_freeres = clnt_vc_freeres;
1420 		ops.cl_destroy = clnt_vc_destroy;
1421 		ops.cl_control = clnt_vc_control;
1422 	}
1423 	sig_mutex_unlock(&ops_lock);
1424 	return (&ops);
1425 }
1426 
/*
 * Make sure that the time is not garbage.   -1 value is disallowed.
 * Note this is different from time_not_ok in clnt_dg.c
 *
 * Returns non-zero when either field is negative, tv_sec is greater than
 * 100000000 or tv_usec is greater than 1000000; returns 0 otherwise.
 */
static bool_t
time_not_ok(struct timeval *t)
{
	bool_t bad_sec = (t->tv_sec < 0 || t->tv_sec > 100000000);
	bool_t bad_usec = (t->tv_usec < 0 || t->tv_usec > 1000000);

	return (bad_sec || bad_usec);
}
1437 
1438 
1439 /* Compute the # of bytes that remains until the end of the buffer */
1440 #define	REMAIN_BYTES(p) (ct->ct_bufferSize-(ct->ct_##p - ct->ct_buffer))
1441 
1442 static int
1443 addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
1444 {
1445 	if (NULL == ct->ct_buffer) {
1446 		/* Buffer not allocated yet. */
1447 		char *buffer;
1448 
1449 		buffer = malloc(ct->ct_bufferSize);
1450 		if (NULL == buffer) {
1451 			errno = ENOMEM;
1452 			return (-1);
1453 		}
1454 		(void) memcpy(buffer, dataToAdd, nBytes);
1455 
1456 		ct->ct_buffer = buffer;
1457 		ct->ct_bufferReadPtr = buffer;
1458 		ct->ct_bufferWritePtr = buffer + nBytes;
1459 		ct->ct_bufferPendingSize = nBytes;
1460 	} else {
1461 		/*
1462 		 * For an already allocated buffer, two mem copies
1463 		 * might be needed, depending on the current
1464 		 * writing position.
1465 		 */
1466 
1467 		/* Compute the length of the first copy. */
1468 		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));
1469 
1470 		ct->ct_bufferPendingSize += nBytes;
1471 
1472 		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
1473 		ct->ct_bufferWritePtr += len;
1474 		nBytes -= len;
1475 		if (0 == nBytes) {
1476 			/* One memcopy needed. */
1477 
1478 			/*
1479 			 * If the write pointer is at the end of the buffer,
1480 			 * wrap it now.
1481 			 */
1482 			if (ct->ct_bufferWritePtr ==
1483 			    (ct->ct_buffer + ct->ct_bufferSize)) {
1484 				ct->ct_bufferWritePtr = ct->ct_buffer;
1485 			}
1486 		} else {
1487 			/* Two memcopy needed. */
1488 			dataToAdd += len;
1489 
1490 			/*
1491 			 * Copy the remaining data to the beginning of the
1492 			 * buffer
1493 			 */
1494 			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
1495 			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
1496 		}
1497 	}
1498 	return (0);
1499 }
1500 
1501 static void
1502 consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
1503 {
1504 	ct->ct_bufferPendingSize -= nBytes;
1505 	if (ct->ct_bufferPendingSize == 0) {
1506 		/*
1507 		 * If the buffer contains no data, we set the two pointers at
1508 		 * the beginning of the buffer (to miminize buffer wraps).
1509 		 */
1510 		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
1511 	} else {
1512 		ct->ct_bufferReadPtr += nBytes;
1513 		if (ct->ct_bufferReadPtr >
1514 		    ct->ct_buffer + ct->ct_bufferSize) {
1515 			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
1516 		}
1517 	}
1518 }
1519 
1520 static int
1521 iovFromBuffer(struct ct_data *ct, struct iovec *iov)
1522 {
1523 	int l;
1524 
1525 	if (ct->ct_bufferPendingSize == 0)
1526 		return (0);
1527 
1528 	l = REMAIN_BYTES(bufferReadPtr);
1529 	if (l < ct->ct_bufferPendingSize) {
1530 		/* Buffer in two fragments. */
1531 		iov[0].iov_base = ct->ct_bufferReadPtr;
1532 		iov[0].iov_len  = l;
1533 
1534 		iov[1].iov_base = ct->ct_buffer;
1535 		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
1536 		return (2);
1537 	} else {
1538 		/* Buffer in one fragment. */
1539 		iov[0].iov_base = ct->ct_bufferReadPtr;
1540 		iov[0].iov_len  = ct->ct_bufferPendingSize;
1541 		return (1);
1542 	}
1543 }
1544 
1545 static bool_t
1546 set_flush_mode(struct ct_data *ct, int mode)
1547 {
1548 	switch (mode) {
1549 	case RPC_CL_BLOCKING_FLUSH:
1550 		/* flush as most as possible without blocking */
1551 	case RPC_CL_BESTEFFORT_FLUSH:
1552 		/* flush the buffer completely (possibly blocking) */
1553 	case RPC_CL_DEFAULT_FLUSH:
1554 		/* flush according to the currently defined policy */
1555 		ct->ct_blocking_mode = mode;
1556 		return (TRUE);
1557 	default:
1558 		return (FALSE);
1559 	}
1560 }
1561 
1562 static bool_t
1563 set_io_mode(struct ct_data *ct, int ioMode)
1564 {
1565 	switch (ioMode) {
1566 	case RPC_CL_BLOCKING:
1567 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1568 			if (NULL != ct->ct_buffer) {
1569 				/*
1570 				 * If a buffer was allocated for this
1571 				 * connection, flush it now, and free it.
1572 				 */
1573 				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1574 				free(ct->ct_buffer);
1575 				ct->ct_buffer = NULL;
1576 			}
1577 			(void) unregister_nb(ct);
1578 			ct->ct_io_mode = ioMode;
1579 		}
1580 		break;
1581 	case RPC_CL_NONBLOCKING:
1582 		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
1583 			if (-1 == register_nb(ct)) {
1584 				return (FALSE);
1585 			}
1586 			ct->ct_io_mode = ioMode;
1587 		}
1588 		break;
1589 	default:
1590 		return (FALSE);
1591 	}
1592 	return (TRUE);
1593 }
1594 
1595 static int
1596 do_flush(struct ct_data *ct, uint_t flush_mode)
1597 {
1598 	int result;
1599 	if (ct->ct_bufferPendingSize == 0) {
1600 		return (0);
1601 	}
1602 
1603 	switch (flush_mode) {
1604 	case RPC_CL_BLOCKING_FLUSH:
1605 		if (!set_blocking_connection(ct, TRUE)) {
1606 			return (-1);
1607 		}
1608 		while (ct->ct_bufferPendingSize > 0) {
1609 			if (REMAIN_BYTES(bufferReadPtr) <
1610 			    ct->ct_bufferPendingSize) {
1611 				struct iovec iov[2];
1612 				(void) iovFromBuffer(ct, iov);
1613 				result = writev(ct->ct_fd, iov, 2);
1614 			} else {
1615 				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1616 				    ct->ct_bufferPendingSize, 0);
1617 			}
1618 			if (result < 0) {
1619 				return (-1);
1620 			}
1621 			consumeFromBuffer(ct, result);
1622 		}
1623 
1624 		break;
1625 
1626 	case RPC_CL_BESTEFFORT_FLUSH:
1627 		(void) set_blocking_connection(ct, FALSE);
1628 		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
1629 			struct iovec iov[2];
1630 			(void) iovFromBuffer(ct, iov);
1631 			result = writev(ct->ct_fd, iov, 2);
1632 		} else {
1633 			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1634 			    ct->ct_bufferPendingSize, 0);
1635 		}
1636 		if (result < 0) {
1637 			if (errno != EWOULDBLOCK) {
1638 				perror("flush");
1639 				return (-1);
1640 			}
1641 			return (0);
1642 		}
1643 		if (result > 0)
1644 			consumeFromBuffer(ct, result);
1645 		break;
1646 	}
1647 	return (0);
1648 }
1649 
1650 /*
1651  * Non blocking send.
1652  */
1653 
1654 static int
1655 nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
1656 {
1657 	int result;
1658 
1659 	if (!(ntohl(*(uint32_t *)buff) & 2^31)) {
1660 		return (-1);
1661 	}
1662 
1663 	/*
1664 	 * Check to see if the current message can be stored fully in the
1665 	 * buffer. We have to check this now because it may be impossible
1666 	 * to send any data, so the message must be stored in the buffer.
1667 	 */
1668 	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
1669 		/* Try to flush  (to free some space). */
1670 		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);
1671 
1672 		/* Can we store the message now ? */
1673 		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
1674 			return (-2);
1675 	}
1676 
1677 	(void) set_blocking_connection(ct, FALSE);
1678 
1679 	/*
1680 	 * If there is no data pending, we can simply try
1681 	 * to send our data.
1682 	 */
1683 	if (ct->ct_bufferPendingSize == 0) {
1684 		result = t_snd(ct->ct_fd, buff, nBytes, 0);
1685 		if (result == -1) {
1686 			if (errno == EWOULDBLOCK) {
1687 				result = 0;
1688 			} else {
1689 				perror("send");
1690 				return (-1);
1691 			}
1692 		}
1693 		/*
1694 		 * If we have not sent all data, we must store them
1695 		 * in the buffer.
1696 		 */
1697 		if (result != nBytes) {
1698 			if (addInBuffer(ct, (char *)buff + result,
1699 			    nBytes - result) == -1) {
1700 				return (-1);
1701 			}
1702 		}
1703 	} else {
1704 		/*
1705 		 * Some data pending in the buffer.  We try to send
1706 		 * both buffer data and current message in one shot.
1707 		 */
1708 		struct iovec iov[3];
1709 		int i = iovFromBuffer(ct, &iov[0]);
1710 
1711 		iov[i].iov_base = buff;
1712 		iov[i].iov_len  = nBytes;
1713 
1714 		result = writev(ct->ct_fd, iov, i+1);
1715 		if (result == -1) {
1716 			if (errno == EWOULDBLOCK) {
1717 				/* No bytes sent */
1718 				result = 0;
1719 			} else {
1720 				return (-1);
1721 			}
1722 		}
1723 
1724 		/*
1725 		 * Add the bytes from the message
1726 		 * that we have not sent.
1727 		 */
1728 		if (result <= ct->ct_bufferPendingSize) {
1729 			/* No bytes from the message sent */
1730 			consumeFromBuffer(ct, result);
1731 			if (addInBuffer(ct, buff, nBytes) == -1) {
1732 				return (-1);
1733 			}
1734 		} else {
1735 			/*
1736 			 * Some bytes of the message are sent.
1737 			 * Compute the length of the message that has
1738 			 * been sent.
1739 			 */
1740 			int len = result - ct->ct_bufferPendingSize;
1741 
1742 			/* So, empty the buffer. */
1743 			ct->ct_bufferReadPtr = ct->ct_buffer;
1744 			ct->ct_bufferWritePtr = ct->ct_buffer;
1745 			ct->ct_bufferPendingSize = 0;
1746 
1747 			/* And add the remaining part of the message. */
1748 			if (len != nBytes) {
1749 				if (addInBuffer(ct, (char *)buff + len,
1750 				    nBytes-len) == -1) {
1751 					return (-1);
1752 				}
1753 			}
1754 		}
1755 	}
1756 	return (nBytes);
1757 }
1758 
1759 static void
1760 flush_registered_clients(void)
1761 {
1762 	struct nb_reg_node *node;
1763 
1764 	if (LIST_ISEMPTY(nb_first)) {
1765 		return;
1766 	}
1767 
1768 	LIST_FOR_EACH(nb_first, node) {
1769 		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
1770 	}
1771 }
1772 
1773 static int
1774 allocate_chunk(void)
1775 {
1776 #define	CHUNK_SIZE 16
1777 	struct nb_reg_node *chk =
1778 	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
1779 	struct nb_reg_node *n;
1780 	int i;
1781 
1782 	if (NULL == chk) {
1783 		return (-1);
1784 	}
1785 
1786 	n = chk;
1787 	for (i = 0; i < CHUNK_SIZE-1; ++i) {
1788 		n[i].next = &(n[i+1]);
1789 	}
1790 	n[CHUNK_SIZE-1].next = (struct nb_reg_node *)&nb_free;
1791 	nb_free = chk;
1792 	return (0);
1793 }
1794 
1795 static int
1796 register_nb(struct ct_data *ct)
1797 {
1798 	struct nb_reg_node *node;
1799 
1800 	(void) mutex_lock(&nb_list_mutex);
1801 
1802 	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
1803 		(void) mutex_unlock(&nb_list_mutex);
1804 		errno = ENOMEM;
1805 		return (-1);
1806 	}
1807 
1808 	if (!exit_handler_set) {
1809 		(void) atexit(flush_registered_clients);
1810 		exit_handler_set = TRUE;
1811 	}
1812 	/* Get the first free node */
1813 	LIST_EXTRACT(nb_free, node);
1814 
1815 	node->ct = ct;
1816 
1817 	LIST_ADD(nb_first, node);
1818 	(void) mutex_unlock(&nb_list_mutex);
1819 
1820 	return (0);
1821 }
1822 
1823 static int
1824 unregister_nb(struct ct_data *ct)
1825 {
1826 	struct nb_reg_node *node;
1827 
1828 	(void) mutex_lock(&nb_list_mutex);
1829 	assert(!LIST_ISEMPTY(nb_first));
1830 
1831 	node = nb_first;
1832 	LIST_FOR_EACH(nb_first, node) {
1833 		if (node->next->ct == ct) {
1834 			/* Get the node to unregister. */
1835 			struct nb_reg_node *n = node->next;
1836 			node->next = n->next;
1837 
1838 			n->ct = NULL;
1839 			LIST_ADD(nb_free, n);
1840 			break;
1841 		}
1842 	}
1843 	(void) mutex_unlock(&nb_list_mutex);
1844 	return (0);
1845 }
1846