/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */
/*
 * Portions of this source code were derived from Berkeley
 * 4.3 BSD under license from the Regents of the University of
 * California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * clnt_vc.c
 *
 * Implements a connectionful client side RPC.
 *
 * Connectionful RPC supports 'batched calls'.
 * A sequence of calls may be batched up in a send buffer. The rpc call
 * returns immediately to the client even though the call was not necessarily
 * sent. The batching occurs if the results' xdr routine is NULL (0) AND
 * the rpc timeout value is zero (see clnt.h, rpc).
 *
 * Clients should NOT casually batch calls that in fact return results; that
 * is, the server side should be aware that a call is batched and not produce
 * any return message. Batched calls that produce many result messages can
 * deadlock (netlock) the client and the server....
 */
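
/*
 * Batching sketch (illustrative only; PUTMSG and xdr_msg are
 * hypothetical, program-specific names).  The first two calls below are
 * queued because xdr_results is NULL and the timeout is zero; a final
 * call with a result routine and a real timeout ships the pipeline:
 *
 *	static struct timeval ZERO = { 0, 0 };
 *	struct timeval tv = { 25, 0 };
 *
 *	(void) clnt_call(clnt, PUTMSG, xdr_msg, (caddr_t)&m1,
 *	    (xdrproc_t)0, (caddr_t)0, ZERO);	- queued, returns at once
 *	(void) clnt_call(clnt, PUTMSG, xdr_msg, (caddr_t)&m2,
 *	    (xdrproc_t)0, (caddr_t)0, ZERO);	- queued, returns at once
 *	(void) clnt_call(clnt, NULLPROC, xdr_void, (caddr_t)0,
 *	    xdr_void, (caddr_t)0, tv);		- ships the whole batch
 */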


#include "mt.h"
#include "rpc_mt.h"
#include <assert.h>
#include <rpc/rpc.h>
#include <errno.h>
#include <sys/byteorder.h>
#include <sys/mkdev.h>
#include <sys/poll.h>
#include <syslog.h>
#include <stdlib.h>
#include <unistd.h>
#include <netinet/tcp.h>

#define	MCALL_MSG_SIZE 24
#define	SECS_TO_MS 1000
/*
 * USECS_TO_MS is deliberately unparenthesized: "usec * USECS_TO_MS"
 * relies on left-to-right grouping, i.e. (usec * 1) / 1000.
 */
#define	USECS_TO_MS 1/1000
#ifndef MIN
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif

extern int __rpc_timeval_to_msec(struct timeval *);
extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
								caddr_t);
extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
		rpcvers_t, uint_t, uint_t, const struct timeval *);

static struct clnt_ops	*clnt_vc_ops(void);
static int		read_vc(void *, caddr_t, int);
static int		write_vc(void *, caddr_t, int);
static int		t_rcvall(int, char *, int);
static bool_t		time_not_ok(struct timeval *);

struct ct_data;
static bool_t		set_up_connection(int, struct netbuf *,
				struct ct_data *, const struct timeval *);
static bool_t		set_io_mode(struct ct_data *, int);

/*
 * Lock table handle used by various MT sync. routines
 */
static mutex_t	vctbl_lock = DEFAULTMUTEX;
static void	*vctbl = NULL;

static const char clnt_vc_errstr[] = "%s : %s";
static const char clnt_vc_str[] = "clnt_vc_create";
static const char clnt_read_vc_str[] = "read_vc";
static const char __no_mem_str[] = "out of memory";
static const char no_fcntl_getfl_str[] = "could not get status flags and modes";
static const char no_nonblock_str[] = "could not set transport blocking mode";

/*
 * Private data structure
 */
struct ct_data {
	int		ct_fd;		/* connection's fd */
	bool_t		ct_closeit;	/* close it on destroy */
	int		ct_tsdu;	/* size of tsdu */
	int		ct_wait;	/* wait interval in milliseconds */
	bool_t		ct_waitset;	/* wait set by clnt_control? */
	struct netbuf	ct_addr;	/* remote addr */
	struct rpc_err	ct_error;
	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg */
	uint_t		ct_mpos;	/* pos after marshal */
	XDR		ct_xdrs;	/* XDR stream */

	/* NON STANDARD INFO - 00-08-31 */
	bool_t		ct_is_oneway; /* True if the current call is oneway. */
	bool_t		ct_is_blocking;
	ushort_t	ct_io_mode;
	ushort_t	ct_blocking_mode;
	uint_t		ct_bufferSize; /* Total size of the buffer. */
	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
	char		*ct_buffer; /* Pointer to the buffer. */
	char		*ct_bufferWritePtr; /* Ptr to the first free byte. */
	char		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
};

struct nb_reg_node {
	struct nb_reg_node *next;
	struct ct_data *ct;
};

static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;
static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;

static bool_t exit_handler_set = FALSE;

static mutex_t nb_list_mutex = DEFAULTMUTEX;


/*
 * Define some macros to manage the linked list.  Note that LIST_ADD
 * pushes the new node at the head; the previous head must become the
 * new node's successor or it would be orphaned from the list.
 */
#define	LIST_ISEMPTY(l) (l == (struct nb_reg_node *)&l)
#define	LIST_CLR(l) (l = (struct nb_reg_node *)&l)
#define	LIST_ADD(l, node) (node->next = l, l = node)
#define	LIST_EXTRACT(l, node) (node = l, l = l->next)
#define	LIST_FOR_EACH(l, node) \
	for (node = l; node != (struct nb_reg_node *)&l; node = node->next)
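
/*
 * A note on the list macros (illustrative, not compiled): the global
 * head pointer itself doubles as the list terminator, so an empty list
 * is one whose head points at its own address and no dummy node is
 * ever allocated:
 *
 *	struct nb_reg_node *head = (struct nb_reg_node *)&head;
 *	LIST_ISEMPTY(head)	- TRUE:  head == (struct nb_reg_node *)&head
 *	LIST_ADD(head, node)	- node->next = old head; head = node
 *	LIST_ISEMPTY(head)	- now FALSE
 *
 * Because the macros take the head by name and use &l, they work only
 * on variables such as nb_first and nb_free, not on arbitrary
 * expressions.
 */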


/* Default size of the IO buffer used in non blocking mode */
#define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)

static int nb_send(struct ct_data *, void *, unsigned int);
static int do_flush(struct ct_data *, uint_t);
static bool_t set_flush_mode(struct ct_data *, int);
static bool_t set_blocking_connection(struct ct_data *, bool_t);

static int register_nb(struct ct_data *);
static int unregister_nb(struct ct_data *);


/*
 * Change the mode of the underlying fd.
 */
static bool_t
set_blocking_connection(struct ct_data *ct, bool_t blocking)
{
	int flag;

	/*
	 * If the underlying fd is already in the required mode,
	 * avoid the syscall.
	 */
	if (ct->ct_is_blocking == blocking)
		return (TRUE);

	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
		    no_fcntl_getfl_str);
		return (FALSE);
	}

	flag = blocking ? flag & ~O_NONBLOCK : flag | O_NONBLOCK;
	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
		    no_nonblock_str);
		return (FALSE);
	}
	ct->ct_is_blocking = blocking;
	return (TRUE);
}

/*
 * Create a client handle for a connection.
 * Default options are set, which the user can change using clnt_control().
 * The rpc/vc package does buffering similar to stdio, so the client
 * must pick send and receive buffer sizes, 0 => use the default.
 * NB: fd is copied into a private area.
 * NB: The rpch->cl_auth is set to null authentication. Callers may wish to
 * set it to something more useful.
 *
 * fd should be open and bound.
 */
CLIENT *
clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
{
	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
			recvsz, NULL));
}
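
/*
 * Usage sketch (illustrative only): fd must already be an open, bound
 * TLI endpoint; passing 0 for both buffer sizes selects the transport
 * defaults, and cl_auth starts out as AUTH_NONE.  PROG and VERS stand
 * for the application's program and version numbers:
 *
 *	CLIENT *cl = clnt_vc_create(fd, &svcaddr, PROG, VERS, 0, 0);
 *	if (cl == NULL)
 *		clnt_pcreateerror("clnt_vc_create");
 */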

/*
 * This has the same definition as clnt_vc_create(), except it
 * takes an additional parameter - a pointer to a timeval structure.
 *
 * Not a public interface. This is for clnt_create_timed,
 * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
 * value to control a tcp connection attempt.
 * (for bug 4049792: clnt_create_timed does not time out)
 *
 * If tp is NULL, use the default timeout to set up the connection.
 */
CLIENT *
_clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
{
	CLIENT *cl;			/* client handle */
	struct ct_data *ct;		/* private data */
	struct timeval now;
	struct rpc_msg call_msg;
	struct t_info tinfo;
	int flag;

	cl = malloc(sizeof (*cl));
	ct = malloc(sizeof (*ct));
	if ((cl == NULL) || (ct == NULL)) {
		(void) syslog(LOG_ERR, clnt_vc_errstr,
				clnt_vc_str, __no_mem_str);
		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
		rpc_createerr.cf_error.re_errno = errno;
		rpc_createerr.cf_error.re_terrno = 0;
		goto err;
	}
	ct->ct_addr.buf = NULL;

	/*
	 * The only use of vctbl_lock is for serializing the creation of
	 * vctbl. Once created the lock needs to be released so we don't
	 * hold it across the set_up_connection() call and end up with a
	 * bunch of threads stuck waiting for the mutex.
	 */
	sig_mutex_lock(&vctbl_lock);

	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
		rpc_createerr.cf_error.re_errno = errno;
		rpc_createerr.cf_error.re_terrno = 0;
		sig_mutex_unlock(&vctbl_lock);
		goto err;
	}

	sig_mutex_unlock(&vctbl_lock);

	ct->ct_io_mode = RPC_CL_BLOCKING;
	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;

	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
	ct->ct_bufferPendingSize = 0;
	ct->ct_bufferWritePtr = NULL;
	ct->ct_bufferReadPtr = NULL;

	/* Check the current state of the fd. */
	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
		    no_fcntl_getfl_str);
		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
		rpc_createerr.cf_error.re_terrno = errno;
		rpc_createerr.cf_error.re_errno = 0;
		goto err;
	}
	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;

	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
		goto err;
	}

	/*
	 * Set up other members of private data struct
	 */
	ct->ct_fd = fd;
	/*
	 * The actual value will be set by clnt_call or clnt_control
	 */
	ct->ct_wait = 30000;
	ct->ct_waitset = FALSE;
	/*
	 * By default, closeit is always FALSE. It is the user's
	 * responsibility to do a t_close() on the fd, or the user may use
	 * clnt_control() to let clnt_destroy() do it.
	 */
	ct->ct_closeit = FALSE;

	/*
	 * Initialize call message
	 */
	(void) gettimeofday(&now, (struct timezone *)0);
	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
	call_msg.rm_call.cb_prog = prog;
	call_msg.rm_call.cb_vers = vers;

	/*
	 * pre-serialize the static part of the call msg and stash it away
	 */
	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
		goto err;
	}
	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
	XDR_DESTROY(&(ct->ct_xdrs));

	if (t_getinfo(fd, &tinfo) == -1) {
		rpc_createerr.cf_stat = RPC_TLIERROR;
		rpc_createerr.cf_error.re_terrno = t_errno;
		rpc_createerr.cf_error.re_errno = 0;
		goto err;
	}
	/*
	 * Find the receive and the send size
	 */
	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
	if ((sendsz == 0) || (recvsz == 0)) {
		rpc_createerr.cf_stat = RPC_TLIERROR;
		rpc_createerr.cf_error.re_terrno = 0;
		rpc_createerr.cf_error.re_errno = 0;
		goto err;
	}
	ct->ct_tsdu = tinfo.tsdu;
	/*
	 * Create a client handle which uses xdrrec for serialization
	 * and authnone for authentication.
	 */
	ct->ct_xdrs.x_ops = NULL;
	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
			read_vc, write_vc);
	if (ct->ct_xdrs.x_ops == NULL) {
		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
		rpc_createerr.cf_error.re_terrno = 0;
		rpc_createerr.cf_error.re_errno = ENOMEM;
		goto err;
	}
	cl->cl_ops = clnt_vc_ops();
	cl->cl_private = (caddr_t)ct;
	cl->cl_auth = authnone_create();
	cl->cl_tp = NULL;
	cl->cl_netid = NULL;
	return (cl);

err:
	if (cl) {
		if (ct) {
			if (ct->ct_addr.len)
				free(ct->ct_addr.buf);
			free(ct);
		}
		free(cl);
	}
	return (NULL);
}

#define	TCPOPT_BUFSIZE 128

/*
 * Set the tcp connection timeout value.
 * Return 0 for success, -1 for failure.
 */
static int
_set_tcp_conntime(int fd, int optval)
{
	struct t_optmgmt req, res;
	struct opthdr *opt;
	int *ip;
	char buf[TCPOPT_BUFSIZE];

	/* LINTED pointer cast */
	opt = (struct opthdr *)buf;
	opt->level = IPPROTO_TCP;
	opt->name = TCP_CONN_ABORT_THRESHOLD;
	opt->len = sizeof (int);

	req.flags = T_NEGOTIATE;
	req.opt.len = sizeof (struct opthdr) + opt->len;
	req.opt.buf = (char *)opt;
	/* LINTED pointer cast */
	ip = (int *)((char *)buf + sizeof (struct opthdr));
	*ip = optval;

	res.flags = 0;
	res.opt.buf = (char *)buf;
	res.opt.maxlen = sizeof (buf);
	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
		return (-1);
	}
	return (0);
}

/*
 * Get the current tcp connection timeout value.
 * Return the timeout in milliseconds for success, -1 for failure.
 */
static int
_get_tcp_conntime(int fd)
{
	struct t_optmgmt req, res;
	struct opthdr *opt;
	int *ip, retval;
	char buf[TCPOPT_BUFSIZE];

	/* LINTED pointer cast */
	opt = (struct opthdr *)buf;
	opt->level = IPPROTO_TCP;
	opt->name = TCP_CONN_ABORT_THRESHOLD;
	opt->len = sizeof (int);

	req.flags = T_CURRENT;
	req.opt.len = sizeof (struct opthdr) + opt->len;
	req.opt.buf = (char *)opt;
	/* LINTED pointer cast */
	ip = (int *)((char *)buf + sizeof (struct opthdr));
	*ip = 0;

	res.flags = 0;
	res.opt.buf = (char *)buf;
	res.opt.maxlen = sizeof (buf);
	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
		return (-1);
	}

	/* LINTED pointer cast */
	ip = (int *)((char *)buf + sizeof (struct opthdr));
	retval = *ip;
	return (retval);
}
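
/*
 * The pair above is used in a save/set/restore pattern around the
 * connect attempt in set_up_connection().  A condensed sketch of that
 * idiom (illustrative only; new_ms is a placeholder):
 *
 *	int saved = _get_tcp_conntime(fd);
 *	if (saved != -1 && _set_tcp_conntime(fd, new_ms) == 0) {
 *		... attempt t_connect() under the shorter threshold ...
 *	}
 *	if (saved > 0)
 *		(void) _set_tcp_conntime(fd, saved);
 */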

static bool_t
set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
						const struct timeval *tp)
{
	int state;
	struct t_call sndcallstr, *rcvcall;
	int nconnect;
	bool_t connected, do_rcv_connect;
	int curr_time = 0;

	ct->ct_addr.len = 0;
	state = t_getstate(fd);
	if (state == -1) {
		rpc_createerr.cf_stat = RPC_TLIERROR;
		rpc_createerr.cf_error.re_errno = 0;
		rpc_createerr.cf_error.re_terrno = t_errno;
		return (FALSE);
	}

#ifdef DEBUG
	fprintf(stderr, "set_up_connection: state = %d\n", state);
#endif
	switch (state) {
	case T_IDLE:
		if (svcaddr == NULL) {
			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
			return (FALSE);
		}
		/*
		 * Connect only if state is IDLE and svcaddr known
		 */
/* LINTED pointer alignment */
		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
		if (rcvcall == NULL) {
			rpc_createerr.cf_stat = RPC_TLIERROR;
			rpc_createerr.cf_error.re_terrno = t_errno;
			rpc_createerr.cf_error.re_errno = errno;
			return (FALSE);
		}
		rcvcall->udata.maxlen = 0;
		sndcallstr.addr = *svcaddr;
		sndcallstr.opt.len = 0;
		sndcallstr.udata.len = 0;
		/*
		 * NULL would have sufficed for rcvcall, except that the
		 * address returned differs in the gateway case; hence
		 * rcvcall is required.
		 */
		connected = FALSE;
		do_rcv_connect = FALSE;

		/*
		 * If there is a timeout value specified, we will try to
		 * reset the tcp connection timeout. If the transport does
		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
		 * for any other reason, the default timeout will be used.
		 */
		if (tp != NULL) {
			int ms;

			/*
			 * TCP_CONN_ABORT_THRESHOLD takes an int value
			 * in milliseconds.
			 */
			ms = tp->tv_sec * SECS_TO_MS +
			    tp->tv_usec * USECS_TO_MS;
			if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
			    (_set_tcp_conntime(fd, ms) == 0)) {
				/* EMPTY */
#ifdef DEBUG
				fprintf(stderr, "set_up_connection: set tcp ");
				fprintf(stderr,
				    "connection timeout to %d ms\n", ms);
#endif
			}
		}

		for (nconnect = 0; nconnect < 3; nconnect++) {
			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
				connected = TRUE;
				break;
			}
			if (!(t_errno == TSYSERR && errno == EINTR)) {
				break;
			}
			if ((state = t_getstate(fd)) == T_OUTCON) {
				do_rcv_connect = TRUE;
				break;
			}
			if (state != T_IDLE) {
				break;
			}
		}
		if (do_rcv_connect) {
			do {
				if (t_rcvconnect(fd, rcvcall) != -1) {
					connected = TRUE;
					break;
				}
			} while (t_errno == TSYSERR && errno == EINTR);
		}

		/*
		 * Set the connection timeout back to its old value.
		 */
		if (curr_time) {
			(void) _set_tcp_conntime(fd, curr_time);
		}

		if (!connected) {
			rpc_createerr.cf_stat = RPC_TLIERROR;
			rpc_createerr.cf_error.re_terrno = t_errno;
			rpc_createerr.cf_error.re_errno = errno;
			(void) t_free((char *)rcvcall, T_CALL);
#ifdef DEBUG
			fprintf(stderr, "clnt_vc: t_connect error %d\n",
				rpc_createerr.cf_error.re_terrno);
#endif
			return (FALSE);
		}

		/* Free the old area if allocated */
		if (ct->ct_addr.buf)
			free(ct->ct_addr.buf);
		ct->ct_addr = rcvcall->addr;	/* To get the new address */
		/* So that the address buf does not get freed */
		rcvcall->addr.buf = NULL;
		(void) t_free((char *)rcvcall, T_CALL);
		break;
	case T_DATAXFER:
	case T_OUTCON:
		if (svcaddr == NULL) {
			/*
			 * svcaddr could also be NULL in cases where the
			 * client is already bound and connected.
			 */
			ct->ct_addr.len = 0;
		} else {
			ct->ct_addr.buf = malloc(svcaddr->len);
			if (ct->ct_addr.buf == NULL) {
				(void) syslog(LOG_ERR, clnt_vc_errstr,
					clnt_vc_str, __no_mem_str);
				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
				rpc_createerr.cf_error.re_errno = errno;
				rpc_createerr.cf_error.re_terrno = 0;
				return (FALSE);
			}
			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
					(size_t)svcaddr->len);
			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
		}
		break;
	default:
		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
		return (FALSE);
	}
	return (TRUE);
}

static enum clnt_stat
clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
{
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;
	XDR *xdrs = &(ct->ct_xdrs);
	struct rpc_msg reply_msg;
	uint32_t x_id;
/* LINTED pointer alignment */
	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
	bool_t shipnow;
	int refreshes = 2;

	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
		rpc_callerr.re_status = RPC_FAILED;
		rpc_callerr.re_errno = errno;
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (RPC_FAILED);
	}

	ct->ct_is_oneway = FALSE;
	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (RPC_FAILED);  /* XXX */
		}
	}

	if (!ct->ct_waitset) {
		/* If time is not within limits, we ignore it. */
		if (time_not_ok(&timeout) == FALSE)
			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
	} else {
		timeout.tv_sec = (ct->ct_wait / 1000);
		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
	}

	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
call_again:
	xdrs->x_op = XDR_ENCODE;
	rpc_callerr.re_status = RPC_SUCCESS;
	/*
	 * The xid is stored in network byte order, so convert it to host
	 * order before decrementing it.
	 */
	x_id = ntohl(*msg_x_id) - 1;
	*msg_x_id = htonl(x_id);

	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
		    (!xdr_args(xdrs, args_ptr))) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	} else {
/* LINTED pointer alignment */
		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
		IXDR_PUT_U_INT32(u, proc);
		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	}
	if (!xdrrec_endofrecord(xdrs, shipnow)) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (rpc_callerr.re_status = RPC_CANTSEND);
	}
	if (!shipnow) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (RPC_SUCCESS);
	}
	/*
	 * Hack to provide rpc-based message passing
	 */
	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (rpc_callerr.re_status = RPC_TIMEDOUT);
	}


	/*
	 * Keep receiving until we get a valid transaction id
	 */
	xdrs->x_op = XDR_DECODE;
	for (;;) {
		reply_msg.acpted_rply.ar_verf = _null_auth;
		reply_msg.acpted_rply.ar_results.where = NULL;
		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
		if (!xdrrec_skiprecord(xdrs)) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
		/* now decode and validate the response header */
		if (!xdr_replymsg(xdrs, &reply_msg)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				continue;
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
		if (reply_msg.rm_xid == x_id)
			break;
	}

	/*
	 * process header
	 */
	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
		rpc_callerr.re_status = RPC_SUCCESS;
	else
		__seterr_reply(&reply_msg, &(rpc_callerr));

	if (rpc_callerr.re_status == RPC_SUCCESS) {
		if (!AUTH_VALIDATE(cl->cl_auth,
				&reply_msg.acpted_rply.ar_verf)) {
			rpc_callerr.re_status = RPC_AUTHERROR;
			rpc_callerr.re_why = AUTH_INVALIDRESP;
		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
			if (!(*xdr_results)(xdrs, results_ptr)) {
				if (rpc_callerr.re_status == RPC_SUCCESS)
					rpc_callerr.re_status =
					    RPC_CANTDECODERES;
			}
		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
							results_ptr)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTDECODERES;
		}
	}	/* end successful completion */
	/*
	 * If unsuccessful AND the error is an authentication error,
	 * then refresh credentials and try again, else break
	 */
	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
		/* maybe our credentials need to be refreshed ... */
		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
			goto call_again;
		else
			/*
			 * We are setting rpc_callerr here given that libnsl
			 * is not reentrant thereby reinitializing the TSD.
			 * If not set here then success could be returned even
			 * though refresh failed.
			 */
			rpc_callerr.re_status = RPC_AUTHERROR;
	} /* end of unsuccessful completion */
	/* free verifier ... */
	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
			reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
		xdrs->x_op = XDR_FREE;
		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
	}
	rpc_fd_unlock(vctbl, ct->ct_fd);
	return (rpc_callerr.re_status);
}
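
/*
 * Timeout-handling sketch for clnt_vc_call() (illustrative only; PROC,
 * xargs/xres and the argument variables are placeholders).  Once
 * CLSET_TIMEOUT has been used, ct_waitset is TRUE and the per-call
 * timeout argument is ignored in favor of the stored value:
 *
 *	struct timeval tv = { 10, 0 };
 *	(void) clnt_control(cl, CLSET_TIMEOUT, (char *)&tv);
 *	- from here on, the last argument of clnt_call() is overridden
 *	stat = clnt_call(cl, PROC, xargs, (caddr_t)&in,
 *	    xres, (caddr_t)&out, anything);
 */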

static enum clnt_stat
clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
{
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;
	XDR *xdrs = &(ct->ct_xdrs);
	uint32_t x_id;
/* LINTED pointer alignment */
	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */

	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
		rpc_callerr.re_status = RPC_FAILED;
		rpc_callerr.re_errno = errno;
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (RPC_FAILED);
	}

	ct->ct_is_oneway = TRUE;

	xdrs->x_op = XDR_ENCODE;
	rpc_callerr.re_status = RPC_SUCCESS;
	/*
	 * The xid is stored in network byte order, so convert it to host
	 * order before decrementing it.
	 */
	x_id = ntohl(*msg_x_id) - 1;
	*msg_x_id = htonl(x_id);

	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
		    (!xdr_args(xdrs, args_ptr))) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	} else {
/* LINTED pointer alignment */
		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
		IXDR_PUT_U_INT32(u, proc);
		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
			if (rpc_callerr.re_status == RPC_SUCCESS)
				rpc_callerr.re_status = RPC_CANTENCODEARGS;
			(void) xdrrec_endofrecord(xdrs, TRUE);
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (rpc_callerr.re_status);
		}
	}

	/*
	 * There is no need to check errors, as the following code does
	 * not depend on the successful completion of the call.
	 * An error, if any occurs, is reported through
	 * rpc_callerr.re_status.
	 */
	(void) xdrrec_endofrecord(xdrs, TRUE);

	rpc_fd_unlock(vctbl, ct->ct_fd);
	return (rpc_callerr.re_status);
}
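
/*
 * clnt_vc_send() backs the one-way clnt_send(3NSL) interface (assumed
 * here from the cl_send entry in clnt_ops): the request is encoded and
 * handed to the transport, and no reply is awaited.  Usage sketch
 * (illustrative only; PROC and xdr_arg are placeholders):
 *
 *	(void) clnt_send(cl, PROC, xdr_arg, (caddr_t)&arg);
 *	clnt_geterr(cl, &err);	- inspect err.re_status if needed
 *
 * In RPC_CL_NONBLOCKING mode (see clnt_vc_control() below) the record
 * is queued in the private buffer by nb_send() instead of being sent
 * synchronously.
 */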

/* ARGSUSED */
static void
clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
{
	*errp = rpc_callerr;
}

static bool_t
clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
{
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;
	XDR *xdrs = &(ct->ct_xdrs);
	bool_t stat;

	(void) rpc_fd_lock(vctbl, ct->ct_fd);
	xdrs->x_op = XDR_FREE;
	stat = (*xdr_res)(xdrs, res_ptr);
	rpc_fd_unlock(vctbl, ct->ct_fd);
	return (stat);
}

static void
clnt_vc_abort(void)
{
}

/*ARGSUSED*/
static bool_t
clnt_vc_control(CLIENT *cl, int request, char *info)
{
	bool_t ret = FALSE;
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;

	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (FALSE);
	}

	switch (request) {
	case CLSET_FD_CLOSE:
		ct->ct_closeit = TRUE;
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (TRUE);
	case CLSET_FD_NCLOSE:
		ct->ct_closeit = FALSE;
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (TRUE);
	case CLFLUSH:
		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
			int res;
			res = do_flush(ct, (info == NULL ||
			    /* LINTED pointer cast */
			    *(int *)info == RPC_CL_DEFAULT_FLUSH) ?
			    /* LINTED pointer cast */
			    ct->ct_blocking_mode : *(int *)info);
			ret = (0 == res);
		}
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (ret);
	}

	/* for other requests which use info */
	if (info == NULL) {
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (FALSE);
	}
	switch (request) {
	case CLSET_TIMEOUT:
/* LINTED pointer alignment */
		if (time_not_ok((struct timeval *)info)) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (FALSE);
		}
/* LINTED pointer alignment */
		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
		ct->ct_waitset = TRUE;
		break;
	case CLGET_TIMEOUT:
/* LINTED pointer alignment */
		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
/* LINTED pointer alignment */
		((struct timeval *)info)->tv_usec =
			(ct->ct_wait % 1000) * 1000;
		break;
	case CLGET_SERVER_ADDR:	/* For compatibility only */
		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
		break;
	case CLGET_FD:
/* LINTED pointer alignment */
		*(int *)info = ct->ct_fd;
		break;
	case CLGET_SVC_ADDR:
		/* The caller should not free this memory area */
/* LINTED pointer alignment */
		*(struct netbuf *)info = ct->ct_addr;
		break;
	case CLSET_SVC_ADDR:		/* set to new address */
#ifdef undef
		/*
		 * XXX: once the t_snddis(), followed by t_connect() starts to
		 * work, this ifdef should be removed.  CLIENT handle reuse
		 * would then be possible for COTS as well.
		 */
		if (t_snddis(ct->ct_fd, NULL) == -1) {
			rpc_createerr.cf_stat = RPC_TLIERROR;
			rpc_createerr.cf_error.re_terrno = t_errno;
			rpc_createerr.cf_error.re_errno = errno;
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (FALSE);
		}
		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
			ct, NULL);
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (ret);
#else
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (FALSE);
#endif
	case CLGET_XID:
		/*
		 * use the knowledge that xid is the
		 * first element in the call structure
		 * This will get the xid of the PREVIOUS call
		 */
/* LINTED pointer alignment */
		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
		break;
	case CLSET_XID:
		/* This will set the xid of the NEXT call */
/* LINTED pointer alignment */
		*(uint32_t *)ct->ct_mcall = htonl(*(uint32_t *)info + 1);
		/* increment by 1 as clnt_vc_call() decrements once */
		break;
	case CLGET_VERS:
		/*
		 * This RELIES on the information that, in the call body,
		 * the version number field is the fifth field from the
		 * beginning of the RPC header. MUST be changed if the
		 * call_struct is changed
		 */
/* LINTED pointer alignment */
		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
						4 * BYTES_PER_XDR_UNIT));
		break;

	case CLSET_VERS:
/* LINTED pointer alignment */
		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
/* LINTED pointer alignment */
			htonl(*(uint32_t *)info);
		break;

	case CLGET_PROG:
		/*
		 * This RELIES on the information that, in the call body,
		 * the program number field is the fourth field from the
		 * beginning of the RPC header. MUST be changed if the
		 * call_struct is changed
		 */
/* LINTED pointer alignment */
		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
						3 * BYTES_PER_XDR_UNIT));
		break;

	case CLSET_PROG:
/* LINTED pointer alignment */
		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
/* LINTED pointer alignment */
			htonl(*(uint32_t *)info);
		break;

	case CLSET_IO_MODE:
		/* LINTED pointer cast */
		if (!set_io_mode(ct, *(int *)info)) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (FALSE);
		}
		break;
	case CLSET_FLUSH_MODE:
		/* Set a specific FLUSH_MODE */
		/* LINTED pointer cast */
		if (!set_flush_mode(ct, *(int *)info)) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (FALSE);
		}
		break;
	case CLGET_FLUSH_MODE:
		/* LINTED pointer cast */
		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
		break;

	case CLGET_IO_MODE:
		/* LINTED pointer cast */
		*(rpciomode_t *)info = ct->ct_io_mode;
		break;

	case CLGET_CURRENT_REC_SIZE:
		/*
		 * Returns the current amount of memory allocated
		 * to pending requests
		 */
		/* LINTED pointer cast */
		*(int *)info = ct->ct_bufferPendingSize;
		break;

	case CLSET_CONNMAXREC_SIZE:
		/* Cannot resize the buffer while it is in use. */
		if (ct->ct_bufferPendingSize != 0) {
			rpc_fd_unlock(vctbl, ct->ct_fd);
			return (FALSE);
		}
		/*
		 * If the new size is equal to the current size,
		 * there is nothing to do.
		 */
		/* LINTED pointer cast */
		if (ct->ct_bufferSize == *(uint_t *)info)
			break;

		/* LINTED pointer cast */
		ct->ct_bufferSize = *(uint_t *)info;
		if (ct->ct_buffer) {
			free(ct->ct_buffer);
			ct->ct_buffer = NULL;
			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
		}
		break;

	case CLGET_CONNMAXREC_SIZE:
		/*
		 * Returns the size of the buffer allocated
		 * to pending requests
		 */
		/* LINTED pointer cast */
		*(uint_t *)info = ct->ct_bufferSize;
		break;

	default:
		rpc_fd_unlock(vctbl, ct->ct_fd);
		return (FALSE);
	}
	rpc_fd_unlock(vctbl, ct->ct_fd);
	return (TRUE);
}
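
/*
 * Non-blocking I/O sketch (illustrative only): switch a handle to
 * RPC_CL_NONBLOCKING, let one-way requests accumulate in the private
 * buffer, then flush explicitly:
 *
 *	int mode = RPC_CL_NONBLOCKING;
 *	int flush = RPC_CL_BESTEFFORT_FLUSH;
 *
 *	if (!clnt_control(cl, CLSET_IO_MODE, (char *)&mode))
 *		... fall back to blocking mode ...
 *	... issue one-way sends; they are buffered on EWOULDBLOCK ...
 *	(void) clnt_control(cl, CLFLUSH, (char *)&flush);
 */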

static void
clnt_vc_destroy(CLIENT *cl)
{
/* LINTED pointer alignment */
	struct ct_data *ct = (struct ct_data *)cl->cl_private;
	int ct_fd = ct->ct_fd;

	(void) rpc_fd_lock(vctbl, ct_fd);

	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
		(void) unregister_nb(ct);
	}

	if (ct->ct_closeit)
		(void) t_close(ct_fd);
	XDR_DESTROY(&(ct->ct_xdrs));
	if (ct->ct_addr.buf)
		free(ct->ct_addr.buf);
	free(ct);
	if (cl->cl_netid && cl->cl_netid[0])
		free(cl->cl_netid);
	if (cl->cl_tp && cl->cl_tp[0])
		free(cl->cl_tp);
	free(cl);
	rpc_fd_unlock(vctbl, ct_fd);
}

/*
 * Interface between the xdr serializer and the vc connection.
 * Behaves like the system calls read & write, but keeps some error state
 * around for the rpc level.
 */
static int
read_vc(void *ct_tmp, caddr_t buf, int len)
{
	static pthread_key_t pfdp_key = PTHREAD_ONCE_KEY_NP;
	struct pollfd *pfdp;
	int npfd;		/* total number of pfdp allocated */
	struct ct_data *ct = ct_tmp;
	struct timeval starttime;
	struct timeval curtime;
	int poll_time;
	int delta;

	if (len == 0)
		return (0);

	/*
	 * Allocate just one pollfd the first time.  thr_get_storage() may
	 * return a larger buffer, left over from the last time we were
	 * here, but that's OK.  realloc() will deal with it properly.
	 */
	npfd = 1;
	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
	if (pfdp == NULL) {
		(void) syslog(LOG_ERR, clnt_vc_errstr,
			clnt_read_vc_str, __no_mem_str);
		rpc_callerr.re_status = RPC_SYSTEMERROR;
		rpc_callerr.re_errno = errno;
		rpc_callerr.re_terrno = 0;
		return (-1);
	}

	/*
	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
	 *	descriptor we're really interested in (as opposed to the
	 *	callback descriptors).
	 */
	pfdp[0].fd = ct->ct_fd;
	pfdp[0].events = MASKVAL;
	pfdp[0].revents = 0;
	poll_time = ct->ct_wait;
	if (gettimeofday(&starttime, NULL) == -1) {
		syslog(LOG_ERR, "Unable to get time of day: %m");
		return (-1);
	}

	for (;;) {
		extern void (*_svc_getreqset_proc)();
		extern pollfd_t *svc_pollfd;
		extern int svc_max_pollfd;
		int fds;

		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */

		if (_svc_getreqset_proc) {
			sig_rw_rdlock(&svc_fd_lock);

			/* reallocate pfdp to svc_max_pollfd + 1 */
			if (npfd != (svc_max_pollfd + 1)) {
				struct pollfd *tmp_pfdp = realloc(pfdp,
						sizeof (struct pollfd) *
						(svc_max_pollfd + 1));
				if (tmp_pfdp == NULL) {
					sig_rw_unlock(&svc_fd_lock);
					(void) syslog(LOG_ERR, clnt_vc_errstr,
						clnt_read_vc_str, __no_mem_str);
					rpc_callerr.re_status = RPC_SYSTEMERROR;
					rpc_callerr.re_errno = errno;
					rpc_callerr.re_terrno = 0;
					return (-1);
				}

				pfdp = tmp_pfdp;
				npfd = svc_max_pollfd + 1;
				(void) pthread_setspecific(pfdp_key, pfdp);
			}
			if (npfd > 1)
				(void) memcpy(&pfdp[1], svc_pollfd,
				    sizeof (struct pollfd) * (npfd - 1));

			sig_rw_unlock(&svc_fd_lock);
		} else {
			npfd = 1;	/* don't forget about pfdp[0] */
		}

		switch (fds = poll(pfdp, npfd, poll_time)) {
		case 0:
			rpc_callerr.re_status = RPC_TIMEDOUT;
			return (-1);

		case -1:
			if (errno != EINTR)
				continue;
			else {
				/*
				 * interrupted by another signal,
				 * update time_waited
				 */

				if (gettimeofday(&curtime, NULL) == -1) {
					syslog(LOG_ERR,
					    "Unable to get time of day:  %m");
					errno = 0;
					continue;
				}
				delta = (curtime.tv_sec -
						starttime.tv_sec) * 1000 +
					(curtime.tv_usec -
						starttime.tv_usec) / 1000;
				poll_time -= delta;
				if (poll_time < 0) {
					rpc_callerr.re_status =
						RPC_TIMEDOUT;
					errno = 0;
					return (-1);
				} else {
					errno = 0; /* reset it */
					continue;
				}
			}
		}

		if (pfdp[0].revents == 0) {
			/* must be for the server side of the house */
			(*_svc_getreqset_proc)(&pfdp[1], fds);
			continue;	/* do poll again */
		}

		if (pfdp[0].revents & POLLNVAL) {
			rpc_callerr.re_status = RPC_CANTRECV;
			/*
			 *	Note:  we're faking errno here because we
			 *	previously would have expected select() to
			 *	return -1 with errno EBADF.  Poll(BA_OS)
			 *	returns 0 and sets the POLLNVAL revents flag
			 *	instead.
			 */
			rpc_callerr.re_errno = errno = EBADF;
			return (-1);
		}

		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
			rpc_callerr.re_status = RPC_CANTRECV;
			rpc_callerr.re_errno = errno = EPIPE;
			return (-1);
		}
		break;
	}

	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
	case 0:
		/* premature eof */
		rpc_callerr.re_errno = ENOLINK;
		rpc_callerr.re_terrno = 0;
		rpc_callerr.re_status = RPC_CANTRECV;
		len = -1;	/* it's really an error */
		break;

	case -1:
		rpc_callerr.re_terrno = t_errno;
		rpc_callerr.re_errno = 0;
		rpc_callerr.re_status = RPC_CANTRECV;
		break;
	}
	return (len);
}

static int
write_vc(void *ct_tmp, caddr_t buf, int len)
{
	int i, cnt;
	struct ct_data *ct = ct_tmp;
	int flag;
	int maxsz;

	maxsz = ct->ct_tsdu;

	/* Handle the non-blocking mode */
	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
		/*
		 * Test a special case here. If the length of the current
		 * write is greater than the transport data unit, and the
		 * mode is non blocking, we return RPC_CANTSEND.
		 * XXX  this is not very clean.
		 */
		if (maxsz > 0 && len > maxsz) {
			rpc_callerr.re_terrno = errno;
			rpc_callerr.re_errno = 0;
			rpc_callerr.re_status = RPC_CANTSEND;
			return (-1);
		}

		len = nb_send(ct, buf, (unsigned)len);
		if (len == -1) {
			rpc_callerr.re_terrno = errno;
			rpc_callerr.re_errno = 0;
			rpc_callerr.re_status = RPC_CANTSEND;
		} else if (len == -2) {
			rpc_callerr.re_terrno = 0;
			rpc_callerr.re_errno = 0;
			rpc_callerr.re_status = RPC_CANTSTORE;
		}
		return (len);
	}

	if ((maxsz == 0) || (maxsz == -1)) {
		/*
		 * t_snd() may return -1 for an error on the connection
		 * (the connection needs to be repaired/closed), and -2
		 * for a flow-control handling error (no operation to do,
		 * just wait and call T_Flush()).
		 */
		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
			rpc_callerr.re_terrno = t_errno;
			rpc_callerr.re_errno = 0;
			rpc_callerr.re_status = RPC_CANTSEND;
		}
		return (len);
	}

	/*
	 * This is for those transports which have a max size for data.
	 */
	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
		flag = cnt > maxsz ? T_MORE : 0;
		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
				flag)) == -1) {
			rpc_callerr.re_terrno = t_errno;
			rpc_callerr.re_errno = 0;
			rpc_callerr.re_status = RPC_CANTSEND;
			return (-1);
		}
	}
	return (len);
}

/*
 * Receive the required bytes of data, even if it is fragmented.
 */
static int
t_rcvall(int fd, char *buf, int len)
{
	int moreflag;
	int final = 0;
	int res;

	do {
		moreflag = 0;
		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
		if (res == -1) {
			if (t_errno == TLOOK) {
				switch (t_look(fd)) {
				case T_DISCONNECT:
					(void) t_rcvdis(fd, NULL);
					(void) t_snddis(fd, NULL);
					break;
				case T_ORDREL:
					/* Received orderly release indication */
					(void) t_rcvrel(fd);
					/* Send orderly release indicator */
					(void) t_sndrel(fd);
					break;
				default:
					break;
				}
			}
			/* Fail on any error, TLOOK or otherwise. */
			return (-1);
		} else if (res == 0) {
			return (0);
		}
		final += res;
		buf += res;
		len -= res;
	} while ((len > 0) && (moreflag & T_MORE));
	return (final);
}

static struct clnt_ops *
clnt_vc_ops(void)
{
	static struct clnt_ops ops;
	extern mutex_t	ops_lock;

	/* VARIABLES PROTECTED BY ops_lock: ops */

	sig_mutex_lock(&ops_lock);
	if (ops.cl_call == NULL) {
		ops.cl_call = clnt_vc_call;
		ops.cl_send = clnt_vc_send;
		ops.cl_abort = clnt_vc_abort;
		ops.cl_geterr = clnt_vc_geterr;
		ops.cl_freeres = clnt_vc_freeres;
		ops.cl_destroy = clnt_vc_destroy;
		ops.cl_control = clnt_vc_control;
	}
	sig_mutex_unlock(&ops_lock);
	return (&ops);
}

/*
 * Make sure that the time is not garbage.  A value of -1 is disallowed.
 * Note this is different from time_not_ok in clnt_dg.c
 */
static bool_t
time_not_ok(struct timeval *t)
{
	return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
		t->tv_usec <= -1 || t->tv_usec > 1000000);
}


/* Compute the number of bytes that remain until the end of the buffer */
#define	REMAIN_BYTES(p) (ct->ct_bufferSize-(ct->ct_##p - ct->ct_buffer))

static int
addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
{
	if (NULL == ct->ct_buffer) {
		/* Buffer not allocated yet. */
		char *buffer;

		buffer = malloc(ct->ct_bufferSize);
		if (NULL == buffer) {
			errno = ENOMEM;
			return (-1);
		}
		(void) memcpy(buffer, dataToAdd, nBytes);

		ct->ct_buffer = buffer;
		ct->ct_bufferReadPtr = buffer;
		ct->ct_bufferWritePtr = buffer + nBytes;
		ct->ct_bufferPendingSize = nBytes;
	} else {
		/*
		 * For an already allocated buffer, two mem copies
		 * might be needed, depending on the current
		 * writing position.
		 */

		/* Compute the length of the first copy. */
		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));

		ct->ct_bufferPendingSize += nBytes;

		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
		ct->ct_bufferWritePtr += len;
		nBytes -= len;
		if (0 == nBytes) {
			/* Only one memcpy was needed. */

			/*
			 * If the write pointer is at the end of the buffer,
			 * wrap it now.
			 */
			if (ct->ct_bufferWritePtr ==
			    (ct->ct_buffer + ct->ct_bufferSize)) {
				ct->ct_bufferWritePtr = ct->ct_buffer;
			}
		} else {
			/* A second memcpy is needed. */
			dataToAdd += len;

			/*
			 * Copy the remaining data to the beginning of the
			 * buffer
			 */
			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
		}
	}
	return (0);
}

static void
consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
{
	ct->ct_bufferPendingSize -= nBytes;
	if (ct->ct_bufferPendingSize == 0) {
		/*
		 * If the buffer contains no data, we set the two pointers at
		 * the beginning of the buffer (to minimize buffer wraps).
		 */
		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
	} else {
		ct->ct_bufferReadPtr += nBytes;
		if (ct->ct_bufferReadPtr >
		    ct->ct_buffer + ct->ct_bufferSize) {
			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
		}
	}
}

static int
iovFromBuffer(struct ct_data *ct, struct iovec *iov)
{
	int l;

	if (ct->ct_bufferPendingSize == 0)
		return (0);

	l = REMAIN_BYTES(bufferReadPtr);
	if (l < ct->ct_bufferPendingSize) {
		/* Buffer in two fragments. */
		iov[0].iov_base = ct->ct_bufferReadPtr;
		iov[0].iov_len  = l;

		iov[1].iov_base = ct->ct_buffer;
		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
		return (2);
	} else {
		/* Buffer in one fragment. */
		iov[0].iov_base = ct->ct_bufferReadPtr;
		iov[0].iov_len  = ct->ct_bufferPendingSize;
		return (1);
	}
}
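
/*
 * Worked example of the ring buffer above (illustrative only), with
 * ct_bufferSize == 8 and an initially empty buffer:
 *
 *	addInBuffer(ct, data, 6)	- read=0, write=6, pending=6
 *	consumeFromBuffer(ct, 4)	- read=4, write=6, pending=2
 *	addInBuffer(ct, data2, 5)	- first memcpy fills bytes 6..7,
 *					  the second wraps into bytes 0..2;
 *					  write=3, pending=7
 *	iovFromBuffer(ct, iov)		- returns 2: iov[0] = bytes 4..7,
 *					  iov[1] = bytes 0..2
 */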

static bool_t
set_flush_mode(struct ct_data *ct, int mode)
{
	switch (mode) {
	case RPC_CL_BLOCKING_FLUSH:
		/* flush the buffer completely (possibly blocking) */
	case RPC_CL_BESTEFFORT_FLUSH:
		/* flush as much as possible without blocking */
	case RPC_CL_DEFAULT_FLUSH:
		/* flush according to the currently defined policy */
		ct->ct_blocking_mode = mode;
		return (TRUE);
	default:
		return (FALSE);
	}
}

static bool_t
set_io_mode(struct ct_data *ct, int ioMode)
{
	switch (ioMode) {
	case RPC_CL_BLOCKING:
		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
			if (NULL != ct->ct_buffer) {
				/*
				 * If a buffer was allocated for this
				 * connection, flush it now, and free it.
				 */
				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
				free(ct->ct_buffer);
				ct->ct_buffer = NULL;
			}
			(void) unregister_nb(ct);
			ct->ct_io_mode = ioMode;
		}
		break;
	case RPC_CL_NONBLOCKING:
		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
			if (-1 == register_nb(ct)) {
				return (FALSE);
			}
			ct->ct_io_mode = ioMode;
		}
		break;
	default:
		return (FALSE);
	}
	return (TRUE);
}

static int
do_flush(struct ct_data *ct, uint_t flush_mode)
{
	int result;

	if (ct->ct_bufferPendingSize == 0) {
		return (0);
	}

	switch (flush_mode) {
	case RPC_CL_BLOCKING_FLUSH:
		if (!set_blocking_connection(ct, TRUE)) {
			return (-1);
		}
		while (ct->ct_bufferPendingSize > 0) {
			if (REMAIN_BYTES(bufferReadPtr) <
			    ct->ct_bufferPendingSize) {
				struct iovec iov[2];
				(void) iovFromBuffer(ct, iov);
				result = writev(ct->ct_fd, iov, 2);
			} else {
				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
				    ct->ct_bufferPendingSize, 0);
			}
			if (result < 0) {
				return (-1);
			}
			consumeFromBuffer(ct, result);
		}

		break;

	case RPC_CL_BESTEFFORT_FLUSH:
		(void) set_blocking_connection(ct, FALSE);
		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
			struct iovec iov[2];
			(void) iovFromBuffer(ct, iov);
			result = writev(ct->ct_fd, iov, 2);
		} else {
			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
			    ct->ct_bufferPendingSize, 0);
		}
		if (result < 0) {
			if (errno != EWOULDBLOCK) {
				perror("flush");
				return (-1);
			}
			return (0);
		}
		if (result > 0)
			consumeFromBuffer(ct, result);
		break;
	}
	return (0);
}

/*
 * Non blocking send.
 */

static int
nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
{
	int result;

	/*
	 * Verify the RPC record mark: the most significant bit of the
	 * leading 4-byte record mark must be set, flagging this as the
	 * last fragment of the record.
	 */
	if (!(ntohl(*(uint32_t *)buff) & (1U << 31))) {
		return (-1);
	}

	/*
	 * Check to see if the current message can be stored fully in the
	 * buffer. We have to check this now because it may be impossible
	 * to send any data, so the message must be stored in the buffer.
	 */
	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
		/* Try to flush (to free some space). */
		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);

		/* Can we store the message now ? */
		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
			return (-2);
	}

	(void) set_blocking_connection(ct, FALSE);

	/*
	 * If there is no data pending, we can simply try
	 * to send our data.
	 */
	if (ct->ct_bufferPendingSize == 0) {
		result = t_snd(ct->ct_fd, buff, nBytes, 0);
		if (result == -1) {
			if (errno == EWOULDBLOCK) {
				result = 0;
			} else {
				perror("send");
				return (-1);
			}
		}
		/*
		 * If we have not sent all the data, we must store
		 * the remainder in the buffer.
		 */
		if (result != nBytes) {
			if (addInBuffer(ct, (char *)buff + result,
			    nBytes - result) == -1) {
				return (-1);
			}
		}
	} else {
		/*
		 * Some data is pending in the buffer.  We try to send
		 * both the buffered data and the current message in one
		 * shot.
		 */
		struct iovec iov[3];
		int i = iovFromBuffer(ct, &iov[0]);

		iov[i].iov_base = buff;
		iov[i].iov_len  = nBytes;

		result = writev(ct->ct_fd, iov, i+1);
		if (result == -1) {
			if (errno == EWOULDBLOCK) {
				/* No bytes sent */
				result = 0;
			} else {
				return (-1);
			}
		}

		/*
		 * Add the bytes from the message
		 * that we have not sent.
		 */
		if (result <= ct->ct_bufferPendingSize) {
			/* No bytes from the message were sent */
			consumeFromBuffer(ct, result);
			if (addInBuffer(ct, buff, nBytes) == -1) {
				return (-1);
			}
		} else {
			/*
			 * Some bytes of the message were sent.
			 * Compute the length of the part of the message
			 * that was sent.
			 */
			int len = result - ct->ct_bufferPendingSize;

			/* So, empty the buffer. */
			ct->ct_bufferReadPtr = ct->ct_buffer;
			ct->ct_bufferWritePtr = ct->ct_buffer;
			ct->ct_bufferPendingSize = 0;

			/* And add the remaining part of the message. */
			if (len != nBytes) {
				if (addInBuffer(ct, (char *)buff + len,
					nBytes - len) == -1) {
					return (-1);
				}
			}
		}
	}
	return (nBytes);
}

static void
flush_registered_clients(void)
{
	struct nb_reg_node *node;

	if (LIST_ISEMPTY(nb_first)) {
		return;
	}

	LIST_FOR_EACH(nb_first, node) {
		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
	}
}

static int
allocate_chunk(void)
{
#define	CHUNK_SIZE 16
	struct nb_reg_node *chk =
	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
	struct nb_reg_node *n;
	int i;

	if (NULL == chk) {
		return (-1);
	}

	n = chk;
	for (i = 0; i < CHUNK_SIZE - 1; ++i) {
		n[i].next = &(n[i+1]);
	}
	n[CHUNK_SIZE - 1].next = (struct nb_reg_node *)&nb_free;
	nb_free = chk;
	return (0);
}

static int
register_nb(struct ct_data *ct)
{
	struct nb_reg_node *node;

	(void) mutex_lock(&nb_list_mutex);

	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
		(void) mutex_unlock(&nb_list_mutex);
		errno = ENOMEM;
		return (-1);
	}

	if (!exit_handler_set) {
		(void) atexit(flush_registered_clients);
		exit_handler_set = TRUE;
	}
	/* Get the first free node */
	LIST_EXTRACT(nb_free, node);

	node->ct = ct;

	LIST_ADD(nb_first, node);
	(void) mutex_unlock(&nb_list_mutex);

	return (0);
}

static int
unregister_nb(struct ct_data *ct)
{
	struct nb_reg_node **prev;

	(void) mutex_lock(&nb_list_mutex);
	assert(!LIST_ISEMPTY(nb_first));

	/*
	 * Walk the list through a pointer to the previous link so that
	 * the head node can be unlinked like any other.
	 */
	prev = &nb_first;
	while (*prev != (struct nb_reg_node *)&nb_first) {
		if ((*prev)->ct == ct) {
			/* Get the node to unregister. */
			struct nb_reg_node *n = *prev;
			*prev = n->next;

			n->ct = NULL;
			LIST_ADD(nb_free, n);
			break;
		}
		prev = &(*prev)->next;
	}
	(void) mutex_unlock(&nb_list_mutex);
	return (0);
}
1833