xref: /titanic_51/usr/src/lib/libnsl/rpc/clnt_vc.c (revision 996aa81675f6b63ed02041243b97e61ee7bd51d2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
28 /*
29  * Portions of this source code were derived from Berkeley
30  * 4.3 BSD under license from the Regents of the University of
31  * California.
32  */
33 
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 
36 /*
37  * clnt_vc.c
38  *
39  * Implements a connectionful client side RPC.
40  *
41  * Connectionful RPC supports 'batched calls'.
42  * A sequence of calls may be batched-up in a send buffer. The rpc call
 * returns immediately to the client even though the call was not necessarily
44  * sent. The batching occurs if the results' xdr routine is NULL (0) AND
45  * the rpc timeout value is zero (see clnt.h, rpc).
46  *
47  * Clients should NOT casually batch calls that in fact return results; that
48  * is the server side should be aware that a call is batched and not produce
49  * any return message. Batched calls that produce many result messages can
50  * deadlock (netlock) the client and the server....
51  */
52 
53 
54 #include "mt.h"
55 #include "rpc_mt.h"
56 #include <assert.h>
57 #include <rpc/rpc.h>
58 #include <errno.h>
59 #include <sys/byteorder.h>
60 #include <sys/mkdev.h>
61 #include <sys/poll.h>
62 #include <syslog.h>
63 #include <stdlib.h>
64 #include <unistd.h>
65 #include <netinet/tcp.h>
66 
/* Size in bytes of ct_mcall, the buffer holding the pre-marshalled header. */
#define	MCALL_MSG_SIZE 24
/* Multiplier that converts seconds to milliseconds. */
#define	SECS_TO_MS 1000
/*
 * Converts microseconds to milliseconds. Deliberately NOT parenthesized:
 * it must be used as the right operand of a multiplication, so that
 * "usec * USECS_TO_MS" expands to "usec * 1/1000", i.e. (usec * 1) / 1000.
 * Wrapping the body in parentheses would make it the integer constant 0.
 */
#define	USECS_TO_MS 1/1000
#ifndef MIN
/* Smaller of a and b; each argument may be evaluated more than once. */
#define	MIN(a, b)	(((a) < (b)) ? (a) : (b))
#endif
73 
74 extern int __rpc_timeval_to_msec(struct timeval *);
75 extern int __rpc_compress_pollfd(int, pollfd_t *, pollfd_t *);
76 extern bool_t xdr_opaque_auth(XDR *, struct opaque_auth *);
77 extern bool_t __rpc_gss_wrap(AUTH *, char *, uint_t, XDR *, bool_t (*)(),
78 								caddr_t);
79 extern bool_t __rpc_gss_unwrap(AUTH *, XDR *, bool_t (*)(), caddr_t);
80 extern CLIENT *_clnt_vc_create_timed(int, struct netbuf *, rpcprog_t,
81 		rpcvers_t, uint_t, uint_t, const struct timeval *);
82 
83 static struct clnt_ops	*clnt_vc_ops(void);
84 static int		read_vc(void *, caddr_t, int);
85 static int		write_vc(void *, caddr_t, int);
86 static int		t_rcvall(int, char *, int);
87 static bool_t		time_not_ok(struct timeval *);
88 
89 struct ct_data;
90 static bool_t		set_up_connection(int, struct netbuf *,
91 				struct ct_data *, const struct timeval *);
92 static bool_t		set_io_mode(struct ct_data *, int);
93 
/*
 * Lock table handle used by various MT sync. routines.
 *
 * vctbl is created on first use by _clnt_vc_create_timed() through
 * rpc_fd_init(); vctbl_lock only serializes that one-time creation.
 * Afterwards vctbl is passed, together with the connection's fd, to
 * rpc_fd_lock() and rpc_fd_unlock().
 */
static mutex_t	vctbl_lock = DEFAULTMUTEX;
static void	*vctbl = NULL;

/*
 * Strings handed to syslog(). clnt_vc_errstr is the "<who> : <what>"
 * format; the others are its arguments.
 * NOTE(review): clnt_read_vc_str is presumably used by read_vc(), which
 * is not in this part of the file - confirm.
 */
static const char clnt_vc_errstr[] = "%s : %s";
static const char clnt_vc_str[] = "clnt_vc_create";
static const char clnt_read_vc_str[] = "read_vc";
static const char __no_mem_str[] = "out of memory";
static const char no_fcntl_getfl_str[] = "could not get status flags and modes";
static const char no_nonblock_str[] = "could not set transport blocking mode";
106 
/*
 * Private data structure, one per client handle (stored in cl_private).
 */
struct ct_data {
	int		ct_fd;		/* connection's fd */
	bool_t		ct_closeit;	/* close it on destroy */
	int		ct_tsdu;	/* size of tsdu, from t_getinfo() */
	int		ct_wait;	/* wait interval in milliseconds */
	bool_t		ct_waitset;	/* wait set by clnt_control? */
	struct netbuf	ct_addr;	/* remote addr */
	struct rpc_err	ct_error;
	char		ct_mcall[MCALL_MSG_SIZE]; /* marshalled callmsg */
	uint_t		ct_mpos;	/* pos after marshal */
	XDR		ct_xdrs;	/* XDR stream */

	/* NON STANDARD INFO - 00-08-31 */
	bool_t		ct_is_oneway; /* True if the current call is oneway. */
	bool_t		ct_is_blocking; /* FALSE while O_NONBLOCK is set on ct_fd */
	ushort_t	ct_io_mode;	/* RPC_CL_BLOCKING or RPC_CL_NONBLOCKING */
	ushort_t	ct_blocking_mode; /* default flush mode for do_flush() */
	uint_t		ct_bufferSize; /* Total size of the buffer. */
	uint_t		ct_bufferPendingSize; /* Size of unsent data. */
	char 		*ct_buffer; /* Pointer to the buffer. */
	char 		*ct_bufferWritePtr; /* Ptr to the first free byte. */
	char 		*ct_bufferReadPtr; /* Ptr to the first byte of data. */
};
133 
/*
 * Node of a singly linked list that records a ct_data.
 * NOTE(review): "nb" presumably stands for non-blocking (see
 * register_nb()/unregister_nb()) - confirm against their definitions.
 */
struct nb_reg_node {
	struct nb_reg_node *next;	/* following node, or the list sentinel */
	struct ct_data *ct;		/* connection recorded by this node */
};

/*
 * List heads. An empty list is encoded by the head pointing at its own
 * address (cast to a node pointer); that same address terminates a
 * non-empty list. See the LIST_* macros.
 */
static struct nb_reg_node *nb_first = (struct nb_reg_node *)&nb_first;
static struct nb_reg_node *nb_free  = (struct nb_reg_node *)&nb_free;

/* NOTE(review): presumably set once an exit handler is installed - confirm. */
static bool_t exit_handler_set = FALSE;

/* NOTE(review): presumably guards nb_first and nb_free - confirm. */
static mutex_t nb_list_mutex = DEFAULTMUTEX;
145 
146 
147 /* Define some macros to manage the linked list. */
148 #define	LIST_ISEMPTY(l) (l == (struct nb_reg_node *)&l)
149 #define	LIST_CLR(l) (l = (struct nb_reg_node *)&l)
150 #define	LIST_ADD(l, node) (node->next = l->next, l = node)
151 #define	LIST_EXTRACT(l, node) (node = l, l = l->next)
152 #define	LIST_FOR_EACH(l, node) \
153 	for (node = l; node != (struct nb_reg_node *)&l; node = node->next)
154 
155 
156 /* Default size of the IO buffer used in non blocking mode */
157 #define	DEFAULT_PENDING_ZONE_MAX_SIZE (16*1024)
158 
159 static int nb_send(struct ct_data *, void *, unsigned int);
160 static int do_flush(struct ct_data *, uint_t);
161 static bool_t set_flush_mode(struct ct_data *, int);
162 static bool_t set_blocking_connection(struct ct_data *, bool_t);
163 
164 static int register_nb(struct ct_data *);
165 static int unregister_nb(struct ct_data *);
166 
167 
168 /*
169  * Change the mode of the underlying fd.
170  */
171 static bool_t
172 set_blocking_connection(struct ct_data *ct, bool_t blocking)
173 {
174 	int flag;
175 
176 	/*
177 	 * If the underlying fd is already in the required mode,
178 	 * avoid the syscall.
179 	 */
180 	if (ct->ct_is_blocking == blocking)
181 		return (TRUE);
182 
183 	if ((flag = fcntl(ct->ct_fd, F_GETFL, 0)) < 0) {
184 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
185 		    no_fcntl_getfl_str);
186 		return (FALSE);
187 	}
188 
189 	flag = blocking? flag&~O_NONBLOCK : flag|O_NONBLOCK;
190 	if (fcntl(ct->ct_fd, F_SETFL, flag) != 0) {
191 		(void) syslog(LOG_ERR, "set_blocking_connection : %s",
192 		    no_nonblock_str);
193 		return (FALSE);
194 	}
195 	ct->ct_is_blocking = blocking;
196 	return (TRUE);
197 }
198 
199 /*
200  * Create a client handle for a connection.
201  * Default options are set, which the user can change using clnt_control()'s.
202  * The rpc/vc package does buffering similar to stdio, so the client
203  * must pick send and receive buffer sizes, 0 => use the default.
204  * NB: fd is copied into a private area.
205  * NB: The rpch->cl_auth is set null authentication. Caller may wish to
206  * set this something more useful.
207  *
208  * fd should be open and bound.
209  */
210 CLIENT *
211 clnt_vc_create(const int fd, struct netbuf *svcaddr, const rpcprog_t prog,
212 	const rpcvers_t vers, const uint_t sendsz, const uint_t recvsz)
213 {
214 	return (_clnt_vc_create_timed(fd, svcaddr, prog, vers, sendsz,
215 			recvsz, NULL));
216 }
217 
218 /*
219  * This has the same definition as clnt_vc_create(), except it
220  * takes an additional parameter - a pointer to a timeval structure.
221  *
222  * Not a public interface. This is for clnt_create_timed,
223  * clnt_create_vers_timed, clnt_tp_create_timed to pass down the timeout
224  * value to control a tcp connection attempt.
225  * (for bug 4049792: clnt_create_timed does not time out)
226  *
227  * If tp is NULL, use default timeout to set up the connection.
228  */
229 CLIENT *
230 _clnt_vc_create_timed(int fd, struct netbuf *svcaddr, rpcprog_t prog,
231 	rpcvers_t vers, uint_t sendsz, uint_t recvsz, const struct timeval *tp)
232 {
233 	CLIENT *cl;			/* client handle */
234 	struct ct_data *ct;		/* private data */
235 	struct timeval now;
236 	struct rpc_msg call_msg;
237 	struct t_info tinfo;
238 	int flag;
239 
240 	cl = malloc(sizeof (*cl));
241 	ct = malloc(sizeof (*ct));
242 	if ((cl == NULL) || (ct == NULL)) {
243 		(void) syslog(LOG_ERR, clnt_vc_errstr,
244 				clnt_vc_str, __no_mem_str);
245 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
246 		rpc_createerr.cf_error.re_errno = errno;
247 		rpc_createerr.cf_error.re_terrno = 0;
248 		goto err;
249 	}
250 	ct->ct_addr.buf = NULL;
251 
252 	/*
253 	 * The only use of vctbl_lock is for serializing the creation of
254 	 * vctbl. Once created the lock needs to be released so we don't
255 	 * hold it across the set_up_connection() call and end up with a
256 	 * bunch of threads stuck waiting for the mutex.
257 	 */
258 	sig_mutex_lock(&vctbl_lock);
259 
260 	if ((vctbl == NULL) && ((vctbl = rpc_fd_init()) == NULL)) {
261 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
262 		rpc_createerr.cf_error.re_errno = errno;
263 		rpc_createerr.cf_error.re_terrno = 0;
264 		sig_mutex_unlock(&vctbl_lock);
265 		goto err;
266 	}
267 
268 	sig_mutex_unlock(&vctbl_lock);
269 
270 	ct->ct_io_mode = RPC_CL_BLOCKING;
271 	ct->ct_blocking_mode = RPC_CL_BLOCKING_FLUSH;
272 
273 	ct->ct_buffer = NULL;	/* We allocate the buffer when needed. */
274 	ct->ct_bufferSize = DEFAULT_PENDING_ZONE_MAX_SIZE;
275 	ct->ct_bufferPendingSize = 0;
276 	ct->ct_bufferWritePtr = NULL;
277 	ct->ct_bufferReadPtr = NULL;
278 
279 	/* Check the current state of the fd. */
280 	if ((flag = fcntl(fd, F_GETFL, 0)) < 0) {
281 		(void) syslog(LOG_ERR, "_clnt_vc_create_timed : %s",
282 		    no_fcntl_getfl_str);
283 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
284 		rpc_createerr.cf_error.re_terrno = errno;
285 		rpc_createerr.cf_error.re_errno = 0;
286 		goto err;
287 	}
288 	ct->ct_is_blocking = flag & O_NONBLOCK ? FALSE : TRUE;
289 
290 	if (set_up_connection(fd, svcaddr, ct, tp) == FALSE) {
291 		goto err;
292 	}
293 
294 	/*
295 	 * Set up other members of private data struct
296 	 */
297 	ct->ct_fd = fd;
298 	/*
299 	 * The actual value will be set by clnt_call or clnt_control
300 	 */
301 	ct->ct_wait = 30000;
302 	ct->ct_waitset = FALSE;
303 	/*
304 	 * By default, closeit is always FALSE. It is users responsibility
305 	 * to do a t_close on it, else the user may use clnt_control
306 	 * to let clnt_destroy do it for him/her.
307 	 */
308 	ct->ct_closeit = FALSE;
309 
310 	/*
311 	 * Initialize call message
312 	 */
313 	(void) gettimeofday(&now, (struct timezone *)0);
314 	call_msg.rm_xid = getpid() ^ now.tv_sec ^ now.tv_usec;
315 	call_msg.rm_call.cb_prog = prog;
316 	call_msg.rm_call.cb_vers = vers;
317 
318 	/*
319 	 * pre-serialize the static part of the call msg and stash it away
320 	 */
321 	xdrmem_create(&(ct->ct_xdrs), ct->ct_mcall, MCALL_MSG_SIZE, XDR_ENCODE);
322 	if (!xdr_callhdr(&(ct->ct_xdrs), &call_msg)) {
323 		goto err;
324 	}
325 	ct->ct_mpos = XDR_GETPOS(&(ct->ct_xdrs));
326 	XDR_DESTROY(&(ct->ct_xdrs));
327 
328 	if (t_getinfo(fd, &tinfo) == -1) {
329 		rpc_createerr.cf_stat = RPC_TLIERROR;
330 		rpc_createerr.cf_error.re_terrno = t_errno;
331 		rpc_createerr.cf_error.re_errno = 0;
332 		goto err;
333 	}
334 	/*
335 	 * Find the receive and the send size
336 	 */
337 	sendsz = __rpc_get_t_size((int)sendsz, tinfo.tsdu);
338 	recvsz = __rpc_get_t_size((int)recvsz, tinfo.tsdu);
339 	if ((sendsz == 0) || (recvsz == 0)) {
340 		rpc_createerr.cf_stat = RPC_TLIERROR;
341 		rpc_createerr.cf_error.re_terrno = 0;
342 		rpc_createerr.cf_error.re_errno = 0;
343 		goto err;
344 	}
345 	ct->ct_tsdu = tinfo.tsdu;
346 	/*
347 	 * Create a client handle which uses xdrrec for serialization
348 	 * and authnone for authentication.
349 	 */
350 	ct->ct_xdrs.x_ops = NULL;
351 	xdrrec_create(&(ct->ct_xdrs), sendsz, recvsz, (caddr_t)ct,
352 			read_vc, write_vc);
353 	if (ct->ct_xdrs.x_ops == NULL) {
354 		rpc_createerr.cf_stat = RPC_SYSTEMERROR;
355 		rpc_createerr.cf_error.re_terrno = 0;
356 		rpc_createerr.cf_error.re_errno = ENOMEM;
357 		goto err;
358 	}
359 	cl->cl_ops = clnt_vc_ops();
360 	cl->cl_private = (caddr_t)ct;
361 	cl->cl_auth = authnone_create();
362 	cl->cl_tp = NULL;
363 	cl->cl_netid = NULL;
364 	return (cl);
365 
366 err:
367 	if (cl) {
368 		if (ct) {
369 			if (ct->ct_addr.len)
370 				free(ct->ct_addr.buf);
371 			free(ct);
372 		}
373 		free(cl);
374 	}
375 	return (NULL);
376 }
377 
378 #define	TCPOPT_BUFSIZE 128
379 
380 /*
381  * Set tcp connection timeout value.
382  * Retun 0 for success, -1 for failure.
383  */
384 static int
385 _set_tcp_conntime(int fd, int optval)
386 {
387 	struct t_optmgmt req, res;
388 	struct opthdr *opt;
389 	int *ip;
390 	char buf[TCPOPT_BUFSIZE];
391 
392 	/* LINTED pointer cast */
393 	opt = (struct opthdr *)buf;
394 	opt->level =  IPPROTO_TCP;
395 	opt->name = TCP_CONN_ABORT_THRESHOLD;
396 	opt->len = sizeof (int);
397 
398 	req.flags = T_NEGOTIATE;
399 	req.opt.len = sizeof (struct opthdr) + opt->len;
400 	req.opt.buf = (char *)opt;
401 	/* LINTED pointer cast */
402 	ip = (int *)((char *)buf + sizeof (struct opthdr));
403 	*ip = optval;
404 
405 	res.flags = 0;
406 	res.opt.buf = (char *)buf;
407 	res.opt.maxlen = sizeof (buf);
408 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
409 		return (-1);
410 	}
411 	return (0);
412 }
413 
414 /*
415  * Get current tcp connection timeout value.
416  * Retun 0 for success, -1 for failure.
417  */
418 static int
419 _get_tcp_conntime(int fd)
420 {
421 	struct t_optmgmt req, res;
422 	struct opthdr *opt;
423 	int *ip, retval;
424 	char buf[TCPOPT_BUFSIZE];
425 
426 	/* LINTED pointer cast */
427 	opt = (struct opthdr *)buf;
428 	opt->level =  IPPROTO_TCP;
429 	opt->name = TCP_CONN_ABORT_THRESHOLD;
430 	opt->len = sizeof (int);
431 
432 	req.flags = T_CURRENT;
433 	req.opt.len = sizeof (struct opthdr) + opt->len;
434 	req.opt.buf = (char *)opt;
435 	/* LINTED pointer cast */
436 	ip = (int *)((char *)buf + sizeof (struct opthdr));
437 	*ip = 0;
438 
439 	res.flags = 0;
440 	res.opt.buf = (char *)buf;
441 	res.opt.maxlen = sizeof (buf);
442 	if (t_optmgmt(fd, &req, &res) < 0 || res.flags != T_SUCCESS) {
443 		return (-1);
444 	}
445 
446 	/* LINTED pointer cast */
447 	ip = (int *)((char *)buf + sizeof (struct opthdr));
448 	retval = *ip;
449 	return (retval);
450 }
451 
452 static bool_t
453 set_up_connection(int fd, struct netbuf *svcaddr, struct ct_data *ct,
454 						const struct timeval *tp)
455 {
456 	int state;
457 	struct t_call sndcallstr, *rcvcall;
458 	int nconnect;
459 	bool_t connected, do_rcv_connect;
460 	int curr_time = 0;
461 
462 	ct->ct_addr.len = 0;
463 	state = t_getstate(fd);
464 	if (state == -1) {
465 		rpc_createerr.cf_stat = RPC_TLIERROR;
466 		rpc_createerr.cf_error.re_errno = 0;
467 		rpc_createerr.cf_error.re_terrno = t_errno;
468 		return (FALSE);
469 	}
470 
471 #ifdef DEBUG
472 	fprintf(stderr, "set_up_connection: state = %d\n", state);
473 #endif
474 	switch (state) {
475 	case T_IDLE:
476 		if (svcaddr == NULL) {
477 			rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
478 			return (FALSE);
479 		}
480 		/*
481 		 * Connect only if state is IDLE and svcaddr known
482 		 */
483 /* LINTED pointer alignment */
484 		rcvcall = (struct t_call *)t_alloc(fd, T_CALL, T_OPT|T_ADDR);
485 		if (rcvcall == NULL) {
486 			rpc_createerr.cf_stat = RPC_TLIERROR;
487 			rpc_createerr.cf_error.re_terrno = t_errno;
488 			rpc_createerr.cf_error.re_errno = errno;
489 			return (FALSE);
490 		}
491 		rcvcall->udata.maxlen = 0;
492 		sndcallstr.addr = *svcaddr;
493 		sndcallstr.opt.len = 0;
494 		sndcallstr.udata.len = 0;
495 		/*
496 		 * Even NULL could have sufficed for rcvcall, because
497 		 * the address returned is same for all cases except
498 		 * for the gateway case, and hence required.
499 		 */
500 		connected = FALSE;
501 		do_rcv_connect = FALSE;
502 
503 		/*
504 		 * If there is a timeout value specified, we will try to
505 		 * reset the tcp connection timeout. If the transport does
506 		 * not support the TCP_CONN_ABORT_THRESHOLD option or fails
507 		 * for other reason, default timeout will be used.
508 		 */
509 		if (tp != NULL) {
510 		    int ms;
511 
512 		    /* TCP_CONN_ABORT_THRESHOLD takes int value in millisecs */
513 		    ms = tp->tv_sec * SECS_TO_MS + tp->tv_usec * USECS_TO_MS;
514 		    if (((curr_time = _get_tcp_conntime(fd)) != -1) &&
515 			(_set_tcp_conntime(fd, ms) == 0)) {
516 			/* EMPTY */
517 #ifdef DEBUG
518 			fprintf(stderr, "set_up_connection: set tcp ");
519 			fprintf(stderr, "connection timeout to %d ms\n", ms);
520 #endif
521 		    }
522 		}
523 
524 		for (nconnect = 0; nconnect < 3; nconnect++) {
525 			if (t_connect(fd, &sndcallstr, rcvcall) != -1) {
526 				connected = TRUE;
527 				break;
528 			}
529 			if (!(t_errno == TSYSERR && errno == EINTR)) {
530 				break;
531 			}
532 			if ((state = t_getstate(fd)) == T_OUTCON) {
533 				do_rcv_connect = TRUE;
534 				break;
535 			}
536 			if (state != T_IDLE) {
537 				break;
538 			}
539 		}
540 		if (do_rcv_connect) {
541 			do {
542 				if (t_rcvconnect(fd, rcvcall) != -1) {
543 					connected = TRUE;
544 					break;
545 				}
546 			} while (t_errno == TSYSERR && errno == EINTR);
547 		}
548 
549 		/*
550 		 * Set the connection timeout back to its old value.
551 		 */
552 		if (curr_time) {
553 			(void) _set_tcp_conntime(fd, curr_time);
554 		}
555 
556 		if (!connected) {
557 			rpc_createerr.cf_stat = RPC_TLIERROR;
558 			rpc_createerr.cf_error.re_terrno = t_errno;
559 			rpc_createerr.cf_error.re_errno = errno;
560 			(void) t_free((char *)rcvcall, T_CALL);
561 #ifdef DEBUG
562 			fprintf(stderr, "clnt_vc: t_connect error %d\n",
563 				rpc_createerr.cf_error.re_terrno);
564 #endif
565 			return (FALSE);
566 		}
567 
568 		/* Free old area if allocated */
569 		if (ct->ct_addr.buf)
570 			free(ct->ct_addr.buf);
571 		ct->ct_addr = rcvcall->addr;	/* To get the new address */
572 		/* So that address buf does not get freed */
573 		rcvcall->addr.buf = NULL;
574 		(void) t_free((char *)rcvcall, T_CALL);
575 		break;
576 	case T_DATAXFER:
577 	case T_OUTCON:
578 		if (svcaddr == NULL) {
579 			/*
580 			 * svcaddr could also be NULL in cases where the
581 			 * client is already bound and connected.
582 			 */
583 			ct->ct_addr.len = 0;
584 		} else {
585 			ct->ct_addr.buf = malloc(svcaddr->len);
586 			if (ct->ct_addr.buf == NULL) {
587 				(void) syslog(LOG_ERR, clnt_vc_errstr,
588 					clnt_vc_str, __no_mem_str);
589 				rpc_createerr.cf_stat = RPC_SYSTEMERROR;
590 				rpc_createerr.cf_error.re_errno = errno;
591 				rpc_createerr.cf_error.re_terrno = 0;
592 				return (FALSE);
593 			}
594 			(void) memcpy(ct->ct_addr.buf, svcaddr->buf,
595 					(size_t)svcaddr->len);
596 			ct->ct_addr.len = ct->ct_addr.maxlen = svcaddr->len;
597 		}
598 		break;
599 	default:
600 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
601 		return (FALSE);
602 	}
603 	return (TRUE);
604 }
605 
606 static enum clnt_stat
607 clnt_vc_call(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr,
608 	xdrproc_t xdr_results, caddr_t results_ptr, struct timeval timeout)
609 {
610 /* LINTED pointer alignment */
611 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
612 	XDR *xdrs = &(ct->ct_xdrs);
613 	struct rpc_msg reply_msg;
614 	uint32_t x_id;
615 /* LINTED pointer alignment */
616 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
617 	bool_t shipnow;
618 	int refreshes = 2;
619 
620 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
621 		rpc_callerr.re_status = RPC_FAILED;
622 		rpc_callerr.re_errno = errno;
623 		rpc_fd_unlock(vctbl, ct->ct_fd);
624 		return (RPC_FAILED);
625 	}
626 
627 	ct->ct_is_oneway = FALSE;
628 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
629 		if (do_flush(ct, RPC_CL_BLOCKING_FLUSH) != 0) {
630 			rpc_fd_unlock(vctbl, ct->ct_fd);
631 			return (RPC_FAILED);  /* XXX */
632 		}
633 	}
634 
635 	if (!ct->ct_waitset) {
636 		/* If time is not within limits, we ignore it. */
637 		if (time_not_ok(&timeout) == FALSE)
638 			ct->ct_wait = __rpc_timeval_to_msec(&timeout);
639 	} else {
640 		timeout.tv_sec = (ct->ct_wait / 1000);
641 		timeout.tv_usec = (ct->ct_wait % 1000) * 1000;
642 	}
643 
644 	shipnow = ((xdr_results == (xdrproc_t)0) && (timeout.tv_sec == 0) &&
645 	    (timeout.tv_usec == 0)) ? FALSE : TRUE;
646 call_again:
647 	xdrs->x_op = XDR_ENCODE;
648 	rpc_callerr.re_status = RPC_SUCCESS;
649 	/*
650 	 * Due to little endian byte order, it is necessary to convert to host
651 	 * format before decrementing xid.
652 	 */
653 	x_id = ntohl(*msg_x_id) - 1;
654 	*msg_x_id = htonl(x_id);
655 
656 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
657 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
658 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
659 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
660 		    (!xdr_args(xdrs, args_ptr))) {
661 			if (rpc_callerr.re_status == RPC_SUCCESS)
662 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
663 			(void) xdrrec_endofrecord(xdrs, TRUE);
664 			rpc_fd_unlock(vctbl, ct->ct_fd);
665 			return (rpc_callerr.re_status);
666 		}
667 	} else {
668 /* LINTED pointer alignment */
669 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
670 		IXDR_PUT_U_INT32(u, proc);
671 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
672 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
673 			if (rpc_callerr.re_status == RPC_SUCCESS)
674 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
675 			(void) xdrrec_endofrecord(xdrs, TRUE);
676 			rpc_fd_unlock(vctbl, ct->ct_fd);
677 			return (rpc_callerr.re_status);
678 		}
679 	}
680 	if (!xdrrec_endofrecord(xdrs, shipnow)) {
681 		rpc_fd_unlock(vctbl, ct->ct_fd);
682 		return (rpc_callerr.re_status = RPC_CANTSEND);
683 	}
684 	if (!shipnow) {
685 		rpc_fd_unlock(vctbl, ct->ct_fd);
686 		return (RPC_SUCCESS);
687 	}
688 	/*
689 	 * Hack to provide rpc-based message passing
690 	 */
691 	if (timeout.tv_sec == 0 && timeout.tv_usec == 0) {
692 		rpc_fd_unlock(vctbl, ct->ct_fd);
693 		return (rpc_callerr.re_status = RPC_TIMEDOUT);
694 	}
695 
696 
697 	/*
698 	 * Keep receiving until we get a valid transaction id
699 	 */
700 	xdrs->x_op = XDR_DECODE;
701 	for (;;) {
702 		reply_msg.acpted_rply.ar_verf = _null_auth;
703 		reply_msg.acpted_rply.ar_results.where = NULL;
704 		reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
705 		if (!xdrrec_skiprecord(xdrs)) {
706 			rpc_fd_unlock(vctbl, ct->ct_fd);
707 			return (rpc_callerr.re_status);
708 		}
709 		/* now decode and validate the response header */
710 		if (!xdr_replymsg(xdrs, &reply_msg)) {
711 			if (rpc_callerr.re_status == RPC_SUCCESS)
712 				continue;
713 			rpc_fd_unlock(vctbl, ct->ct_fd);
714 			return (rpc_callerr.re_status);
715 		}
716 		if (reply_msg.rm_xid == x_id)
717 			break;
718 	}
719 
720 	/*
721 	 * process header
722 	 */
723 	if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
724 	    (reply_msg.acpted_rply.ar_stat == SUCCESS))
725 		rpc_callerr.re_status = RPC_SUCCESS;
726 	else
727 		__seterr_reply(&reply_msg, &(rpc_callerr));
728 
729 	if (rpc_callerr.re_status == RPC_SUCCESS) {
730 		if (!AUTH_VALIDATE(cl->cl_auth,
731 				&reply_msg.acpted_rply.ar_verf)) {
732 			rpc_callerr.re_status = RPC_AUTHERROR;
733 			rpc_callerr.re_why = AUTH_INVALIDRESP;
734 		} else if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
735 			if (!(*xdr_results)(xdrs, results_ptr)) {
736 				if (rpc_callerr.re_status == RPC_SUCCESS)
737 				    rpc_callerr.re_status = RPC_CANTDECODERES;
738 			}
739 		} else if (!__rpc_gss_unwrap(cl->cl_auth, xdrs, xdr_results,
740 							results_ptr)) {
741 			if (rpc_callerr.re_status == RPC_SUCCESS)
742 				rpc_callerr.re_status = RPC_CANTDECODERES;
743 		}
744 	}	/* end successful completion */
745 	/*
746 	 * If unsuccesful AND error is an authentication error
747 	 * then refresh credentials and try again, else break
748 	 */
749 	else if (rpc_callerr.re_status == RPC_AUTHERROR) {
750 		/* maybe our credentials need to be refreshed ... */
751 		if (refreshes-- && AUTH_REFRESH(cl->cl_auth, &reply_msg))
752 			goto call_again;
753 		else
754 			/*
755 			 * We are setting rpc_callerr here given that libnsl
756 			 * is not reentrant thereby reinitializing the TSD.
757 			 * If not set here then success could be returned even
758 			 * though refresh failed.
759 			 */
760 			rpc_callerr.re_status = RPC_AUTHERROR;
761 	} /* end of unsuccessful completion */
762 	/* free verifier ... */
763 	if (reply_msg.rm_reply.rp_stat == MSG_ACCEPTED &&
764 			reply_msg.acpted_rply.ar_verf.oa_base != NULL) {
765 		xdrs->x_op = XDR_FREE;
766 		(void) xdr_opaque_auth(xdrs, &(reply_msg.acpted_rply.ar_verf));
767 	}
768 	rpc_fd_unlock(vctbl, ct->ct_fd);
769 	return (rpc_callerr.re_status);
770 }
771 
772 static enum clnt_stat
773 clnt_vc_send(CLIENT *cl, rpcproc_t proc, xdrproc_t xdr_args, caddr_t args_ptr)
774 {
775 /* LINTED pointer alignment */
776 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
777 	XDR *xdrs = &(ct->ct_xdrs);
778 	uint32_t x_id;
779 /* LINTED pointer alignment */
780 	uint32_t *msg_x_id = (uint32_t *)(ct->ct_mcall);	/* yuk */
781 
782 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
783 		rpc_callerr.re_status = RPC_FAILED;
784 		rpc_callerr.re_errno = errno;
785 		rpc_fd_unlock(vctbl, ct->ct_fd);
786 		return (RPC_FAILED);
787 	}
788 
789 	ct->ct_is_oneway = TRUE;
790 
791 	xdrs->x_op = XDR_ENCODE;
792 	rpc_callerr.re_status = RPC_SUCCESS;
793 	/*
794 	 * Due to little endian byte order, it is necessary to convert to host
795 	 * format before decrementing xid.
796 	 */
797 	x_id = ntohl(*msg_x_id) - 1;
798 	*msg_x_id = htonl(x_id);
799 
800 	if (cl->cl_auth->ah_cred.oa_flavor != RPCSEC_GSS) {
801 		if ((!XDR_PUTBYTES(xdrs, ct->ct_mcall, ct->ct_mpos)) ||
802 		    (!XDR_PUTINT32(xdrs, (int32_t *)&proc)) ||
803 		    (!AUTH_MARSHALL(cl->cl_auth, xdrs)) ||
804 		    (!xdr_args(xdrs, args_ptr))) {
805 			if (rpc_callerr.re_status == RPC_SUCCESS)
806 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
807 			(void) xdrrec_endofrecord(xdrs, TRUE);
808 			rpc_fd_unlock(vctbl, ct->ct_fd);
809 			return (rpc_callerr.re_status);
810 		}
811 	} else {
812 /* LINTED pointer alignment */
813 		uint32_t *u = (uint32_t *)&ct->ct_mcall[ct->ct_mpos];
814 		IXDR_PUT_U_INT32(u, proc);
815 		if (!__rpc_gss_wrap(cl->cl_auth, ct->ct_mcall,
816 		    ((char *)u) - ct->ct_mcall, xdrs, xdr_args, args_ptr)) {
817 			if (rpc_callerr.re_status == RPC_SUCCESS)
818 				rpc_callerr.re_status = RPC_CANTENCODEARGS;
819 			(void) xdrrec_endofrecord(xdrs, TRUE);
820 			rpc_fd_unlock(vctbl, ct->ct_fd);
821 			return (rpc_callerr.re_status);
822 		}
823 	}
824 
825 	/*
826 	 * Do not need to check errors, as the following code does
827 	 * not depend on the successful completion of the call.
828 	 * An error, if any occurs, is reported through
829 	 * rpc_callerr.re_status.
830 	 */
831 	(void) xdrrec_endofrecord(xdrs, TRUE);
832 
833 	rpc_fd_unlock(vctbl, ct->ct_fd);
834 	return (rpc_callerr.re_status);
835 }
836 
837 /* ARGSUSED */
838 static void
839 clnt_vc_geterr(CLIENT *cl, struct rpc_err *errp)
840 {
841 	*errp = rpc_callerr;
842 }
843 
844 static bool_t
845 clnt_vc_freeres(CLIENT *cl, xdrproc_t xdr_res, caddr_t res_ptr)
846 {
847 /* LINTED pointer alignment */
848 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
849 	XDR *xdrs = &(ct->ct_xdrs);
850 	bool_t stat;
851 
852 	(void) rpc_fd_lock(vctbl, ct->ct_fd);
853 	xdrs->x_op = XDR_FREE;
854 	stat = (*xdr_res)(xdrs, res_ptr);
855 	rpc_fd_unlock(vctbl, ct->ct_fd);
856 	return (stat);
857 }
858 
/*
 * Abort entry point for this transport. It takes no arguments and does
 * nothing.
 */
static void
clnt_vc_abort(void)
{
}
863 
864 /*ARGSUSED*/
865 static bool_t
866 clnt_vc_control(CLIENT *cl, int request, char *info)
867 {
868 	bool_t ret;
869 /* LINTED pointer alignment */
870 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
871 
872 	if (rpc_fd_lock(vctbl, ct->ct_fd)) {
873 		rpc_fd_unlock(vctbl, ct->ct_fd);
874 		return (RPC_FAILED);
875 	}
876 
877 	switch (request) {
878 	case CLSET_FD_CLOSE:
879 		ct->ct_closeit = TRUE;
880 		rpc_fd_unlock(vctbl, ct->ct_fd);
881 		return (TRUE);
882 	case CLSET_FD_NCLOSE:
883 		ct->ct_closeit = FALSE;
884 		rpc_fd_unlock(vctbl, ct->ct_fd);
885 		return (TRUE);
886 	case CLFLUSH:
887 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
888 			int res;
889 			res = do_flush(ct, (info == NULL ||
890 			    /* LINTED pointer cast */
891 			    *(int *)info == RPC_CL_DEFAULT_FLUSH)?
892 			    /* LINTED pointer cast */
893 			    ct->ct_blocking_mode: *(int *)info);
894 			ret = (0 == res);
895 		}
896 		rpc_fd_unlock(vctbl, ct->ct_fd);
897 		return (ret);
898 	}
899 
900 	/* for other requests which use info */
901 	if (info == NULL) {
902 		rpc_fd_unlock(vctbl, ct->ct_fd);
903 		return (FALSE);
904 	}
905 	switch (request) {
906 	case CLSET_TIMEOUT:
907 /* LINTED pointer alignment */
908 		if (time_not_ok((struct timeval *)info)) {
909 			rpc_fd_unlock(vctbl, ct->ct_fd);
910 			return (FALSE);
911 		}
912 /* LINTED pointer alignment */
913 		ct->ct_wait = __rpc_timeval_to_msec((struct timeval *)info);
914 		ct->ct_waitset = TRUE;
915 		break;
916 	case CLGET_TIMEOUT:
917 /* LINTED pointer alignment */
918 		((struct timeval *)info)->tv_sec = ct->ct_wait / 1000;
919 /* LINTED pointer alignment */
920 		((struct timeval *)info)->tv_usec =
921 			(ct->ct_wait % 1000) * 1000;
922 		break;
923 	case CLGET_SERVER_ADDR:	/* For compatibility only */
924 		(void) memcpy(info, ct->ct_addr.buf, (size_t)ct->ct_addr.len);
925 		break;
926 	case CLGET_FD:
927 /* LINTED pointer alignment */
928 		*(int *)info = ct->ct_fd;
929 		break;
930 	case CLGET_SVC_ADDR:
931 		/* The caller should not free this memory area */
932 /* LINTED pointer alignment */
933 		*(struct netbuf *)info = ct->ct_addr;
934 		break;
935 	case CLSET_SVC_ADDR:		/* set to new address */
936 #ifdef undef
937 		/*
938 		 * XXX: once the t_snddis(), followed by t_connect() starts to
939 		 * work, this ifdef should be removed.  CLIENT handle reuse
940 		 * would then be possible for COTS as well.
941 		 */
942 		if (t_snddis(ct->ct_fd, NULL) == -1) {
943 			rpc_createerr.cf_stat = RPC_TLIERROR;
944 			rpc_createerr.cf_error.re_terrno = t_errno;
945 			rpc_createerr.cf_error.re_errno = errno;
946 			rpc_fd_unlock(vctbl, ct->ct_fd);
947 			return (FALSE);
948 		}
949 		ret = set_up_connection(ct->ct_fd, (struct netbuf *)info,
950 			ct, NULL));
951 		rpc_fd_unlock(vctbl, ct->ct_fd);
952 		return (ret);
953 #else
954 		rpc_fd_unlock(vctbl, ct->ct_fd);
955 		return (FALSE);
956 #endif
957 	case CLGET_XID:
958 		/*
959 		 * use the knowledge that xid is the
960 		 * first element in the call structure
961 		 * This will get the xid of the PREVIOUS call
962 		 */
963 /* LINTED pointer alignment */
964 		*(uint32_t *)info = ntohl(*(uint32_t *)ct->ct_mcall);
965 		break;
966 	case CLSET_XID:
967 		/* This will set the xid of the NEXT call */
968 /* LINTED pointer alignment */
969 		*(uint32_t *)ct->ct_mcall =  htonl(*(uint32_t *)info + 1);
970 		/* increment by 1 as clnt_vc_call() decrements once */
971 		break;
972 	case CLGET_VERS:
973 		/*
974 		 * This RELIES on the information that, in the call body,
975 		 * the version number field is the fifth field from the
976 		 * begining of the RPC header. MUST be changed if the
977 		 * call_struct is changed
978 		 */
979 /* LINTED pointer alignment */
980 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
981 						4 * BYTES_PER_XDR_UNIT));
982 		break;
983 
984 	case CLSET_VERS:
985 /* LINTED pointer alignment */
986 		*(uint32_t *)(ct->ct_mcall + 4 * BYTES_PER_XDR_UNIT) =
987 /* LINTED pointer alignment */
988 			htonl(*(uint32_t *)info);
989 		break;
990 
991 	case CLGET_PROG:
992 		/*
993 		 * This RELIES on the information that, in the call body,
994 		 * the program number field is the fourth field from the
995 		 * begining of the RPC header. MUST be changed if the
996 		 * call_struct is changed
997 		 */
998 /* LINTED pointer alignment */
999 		*(uint32_t *)info = ntohl(*(uint32_t *)(ct->ct_mcall +
1000 						3 * BYTES_PER_XDR_UNIT));
1001 		break;
1002 
1003 	case CLSET_PROG:
1004 /* LINTED pointer alignment */
1005 		*(uint32_t *)(ct->ct_mcall + 3 * BYTES_PER_XDR_UNIT) =
1006 /* LINTED pointer alignment */
1007 			htonl(*(uint32_t *)info);
1008 		break;
1009 
1010 	case CLSET_IO_MODE:
1011 		/* LINTED pointer cast */
1012 		if (!set_io_mode(ct, *(int *)info)) {
1013 		    rpc_fd_unlock(vctbl, ct->ct_fd);
1014 		    return (FALSE);
1015 		}
1016 		break;
1017 	case CLSET_FLUSH_MODE:
1018 		/* Set a specific FLUSH_MODE */
1019 		/* LINTED pointer cast */
1020 		if (!set_flush_mode(ct, *(int *)info)) {
1021 		    rpc_fd_unlock(vctbl, ct->ct_fd);
1022 		    return (FALSE);
1023 		}
1024 		break;
1025 	case CLGET_FLUSH_MODE:
1026 		/* LINTED pointer cast */
1027 		*(rpcflushmode_t *)info = ct->ct_blocking_mode;
1028 		break;
1029 
1030 	case CLGET_IO_MODE:
1031 		/* LINTED pointer cast */
1032 		*(rpciomode_t *)info = ct->ct_io_mode;
1033 		break;
1034 
1035 	case CLGET_CURRENT_REC_SIZE:
1036 		/*
1037 		 * Returns the current amount of memory allocated
1038 		 * to pending requests
1039 		 */
1040 		/* LINTED pointer cast */
1041 		*(int *)info = ct->ct_bufferPendingSize;
1042 		break;
1043 
1044 	case CLSET_CONNMAXREC_SIZE:
1045 		/* Cannot resize the buffer if it is used. */
1046 		if (ct->ct_bufferPendingSize != 0) {
1047 			rpc_fd_unlock(vctbl, ct->ct_fd);
1048 			return (FALSE);
1049 		}
1050 		/*
1051 		 * If the new size is equal to the current size,
1052 		 * there is nothing to do.
1053 		 */
1054 		/* LINTED pointer cast */
1055 		if (ct->ct_bufferSize == *(uint_t *)info)
1056 			break;
1057 
1058 		/* LINTED pointer cast */
1059 		ct->ct_bufferSize = *(uint_t *)info;
1060 		if (ct->ct_buffer) {
1061 			free(ct->ct_buffer);
1062 			ct->ct_buffer = NULL;
1063 			ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = NULL;
1064 		}
1065 		break;
1066 
1067 	case CLGET_CONNMAXREC_SIZE:
1068 		/*
1069 		 * Returns the size of buffer allocated
1070 		 * to pending requests
1071 		 */
1072 		/* LINTED pointer cast */
1073 		*(uint_t *)info = ct->ct_bufferSize;
1074 		break;
1075 
1076 	default:
1077 		rpc_fd_unlock(vctbl, ct->ct_fd);
1078 		return (FALSE);
1079 	}
1080 	rpc_fd_unlock(vctbl, ct->ct_fd);
1081 	return (TRUE);
1082 }
1083 
1084 static void
1085 clnt_vc_destroy(CLIENT *cl)
1086 {
1087 /* LINTED pointer alignment */
1088 	struct ct_data *ct = (struct ct_data *)cl->cl_private;
1089 	int ct_fd = ct->ct_fd;
1090 
1091 	(void) rpc_fd_lock(vctbl, ct_fd);
1092 
1093 	if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1094 		(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1095 		(void) unregister_nb(ct);
1096 	}
1097 
1098 	if (ct->ct_closeit)
1099 		(void) t_close(ct_fd);
1100 	XDR_DESTROY(&(ct->ct_xdrs));
1101 	if (ct->ct_addr.buf)
1102 		free(ct->ct_addr.buf);
1103 	free(ct);
1104 	if (cl->cl_netid && cl->cl_netid[0])
1105 		free(cl->cl_netid);
1106 	if (cl->cl_tp && cl->cl_tp[0])
1107 		free(cl->cl_tp);
1108 	free(cl);
1109 	rpc_fd_unlock(vctbl, ct_fd);
1110 }
1111 
1112 /*
1113  * Interface between xdr serializer and vc connection.
1114  * Behaves like the system calls, read & write, but keeps some error state
1115  * around for the rpc level.
1116  */
1117 static int
1118 read_vc(void *ct_tmp, caddr_t buf, int len)
1119 {
1120 	static pthread_key_t pfdp_key;
1121 	struct pollfd *pfdp;
1122 	int npfd;		/* total number of pfdp allocated */
1123 	struct ct_data *ct = ct_tmp;
1124 	struct timeval starttime;
1125 	struct timeval curtime;
1126 	int poll_time;
1127 	int delta;
1128 
1129 	if (len == 0)
1130 		return (0);
1131 
1132 	/*
1133 	 * Allocate just one the first time.  thr_get_storage() may
1134 	 * return a larger buffer, left over from the last time we were
1135 	 * here, but that's OK.  realloc() will deal with it properly.
1136 	 */
1137 	npfd = 1;
1138 	pfdp = thr_get_storage(&pfdp_key, sizeof (struct pollfd), free);
1139 	if (pfdp == NULL) {
1140 		(void) syslog(LOG_ERR, clnt_vc_errstr,
1141 			clnt_read_vc_str, __no_mem_str);
1142 		rpc_callerr.re_status = RPC_SYSTEMERROR;
1143 		rpc_callerr.re_errno = errno;
1144 		rpc_callerr.re_terrno = 0;
1145 		return (-1);
1146 	}
1147 
1148 	/*
1149 	 *	N.B.:  slot 0 in the pollfd array is reserved for the file
1150 	 *	descriptor we're really interested in (as opposed to the
1151 	 *	callback descriptors).
1152 	 */
1153 	pfdp[0].fd = ct->ct_fd;
1154 	pfdp[0].events = MASKVAL;
1155 	pfdp[0].revents = 0;
1156 	poll_time = ct->ct_wait;
1157 	if (gettimeofday(&starttime, NULL) == -1) {
1158 		syslog(LOG_ERR, "Unable to get time of day: %m");
1159 		return (-1);
1160 	}
1161 
1162 	for (;;) {
1163 		extern void (*_svc_getreqset_proc)();
1164 		extern pollfd_t *svc_pollfd;
1165 		extern int svc_max_pollfd;
1166 		int fds;
1167 
1168 		/* VARIABLES PROTECTED BY svc_fd_lock: svc_pollfd */
1169 
1170 		if (_svc_getreqset_proc) {
1171 			sig_rw_rdlock(&svc_fd_lock);
1172 
1173 			/* reallocate pfdp to svc_max_pollfd +1 */
1174 			if (npfd != (svc_max_pollfd + 1)) {
1175 				struct pollfd *tmp_pfdp = realloc(pfdp,
1176 						sizeof (struct pollfd) *
1177 						(svc_max_pollfd + 1));
1178 				if (tmp_pfdp == NULL) {
1179 					sig_rw_unlock(&svc_fd_lock);
1180 					(void) syslog(LOG_ERR, clnt_vc_errstr,
1181 						clnt_read_vc_str, __no_mem_str);
1182 					rpc_callerr.re_status = RPC_SYSTEMERROR;
1183 					rpc_callerr.re_errno = errno;
1184 					rpc_callerr.re_terrno = 0;
1185 					return (-1);
1186 				}
1187 
1188 				pfdp = tmp_pfdp;
1189 				npfd = svc_max_pollfd + 1;
1190 				(void) pthread_setspecific(pfdp_key, pfdp);
1191 			}
1192 			if (npfd > 1)
1193 				(void) memcpy(&pfdp[1], svc_pollfd,
1194 				    sizeof (struct pollfd) * (npfd - 1));
1195 
1196 			sig_rw_unlock(&svc_fd_lock);
1197 		} else {
1198 			npfd = 1;	/* don't forget about pfdp[0] */
1199 		}
1200 
1201 		switch (fds = poll(pfdp, npfd, poll_time)) {
1202 		case 0:
1203 			rpc_callerr.re_status = RPC_TIMEDOUT;
1204 			return (-1);
1205 
1206 		case -1:
1207 			if (errno != EINTR)
1208 				continue;
1209 			else {
1210 				/*
1211 				 * interrupted by another signal,
1212 				 * update time_waited
1213 				 */
1214 
1215 				if (gettimeofday(&curtime, NULL) == -1) {
1216 					syslog(LOG_ERR,
1217 					    "Unable to get time of day:  %m");
1218 					errno = 0;
1219 					continue;
1220 				};
1221 				delta = (curtime.tv_sec -
1222 						starttime.tv_sec) * 1000 +
1223 					(curtime.tv_usec -
1224 						starttime.tv_usec) / 1000;
1225 				poll_time -= delta;
1226 				if (poll_time < 0) {
1227 					rpc_callerr.re_status =
1228 						RPC_TIMEDOUT;
1229 					errno = 0;
1230 					return (-1);
1231 				} else {
1232 					errno = 0; /* reset it */
1233 					continue;
1234 				}
1235 			}
1236 		}
1237 
1238 		if (pfdp[0].revents == 0) {
1239 			/* must be for server side of the house */
1240 			(*_svc_getreqset_proc)(&pfdp[1], fds);
1241 			continue;	/* do poll again */
1242 		}
1243 
1244 		if (pfdp[0].revents & POLLNVAL) {
1245 			rpc_callerr.re_status = RPC_CANTRECV;
1246 			/*
1247 			 *	Note:  we're faking errno here because we
1248 			 *	previously would have expected select() to
1249 			 *	return -1 with errno EBADF.  Poll(BA_OS)
1250 			 *	returns 0 and sets the POLLNVAL revents flag
1251 			 *	instead.
1252 			 */
1253 			rpc_callerr.re_errno = errno = EBADF;
1254 			return (-1);
1255 		}
1256 
1257 		if (pfdp[0].revents & (POLLERR | POLLHUP)) {
1258 			rpc_callerr.re_status = RPC_CANTRECV;
1259 			rpc_callerr.re_errno = errno = EPIPE;
1260 			return (-1);
1261 		}
1262 		break;
1263 	}
1264 
1265 	switch (len = t_rcvall(ct->ct_fd, buf, len)) {
1266 	case 0:
1267 		/* premature eof */
1268 		rpc_callerr.re_errno = ENOLINK;
1269 		rpc_callerr.re_terrno = 0;
1270 		rpc_callerr.re_status = RPC_CANTRECV;
1271 		len = -1;	/* it's really an error */
1272 		break;
1273 
1274 	case -1:
1275 		rpc_callerr.re_terrno = t_errno;
1276 		rpc_callerr.re_errno = 0;
1277 		rpc_callerr.re_status = RPC_CANTRECV;
1278 		break;
1279 	}
1280 	return (len);
1281 }
1282 
1283 static int
1284 write_vc(void *ct_tmp, caddr_t buf, int len)
1285 {
1286 	int i, cnt;
1287 	struct ct_data *ct = ct_tmp;
1288 	int flag;
1289 	int maxsz;
1290 
1291 	maxsz = ct->ct_tsdu;
1292 
1293 	/* Handle the non-blocking mode */
1294 	if (ct->ct_is_oneway && ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1295 		/*
1296 		 * Test a special case here. If the length of the current
1297 		 * write is greater than the transport data unit, and the
1298 		 * mode is non blocking, we return RPC_CANTSEND.
1299 		 * XXX  this is not very clean.
1300 		 */
1301 		if (maxsz > 0 && len > maxsz) {
1302 			rpc_callerr.re_terrno = errno;
1303 			rpc_callerr.re_errno = 0;
1304 			rpc_callerr.re_status = RPC_CANTSEND;
1305 			return (-1);
1306 		}
1307 
1308 		len = nb_send(ct, buf, (unsigned)len);
1309 		if (len == -1) {
1310 			rpc_callerr.re_terrno = errno;
1311 			rpc_callerr.re_errno = 0;
1312 			rpc_callerr.re_status = RPC_CANTSEND;
1313 		} else if (len == -2) {
1314 			rpc_callerr.re_terrno = 0;
1315 			rpc_callerr.re_errno = 0;
1316 			rpc_callerr.re_status = RPC_CANTSTORE;
1317 		}
1318 		return (len);
1319 	}
1320 
1321 	if ((maxsz == 0) || (maxsz == -1)) {
1322 		/*
1323 		 * T_snd may return -1 for error on connection (connection
1324 		 * needs to be repaired/closed, and -2 for flow-control
1325 		 * handling error (no operation to do, just wait and call
1326 		 * T_Flush()).
1327 		 */
1328 		if ((len = t_snd(ct->ct_fd, buf, (unsigned)len, 0)) == -1) {
1329 			rpc_callerr.re_terrno = t_errno;
1330 			rpc_callerr.re_errno = 0;
1331 			rpc_callerr.re_status = RPC_CANTSEND;
1332 		}
1333 		return (len);
1334 	}
1335 
1336 	/*
1337 	 * This for those transports which have a max size for data.
1338 	 */
1339 	for (cnt = len, i = 0; cnt > 0; cnt -= i, buf += i) {
1340 		flag = cnt > maxsz ? T_MORE : 0;
1341 		if ((i = t_snd(ct->ct_fd, buf, (unsigned)MIN(cnt, maxsz),
1342 				flag)) == -1) {
1343 			rpc_callerr.re_terrno = t_errno;
1344 			rpc_callerr.re_errno = 0;
1345 			rpc_callerr.re_status = RPC_CANTSEND;
1346 			return (-1);
1347 		}
1348 	}
1349 	return (len);
1350 }
1351 
1352 /*
1353  * Receive the required bytes of data, even if it is fragmented.
1354  */
1355 static int
1356 t_rcvall(int fd, char *buf, int len)
1357 {
1358 	int moreflag;
1359 	int final = 0;
1360 	int res;
1361 
1362 	do {
1363 		moreflag = 0;
1364 		res = t_rcv(fd, buf, (unsigned)len, &moreflag);
1365 		if (res == -1) {
1366 			if (t_errno == TLOOK)
1367 				switch (t_look(fd)) {
1368 				case T_DISCONNECT:
1369 					(void) t_rcvdis(fd, NULL);
1370 					(void) t_snddis(fd, NULL);
1371 					return (-1);
1372 				case T_ORDREL:
1373 				/* Received orderly release indication */
1374 					(void) t_rcvrel(fd);
1375 				/* Send orderly release indicator */
1376 					(void) t_sndrel(fd);
1377 					return (-1);
1378 				default:
1379 					return (-1);
1380 				}
1381 		} else if (res == 0) {
1382 			return (0);
1383 		}
1384 		final += res;
1385 		buf += res;
1386 		len -= res;
1387 	} while ((len > 0) && (moreflag & T_MORE));
1388 	return (final);
1389 }
1390 
1391 static struct clnt_ops *
1392 clnt_vc_ops(void)
1393 {
1394 	static struct clnt_ops ops;
1395 	extern mutex_t	ops_lock;
1396 
1397 	/* VARIABLES PROTECTED BY ops_lock: ops */
1398 
1399 	sig_mutex_lock(&ops_lock);
1400 	if (ops.cl_call == NULL) {
1401 		ops.cl_call = clnt_vc_call;
1402 		ops.cl_send = clnt_vc_send;
1403 		ops.cl_abort = clnt_vc_abort;
1404 		ops.cl_geterr = clnt_vc_geterr;
1405 		ops.cl_freeres = clnt_vc_freeres;
1406 		ops.cl_destroy = clnt_vc_destroy;
1407 		ops.cl_control = clnt_vc_control;
1408 	}
1409 	sig_mutex_unlock(&ops_lock);
1410 	return (&ops);
1411 }
1412 
1413 /*
1414  * Make sure that the time is not garbage.   -1 value is disallowed.
1415  * Note this is different from time_not_ok in clnt_dg.c
1416  */
1417 static bool_t
1418 time_not_ok(struct timeval *t)
1419 {
1420 	return (t->tv_sec <= -1 || t->tv_sec > 100000000 ||
1421 		t->tv_usec <= -1 || t->tv_usec > 1000000);
1422 }
1423 
1424 
/*
 * Compute the # of bytes that remains until the end of the buffer.
 *
 * `p' is the name of a ct_data pointer member without its "ct_" prefix
 * (the callers pass bufferReadPtr or bufferWritePtr).  The macro expands
 * in terms of a variable named `ct', which must be in scope at the point
 * of use.
 */
#define	REMAIN_BYTES(p) (ct->ct_bufferSize-(ct->ct_##p - ct->ct_buffer))
1427 
1428 static int
1429 addInBuffer(struct ct_data *ct, char *dataToAdd, unsigned int nBytes)
1430 {
1431 	if (NULL == ct->ct_buffer) {
1432 		/* Buffer not allocated yet. */
1433 		char *buffer;
1434 
1435 		buffer = malloc(ct->ct_bufferSize);
1436 		if (NULL == buffer) {
1437 			errno = ENOMEM;
1438 			return (-1);
1439 		}
1440 		(void) memcpy(buffer, dataToAdd, nBytes);
1441 
1442 		ct->ct_buffer = buffer;
1443 		ct->ct_bufferReadPtr = buffer;
1444 		ct->ct_bufferWritePtr = buffer + nBytes;
1445 		ct->ct_bufferPendingSize = nBytes;
1446 	} else {
1447 		/*
1448 		 * For an already allocated buffer, two mem copies
1449 		 * might be needed, depending on the current
1450 		 * writing position.
1451 		 */
1452 
1453 		/* Compute the length of the first copy. */
1454 		int len = MIN(nBytes, REMAIN_BYTES(bufferWritePtr));
1455 
1456 		ct->ct_bufferPendingSize += nBytes;
1457 
1458 		(void) memcpy(ct->ct_bufferWritePtr, dataToAdd, len);
1459 		ct->ct_bufferWritePtr += len;
1460 		nBytes -= len;
1461 		if (0 == nBytes) {
1462 			/* One memcopy needed. */
1463 
1464 			/*
1465 			 * If the write pointer is at the end of the buffer,
1466 			 * wrap it now.
1467 			 */
1468 			if (ct->ct_bufferWritePtr ==
1469 			    (ct->ct_buffer + ct->ct_bufferSize)) {
1470 				ct->ct_bufferWritePtr = ct->ct_buffer;
1471 			}
1472 		} else {
1473 			/* Two memcopy needed. */
1474 			dataToAdd += len;
1475 
1476 			/*
1477 			 * Copy the remaining data to the beginning of the
1478 			 * buffer
1479 			 */
1480 			(void) memcpy(ct->ct_buffer, dataToAdd, nBytes);
1481 			ct->ct_bufferWritePtr = ct->ct_buffer + nBytes;
1482 		}
1483 	}
1484 	return (0);
1485 }
1486 
1487 static void
1488 consumeFromBuffer(struct ct_data *ct, unsigned int nBytes)
1489 {
1490 	ct->ct_bufferPendingSize -= nBytes;
1491 	if (ct->ct_bufferPendingSize == 0) {
1492 		/*
1493 		 * If the buffer contains no data, we set the two pointers at
1494 		 * the beginning of the buffer (to miminize buffer wraps).
1495 		 */
1496 		ct->ct_bufferReadPtr = ct->ct_bufferWritePtr = ct->ct_buffer;
1497 	} else {
1498 		ct->ct_bufferReadPtr += nBytes;
1499 		if (ct->ct_bufferReadPtr >
1500 		    ct->ct_buffer + ct->ct_bufferSize) {
1501 			ct->ct_bufferReadPtr -= ct->ct_bufferSize;
1502 		}
1503 	}
1504 }
1505 
1506 static int
1507 iovFromBuffer(struct ct_data *ct, struct iovec *iov)
1508 {
1509 	int l;
1510 
1511 	if (ct->ct_bufferPendingSize == 0)
1512 		return (0);
1513 
1514 	l = REMAIN_BYTES(bufferReadPtr);
1515 	if (l < ct->ct_bufferPendingSize) {
1516 		/* Buffer in two fragments. */
1517 		iov[0].iov_base = ct->ct_bufferReadPtr;
1518 		iov[0].iov_len  = l;
1519 
1520 		iov[1].iov_base = ct->ct_buffer;
1521 		iov[1].iov_len  = ct->ct_bufferPendingSize - l;
1522 		return (2);
1523 	} else {
1524 		/* Buffer in one fragment. */
1525 		iov[0].iov_base = ct->ct_bufferReadPtr;
1526 		iov[0].iov_len  = ct->ct_bufferPendingSize;
1527 		return (1);
1528 	}
1529 }
1530 
1531 static bool_t
1532 set_flush_mode(struct ct_data *ct, int mode)
1533 {
1534 	switch (mode) {
1535 	case RPC_CL_BLOCKING_FLUSH:
1536 		/* flush as most as possible without blocking */
1537 	case RPC_CL_BESTEFFORT_FLUSH:
1538 		/* flush the buffer completely (possibly blocking) */
1539 	case RPC_CL_DEFAULT_FLUSH:
1540 		/* flush according to the currently defined policy */
1541 		ct->ct_blocking_mode = mode;
1542 		return (TRUE);
1543 	default:
1544 		return (FALSE);
1545 	}
1546 }
1547 
1548 static bool_t
1549 set_io_mode(struct ct_data *ct, int ioMode)
1550 {
1551 	switch (ioMode) {
1552 	case RPC_CL_BLOCKING:
1553 		if (ct->ct_io_mode == RPC_CL_NONBLOCKING) {
1554 			if (NULL != ct->ct_buffer) {
1555 				/*
1556 				 * If a buffer was allocated for this
1557 				 * connection, flush it now, and free it.
1558 				 */
1559 				(void) do_flush(ct, RPC_CL_BLOCKING_FLUSH);
1560 				free(ct->ct_buffer);
1561 				ct->ct_buffer = NULL;
1562 			}
1563 			(void) unregister_nb(ct);
1564 			ct->ct_io_mode = ioMode;
1565 		}
1566 		break;
1567 	case RPC_CL_NONBLOCKING:
1568 		if (ct->ct_io_mode == RPC_CL_BLOCKING) {
1569 			if (-1 == register_nb(ct)) {
1570 				return (FALSE);
1571 			}
1572 			ct->ct_io_mode = ioMode;
1573 		}
1574 		break;
1575 	default:
1576 		return (FALSE);
1577 	}
1578 	return (TRUE);
1579 }
1580 
1581 static int
1582 do_flush(struct ct_data *ct, uint_t flush_mode)
1583 {
1584 	int result;
1585 	if (ct->ct_bufferPendingSize == 0) {
1586 		return (0);
1587 	}
1588 
1589 	switch (flush_mode) {
1590 	case RPC_CL_BLOCKING_FLUSH:
1591 		if (!set_blocking_connection(ct, TRUE)) {
1592 			return (-1);
1593 		}
1594 		while (ct->ct_bufferPendingSize > 0) {
1595 			if (REMAIN_BYTES(bufferReadPtr) <
1596 			    ct->ct_bufferPendingSize) {
1597 				struct iovec iov[2];
1598 				(void) iovFromBuffer(ct, iov);
1599 				result = writev(ct->ct_fd, iov, 2);
1600 			} else {
1601 				result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1602 				    ct->ct_bufferPendingSize, 0);
1603 			}
1604 			if (result < 0) {
1605 				return (-1);
1606 			}
1607 			consumeFromBuffer(ct, result);
1608 		}
1609 
1610 		break;
1611 
1612 	case RPC_CL_BESTEFFORT_FLUSH:
1613 		(void) set_blocking_connection(ct, FALSE);
1614 		if (REMAIN_BYTES(bufferReadPtr) < ct->ct_bufferPendingSize) {
1615 			struct iovec iov[2];
1616 			(void) iovFromBuffer(ct, iov);
1617 			result = writev(ct->ct_fd, iov, 2);
1618 		} else {
1619 			result = t_snd(ct->ct_fd, ct->ct_bufferReadPtr,
1620 			    ct->ct_bufferPendingSize, 0);
1621 		}
1622 		if (result < 0) {
1623 			if (errno != EWOULDBLOCK) {
1624 				perror("flush");
1625 				return (-1);
1626 			}
1627 			return (0);
1628 		}
1629 		if (result > 0)
1630 			consumeFromBuffer(ct, result);
1631 		break;
1632 	}
1633 	return (0);
1634 }
1635 
1636 /*
1637  * Non blocking send.
1638  */
1639 
1640 static int
1641 nb_send(struct ct_data *ct, void *buff, unsigned int nBytes)
1642 {
1643 	int result;
1644 
1645 	if (!(ntohl(*(uint32_t *)buff) & 2^31)) {
1646 		return (-1);
1647 	}
1648 
1649 	/*
1650 	 * Check to see if the current message can be stored fully in the
1651 	 * buffer. We have to check this now because it may be impossible
1652 	 * to send any data, so the message must be stored in the buffer.
1653 	 */
1654 	if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize)) {
1655 		/* Try to flush  (to free some space). */
1656 		(void) do_flush(ct, RPC_CL_BESTEFFORT_FLUSH);
1657 
1658 		/* Can we store the message now ? */
1659 		if (nBytes > (ct->ct_bufferSize - ct->ct_bufferPendingSize))
1660 			return (-2);
1661 	}
1662 
1663 	(void) set_blocking_connection(ct, FALSE);
1664 
1665 	/*
1666 	 * If there is no data pending, we can simply try
1667 	 * to send our data.
1668 	 */
1669 	if (ct->ct_bufferPendingSize == 0) {
1670 		result = t_snd(ct->ct_fd, buff, nBytes, 0);
1671 		if (result == -1) {
1672 			if (errno == EWOULDBLOCK) {
1673 				result = 0;
1674 			} else {
1675 				perror("send");
1676 				return (-1);
1677 			}
1678 		}
1679 		/*
1680 		 * If we have not sent all data, we must store them
1681 		 * in the buffer.
1682 		 */
1683 		if (result != nBytes) {
1684 			if (addInBuffer(ct, (char *)buff + result,
1685 			    nBytes - result) == -1) {
1686 				return (-1);
1687 			}
1688 		}
1689 	} else {
1690 		/*
1691 		 * Some data pending in the buffer.  We try to send
1692 		 * both buffer data and current message in one shot.
1693 		 */
1694 		struct iovec iov[3];
1695 		int i = iovFromBuffer(ct, &iov[0]);
1696 
1697 		iov[i].iov_base = buff;
1698 		iov[i].iov_len  = nBytes;
1699 
1700 		result = writev(ct->ct_fd, iov, i+1);
1701 		if (result == -1) {
1702 			if (errno == EWOULDBLOCK) {
1703 				/* No bytes sent */
1704 				result = 0;
1705 			} else {
1706 				return (-1);
1707 			}
1708 		}
1709 
1710 		/*
1711 		 * Add the bytes from the message
1712 		 * that we have not sent.
1713 		 */
1714 		if (result <= ct->ct_bufferPendingSize) {
1715 			/* No bytes from the message sent */
1716 			consumeFromBuffer(ct, result);
1717 			if (addInBuffer(ct, buff, nBytes) == -1) {
1718 				return (-1);
1719 			}
1720 		} else {
1721 			/*
1722 			 * Some bytes of the message are sent.
1723 			 * Compute the length of the message that has
1724 			 * been sent.
1725 			 */
1726 			int len = result - ct->ct_bufferPendingSize;
1727 
1728 			/* So, empty the buffer. */
1729 			ct->ct_bufferReadPtr = ct->ct_buffer;
1730 			ct->ct_bufferWritePtr = ct->ct_buffer;
1731 			ct->ct_bufferPendingSize = 0;
1732 
1733 			/* And add the remaining part of the message. */
1734 			if (len != nBytes) {
1735 				if (addInBuffer(ct, (char *)buff + len,
1736 					nBytes-len) == -1) {
1737 					return (-1);
1738 				}
1739 			}
1740 		}
1741 	}
1742 	return (nBytes);
1743 }
1744 
1745 static void
1746 flush_registered_clients(void)
1747 {
1748 	struct nb_reg_node *node;
1749 
1750 	if (LIST_ISEMPTY(nb_first)) {
1751 		return;
1752 	}
1753 
1754 	LIST_FOR_EACH(nb_first, node) {
1755 		(void) do_flush(node->ct, RPC_CL_BLOCKING_FLUSH);
1756 	}
1757 }
1758 
1759 static int
1760 allocate_chunk(void)
1761 {
1762 #define	CHUNK_SIZE 16
1763 	struct nb_reg_node *chk =
1764 	    malloc(sizeof (struct nb_reg_node) * CHUNK_SIZE);
1765 	struct nb_reg_node *n;
1766 	int i;
1767 
1768 	if (NULL == chk) {
1769 		return (-1);
1770 	}
1771 
1772 	n = chk;
1773 	for (i = 0; i < CHUNK_SIZE-1; ++i) {
1774 		n[i].next = &(n[i+1]);
1775 	}
1776 	n[CHUNK_SIZE-1].next = (struct nb_reg_node *)&nb_free;
1777 	nb_free = chk;
1778 	return (0);
1779 }
1780 
1781 static int
1782 register_nb(struct ct_data *ct)
1783 {
1784 	struct nb_reg_node *node;
1785 
1786 	(void) mutex_lock(&nb_list_mutex);
1787 
1788 	if (LIST_ISEMPTY(nb_free) && (allocate_chunk() == -1)) {
1789 		(void) mutex_unlock(&nb_list_mutex);
1790 		errno = ENOMEM;
1791 		return (-1);
1792 	}
1793 
1794 	if (!exit_handler_set) {
1795 		(void) atexit(flush_registered_clients);
1796 		exit_handler_set = TRUE;
1797 	}
1798 	/* Get the first free node */
1799 	LIST_EXTRACT(nb_free, node);
1800 
1801 	node->ct = ct;
1802 
1803 	LIST_ADD(nb_first, node);
1804 	(void) mutex_unlock(&nb_list_mutex);
1805 
1806 	return (0);
1807 }
1808 
/*
 * unregister_nb - remove a connection from the list of non-blocking
 * clients and give its node back to the nb_free list.
 *
 * Runs under nb_list_mutex and asserts that nb_first is not empty.
 * Always returns 0, whether or not a matching node was found.
 */
static int
unregister_nb(struct ct_data *ct)
{
	struct nb_reg_node *node;

	(void) mutex_lock(&nb_list_mutex);
	assert(!LIST_ISEMPTY(nb_first));

	/*
	 * NOTE(review): this assignment looks redundant if LIST_FOR_EACH
	 * initializes its iterator -- confirm against the macro definition.
	 */
	node = nb_first;
	LIST_FOR_EACH(nb_first, node) {
		/*
		 * The comparison is made on the node AFTER the iterator so
		 * that the predecessor is at hand for unlinking.
		 * NOTE(review): as written, the node the iteration starts on
		 * is never itself compared, and node->next is dereferenced
		 * for the last node too -- verify both against the LIST_*
		 * macros and the list's end marker.
		 */
		if (node->next->ct == ct) {
			/* Get the node to unregister. */
			struct nb_reg_node *n = node->next;
			/* Unlink it from the registered list. */
			node->next = n->next;

			/* Clear the node and return it to the free list. */
			n->ct = NULL;
			LIST_ADD(nb_free, n);
			break;
		}
	}
	(void) mutex_unlock(&nb_list_mutex);
	return (0);
}
1832