xref: /freebsd/sys/rpc/clnt_dg.c (revision 13de33a5dc2304b13d595d75d48c51793958474f)
1 /*	$NetBSD: clnt_dg.c,v 1.4 2000/07/14 08:40:41 fvdl Exp $	*/
2 
3 /*-
4  * Copyright (c) 2009, Sun Microsystems, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  * - Redistributions of source code must retain the above copyright notice,
10  *   this list of conditions and the following disclaimer.
11  * - Redistributions in binary form must reproduce the above copyright notice,
12  *   this list of conditions and the following disclaimer in the documentation
13  *   and/or other materials provided with the distribution.
14  * - Neither the name of Sun Microsystems, Inc. nor the names of its
15  *   contributors may be used to endorse or promote products derived
16  *   from this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 /*
31  * Copyright (c) 1986-1991 by Sun Microsystems Inc.
32  */
33 
34 #if defined(LIBC_SCCS) && !defined(lint)
35 #ident	"@(#)clnt_dg.c	1.23	94/04/22 SMI"
36 static char sccsid[] = "@(#)clnt_dg.c 1.19 89/03/16 Copyr 1988 Sun Micro";
37 #endif
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 /*
42  * Implements a connectionless client-side RPC.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/kernel.h>
48 #include <sys/lock.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/mutex.h>
52 #include <sys/pcpu.h>
53 #include <sys/proc.h>
54 #include <sys/socket.h>
55 #include <sys/socketvar.h>
56 #include <sys/time.h>
57 #include <sys/uio.h>
58 
59 #include <net/vnet.h>
60 
61 #include <rpc/rpc.h>
62 #include <rpc/rpc_com.h>
63 
64 
65 #ifdef _FREEFALL_CONFIG
66 /*
67  * Disable RPC exponential back-off for FreeBSD.org systems.
68  */
69 #define	RPC_MAX_BACKOFF		1 /* second */
70 #else
71 #define	RPC_MAX_BACKOFF		30 /* seconds */
72 #endif
73 
74 static bool_t time_not_ok(struct timeval *);
75 static enum clnt_stat clnt_dg_call(CLIENT *, struct rpc_callextra *,
76     rpcproc_t, struct mbuf *, struct mbuf **, struct timeval);
77 static void clnt_dg_geterr(CLIENT *, struct rpc_err *);
78 static bool_t clnt_dg_freeres(CLIENT *, xdrproc_t, void *);
79 static void clnt_dg_abort(CLIENT *);
80 static bool_t clnt_dg_control(CLIENT *, u_int, void *);
81 static void clnt_dg_close(CLIENT *);
82 static void clnt_dg_destroy(CLIENT *);
83 static int clnt_dg_soupcall(struct socket *so, void *arg, int waitflag);
84 
85 static struct clnt_ops clnt_dg_ops = {
86 	.cl_call =	clnt_dg_call,
87 	.cl_abort =	clnt_dg_abort,
88 	.cl_geterr =	clnt_dg_geterr,
89 	.cl_freeres =	clnt_dg_freeres,
90 	.cl_close =	clnt_dg_close,
91 	.cl_destroy =	clnt_dg_destroy,
92 	.cl_control =	clnt_dg_control
93 };
94 
95 static const char mem_err_clnt_dg[] = "clnt_dg_create: out of memory";
96 
97 /*
98  * A pending RPC request which awaits a reply. Requests which have
99  * received their reply will have cr_xid set to zero and cr_mrep to
100  * the mbuf chain of the reply.
101  */
102 struct cu_request {
103 	TAILQ_ENTRY(cu_request) cr_link;
104 	CLIENT			*cr_client;	/* owner */
105 	uint32_t		cr_xid;		/* XID of request */
106 	struct mbuf		*cr_mrep;	/* reply received by upcall */
107 	int			cr_error;	/* any error from upcall */
108 	char			cr_verf[MAX_AUTH_BYTES]; /* reply verf */
109 };
110 
111 TAILQ_HEAD(cu_request_list, cu_request);
112 
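/*
 * Size of the buffer holding the static part of the call header (xid,
 * direction, RPC version, program and version) that clnt_dg_create()
 * marshals once with xdr_callhdr().  The CLGET_PROG/CLGET_VERS cases
 * in clnt_dg_control() rely on this layout.
 */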
113 #define MCALL_MSG_SIZE 24
114 
115 /*
116  * This structure is pointed to by the socket buffer's sb_upcallarg
117  * member. It is separate from the client private data to facilitate
118  * multiple clients sharing the same socket. The cs_lock mutex is used
 119  * to protect all fields of this structure; the socket's receive
 120  * buffer lock (SOCKBUF_LOCK) is used to ensure that exactly one of
 121  * these structures is installed on the socket.
122  */
123 struct cu_socket {
124 	struct mtx		cs_lock;
125 	int			cs_refs;	/* Count of clients */
126 	struct cu_request_list	cs_pending;	/* Requests awaiting replies */
127 	int			cs_upcallrefs;	/* Refcnt of upcalls in prog.*/
128 };
129 
130 static void clnt_dg_upcallsdone(struct socket *, struct cu_socket *);
131 
132 /*
133  * Private data kept per client handle
134  */
135 struct cu_data {
 136 	int			cu_threads;	/* # threads in clnt_dg_call */
137 	bool_t			cu_closing;	/* TRUE if we are closing */
138 	bool_t			cu_closed;	/* TRUE if we are closed */
 139 	struct socket		*cu_socket;	/* RPC datagram socket */
 140 	bool_t			cu_closeit;	/* close socket on destroy */
 141 	struct sockaddr_storage	cu_raddr;	/* remote address */
 142 	int			cu_rlen;	/* length of cu_raddr */
143 	struct timeval		cu_wait;	/* retransmit interval */
144 	struct timeval		cu_total;	/* total time for the call */
145 	struct rpc_err		cu_error;
146 	uint32_t		cu_xid;
147 	char			cu_mcallc[MCALL_MSG_SIZE]; /* marshalled callmsg */
148 	size_t			cu_mcalllen;
149 	size_t			cu_sendsz;	/* send size */
150 	size_t			cu_recvsz;	/* recv size */
151 	int			cu_async;
152 	int			cu_connect;	/* Use connect(). */
153 	int			cu_connected;	/* Have done connect(). */
 154 	const char		*cu_waitchan;	/* wait channel for msleep() */
 155 	int			cu_waitflag;	/* msleep() flags (PCATCH or 0) */
156 	int			cu_cwnd;	/* congestion window */
157 	int			cu_sent;	/* number of in-flight RPCs */
 158 	bool_t			cu_cwnd_wait;	/* waiting for cwnd space */
159 };
160 
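/*
 * The congestion window (cu_cwnd) and the count of in-flight requests
 * (cu_sent) are kept in fixed point, scaled by CWNDSCALE, so that the
 * window can grow by fractions of a request.  MAXCWND allows at most
 * 32 outstanding requests per client handle.
 */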
161 #define CWNDSCALE	256
162 #define MAXCWND		(32 * CWNDSCALE)
163 
164 /*
 165  * Connectionless client creation returns a client handle for the given
 166  * parameters.  Default options are set, which the user can change using
 167  * clnt_control().  The socket should be open and bound.
 168  * NB: The cl_auth field of the returned handle is initialized to null
 169  * 	authentication; the caller may wish to set it to something more useful.
 170  *
 171  * sendsz and recvsz are the maximum allowable packet sizes that can be
 172  * sent and received.  Normally they are the same, but they can be
 173  * changed to improve efficiency and buffer allocation.  If they are 0,
 174  * the transport defaults are used.
 175  *
 176  * If svcaddr is NULL, returns NULL.
 177  */
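/*
 * Illustrative usage sketch (not part of this file): a hypothetical
 * kernel caller that has already created and bound a datagram socket
 * "so" and filled in a sockaddr "srvaddr" for the server could do the
 * following; MYPROG and MYVERS are placeholder program and version
 * numbers, and passing 0 for sendsz and recvsz selects the transport
 * defaults:
 *
 *	CLIENT *cl;
 *	struct timeval tv = { 25, 0 };
 *
 *	cl = clnt_dg_create(so, (struct sockaddr *)&srvaddr,
 *	    MYPROG, MYVERS, 0, 0);
 *	if (cl != NULL)
 *		CLNT_CONTROL(cl, CLSET_TIMEOUT, &tv);
 */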
178 CLIENT *
179 clnt_dg_create(
180 	struct socket *so,
 181 	struct sockaddr *svcaddr,	/* server's address */
 182 	rpcprog_t program,		/* program number */
 183 	rpcvers_t version,		/* version number */
 184 	size_t sendsz,			/* buffer send size */
 185 	size_t recvsz)			/* buffer recv size */
186 {
187 	CLIENT *cl = NULL;		/* client handle */
188 	struct cu_data *cu = NULL;	/* private data */
189 	struct cu_socket *cs = NULL;
190 	struct sockbuf *sb;
191 	struct timeval now;
192 	struct rpc_msg call_msg;
193 	struct __rpc_sockinfo si;
194 	XDR xdrs;
195 	int error;
196 
197 	if (svcaddr == NULL) {
198 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
199 		return (NULL);
200 	}
201 
202 	if (!__rpc_socket2sockinfo(so, &si)) {
203 		rpc_createerr.cf_stat = RPC_TLIERROR;
204 		rpc_createerr.cf_error.re_errno = 0;
205 		return (NULL);
206 	}
207 
208 	/*
209 	 * Find the receive and the send size
210 	 */
211 	sendsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsz);
212 	recvsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsz);
213 	if ((sendsz == 0) || (recvsz == 0)) {
214 		rpc_createerr.cf_stat = RPC_TLIERROR; /* XXX */
215 		rpc_createerr.cf_error.re_errno = 0;
216 		return (NULL);
217 	}
218 
219 	cl = mem_alloc(sizeof (CLIENT));
220 
221 	/*
222 	 * Should be multiple of 4 for XDR.
223 	 */
224 	sendsz = ((sendsz + 3) / 4) * 4;
225 	recvsz = ((recvsz + 3) / 4) * 4;
226 	cu = mem_alloc(sizeof (*cu));
227 	cu->cu_threads = 0;
228 	cu->cu_closing = FALSE;
229 	cu->cu_closed = FALSE;
230 	(void) memcpy(&cu->cu_raddr, svcaddr, (size_t)svcaddr->sa_len);
231 	cu->cu_rlen = svcaddr->sa_len;
232 	/* Other values can also be set through clnt_control() */
233 	cu->cu_wait.tv_sec = 3;	/* heuristically chosen */
234 	cu->cu_wait.tv_usec = 0;
235 	cu->cu_total.tv_sec = -1;
236 	cu->cu_total.tv_usec = -1;
237 	cu->cu_sendsz = sendsz;
238 	cu->cu_recvsz = recvsz;
239 	cu->cu_async = FALSE;
240 	cu->cu_connect = FALSE;
241 	cu->cu_connected = FALSE;
242 	cu->cu_waitchan = "rpcrecv";
243 	cu->cu_waitflag = 0;
244 	cu->cu_cwnd = MAXCWND / 2;
245 	cu->cu_sent = 0;
246 	cu->cu_cwnd_wait = FALSE;
247 	(void) getmicrotime(&now);
248 	cu->cu_xid = __RPC_GETXID(&now);
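	/*
	 * Marshal the static part of the call header (xid, direction,
	 * RPC version, program, version) once; clnt_dg_call() copies it
	 * into every request and overwrites only the XID.
	 */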
249 	call_msg.rm_xid = cu->cu_xid;
250 	call_msg.rm_call.cb_prog = program;
251 	call_msg.rm_call.cb_vers = version;
252 	xdrmem_create(&xdrs, cu->cu_mcallc, MCALL_MSG_SIZE, XDR_ENCODE);
253 	if (! xdr_callhdr(&xdrs, &call_msg)) {
254 		rpc_createerr.cf_stat = RPC_CANTENCODEARGS;  /* XXX */
255 		rpc_createerr.cf_error.re_errno = 0;
256 		goto err2;
257 	}
258 	cu->cu_mcalllen = XDR_GETPOS(&xdrs);
259 
260 	/*
 261 	 * By default, closeit is always FALSE. It is the user's responsibility
 262 	 * to close the socket, or the user may use clnt_control
 263 	 * to let clnt_destroy do it.
264 	 */
265 	cu->cu_closeit = FALSE;
266 	cu->cu_socket = so;
267 	error = soreserve(so, (u_long)sendsz, (u_long)recvsz);
268 	if (error != 0) {
269 		rpc_createerr.cf_stat = RPC_FAILED;
270 		rpc_createerr.cf_error.re_errno = error;
271 		goto err2;
272 	}
273 
274 	sb = &so->so_rcv;
275 	SOCKBUF_LOCK(&so->so_rcv);
276 recheck_socket:
277 	if (sb->sb_upcall) {
278 		if (sb->sb_upcall != clnt_dg_soupcall) {
279 			SOCKBUF_UNLOCK(&so->so_rcv);
280 			printf("clnt_dg_create(): socket already has an incompatible upcall\n");
281 			goto err2;
282 		}
283 		cs = (struct cu_socket *) sb->sb_upcallarg;
284 		mtx_lock(&cs->cs_lock);
285 		cs->cs_refs++;
286 		mtx_unlock(&cs->cs_lock);
287 	} else {
288 		/*
289 		 * We are the first on this socket - allocate the
290 		 * structure and install it in the socket.
291 		 */
292 		SOCKBUF_UNLOCK(&so->so_rcv);
293 		cs = mem_alloc(sizeof(*cs));
294 		SOCKBUF_LOCK(&so->so_rcv);
295 		if (sb->sb_upcall) {
296 			/*
297 			 * We have lost a race with some other client.
298 			 */
299 			mem_free(cs, sizeof(*cs));
300 			goto recheck_socket;
301 		}
302 		mtx_init(&cs->cs_lock, "cs->cs_lock", NULL, MTX_DEF);
303 		cs->cs_refs = 1;
304 		cs->cs_upcallrefs = 0;
305 		TAILQ_INIT(&cs->cs_pending);
306 		soupcall_set(so, SO_RCV, clnt_dg_soupcall, cs);
307 	}
308 	SOCKBUF_UNLOCK(&so->so_rcv);
309 
310 	cl->cl_refs = 1;
311 	cl->cl_ops = &clnt_dg_ops;
312 	cl->cl_private = (caddr_t)(void *)cu;
313 	cl->cl_auth = authnone_create();
314 	cl->cl_tp = NULL;
315 	cl->cl_netid = NULL;
316 	return (cl);
317 err2:
318 	if (cl) {
319 		mem_free(cl, sizeof (CLIENT));
320 		if (cu)
321 			mem_free(cu, sizeof (*cu));
322 	}
323 	return (NULL);
324 }
325 
326 static enum clnt_stat
327 clnt_dg_call(
328 	CLIENT		*cl,		/* client handle */
329 	struct rpc_callextra *ext,	/* call metadata */
330 	rpcproc_t	proc,		/* procedure number */
331 	struct mbuf	*args,		/* pointer to args */
332 	struct mbuf	**resultsp,	/* pointer to results */
333 	struct timeval	utimeout)	/* seconds to wait before giving up */
334 {
335 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
336 	struct cu_socket *cs;
337 	struct rpc_timers *rt;
338 	AUTH *auth;
339 	struct rpc_err *errp;
340 	enum clnt_stat stat;
341 	XDR xdrs;
342 	struct rpc_msg reply_msg;
343 	bool_t ok;
344 	int retrans;			/* number of re-transmits so far */
345 	int nrefreshes = 2;		/* number of times to refresh cred */
346 	struct timeval *tvp;
347 	int timeout;
348 	int retransmit_time;
349 	int next_sendtime, starttime, rtt, time_waited, tv = 0;
350 	struct sockaddr *sa;
351 	socklen_t salen;
352 	uint32_t xid = 0;
353 	struct mbuf *mreq = NULL, *results;
354 	struct cu_request *cr;
355 	int error;
356 
357 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
358 	cr = malloc(sizeof(struct cu_request), M_RPC, M_WAITOK);
359 
360 	mtx_lock(&cs->cs_lock);
361 
362 	if (cu->cu_closing || cu->cu_closed) {
363 		mtx_unlock(&cs->cs_lock);
364 		free(cr, M_RPC);
365 		return (RPC_CANTSEND);
366 	}
367 	cu->cu_threads++;
368 
369 	if (ext) {
370 		auth = ext->rc_auth;
371 		errp = &ext->rc_err;
372 	} else {
373 		auth = cl->cl_auth;
374 		errp = &cu->cu_error;
375 	}
376 
377 	cr->cr_client = cl;
378 	cr->cr_mrep = NULL;
379 	cr->cr_error = 0;
380 
381 	if (cu->cu_total.tv_usec == -1) {
382 		tvp = &utimeout; /* use supplied timeout */
383 	} else {
384 		tvp = &cu->cu_total; /* use default timeout */
385 	}
386 	if (tvp->tv_sec || tvp->tv_usec)
387 		timeout = tvtohz(tvp);
388 	else
389 		timeout = 0;
390 
391 	if (cu->cu_connect && !cu->cu_connected) {
392 		mtx_unlock(&cs->cs_lock);
393 		error = soconnect(cu->cu_socket,
394 		    (struct sockaddr *)&cu->cu_raddr, curthread);
395 		mtx_lock(&cs->cs_lock);
396 		if (error) {
397 			errp->re_errno = error;
398 			errp->re_status = stat = RPC_CANTSEND;
399 			goto out;
400 		}
401 		cu->cu_connected = 1;
402 	}
403 	if (cu->cu_connected) {
404 		sa = NULL;
405 		salen = 0;
406 	} else {
407 		sa = (struct sockaddr *)&cu->cu_raddr;
408 		salen = cu->cu_rlen;
409 	}
410 	time_waited = 0;
411 	retrans = 0;
412 	if (ext && ext->rc_timers) {
413 		rt = ext->rc_timers;
414 		if (!rt->rt_rtxcur)
415 			rt->rt_rtxcur = tvtohz(&cu->cu_wait);
416 		retransmit_time = next_sendtime = rt->rt_rtxcur;
417 	} else {
418 		rt = NULL;
419 		retransmit_time = next_sendtime = tvtohz(&cu->cu_wait);
420 	}
421 
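	/*
	 * All times below are in ticks relative to starttime:
	 * time_waited is how long we have waited so far, next_sendtime
	 * is when the next retransmission is due and retransmit_time is
	 * the current backoff interval, which doubles after each
	 * retransmission up to RPC_MAX_BACKOFF seconds.
	 */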
422 	starttime = ticks;
423 
424 call_again:
425 	mtx_assert(&cs->cs_lock, MA_OWNED);
426 
427 	cu->cu_xid++;
428 	xid = cu->cu_xid;
429 
430 send_again:
431 	mtx_unlock(&cs->cs_lock);
432 
433 	mreq = m_gethdr(M_WAITOK, MT_DATA);
434 	KASSERT(cu->cu_mcalllen <= MHLEN, ("RPC header too big"));
435 	bcopy(cu->cu_mcallc, mreq->m_data, cu->cu_mcalllen);
436 	mreq->m_len = cu->cu_mcalllen;
437 
438 	/*
439 	 * The XID is the first thing in the request.
440 	 */
441 	*mtod(mreq, uint32_t *) = htonl(xid);
442 
443 	xdrmbuf_create(&xdrs, mreq, XDR_ENCODE);
444 
445 	if (cu->cu_async == TRUE && args == NULL)
446 		goto get_reply;
447 
448 	if ((! XDR_PUTINT32(&xdrs, &proc)) ||
449 	    (! AUTH_MARSHALL(auth, xid, &xdrs,
450 		m_copym(args, 0, M_COPYALL, M_WAITOK)))) {
451 		errp->re_status = stat = RPC_CANTENCODEARGS;
452 		mtx_lock(&cs->cs_lock);
453 		goto out;
454 	}
455 	mreq->m_pkthdr.len = m_length(mreq, NULL);
456 
457 	cr->cr_xid = xid;
458 	mtx_lock(&cs->cs_lock);
459 
460 	/*
461 	 * Try to get a place in the congestion window.
462 	 */
463 	while (cu->cu_sent >= cu->cu_cwnd) {
464 		cu->cu_cwnd_wait = TRUE;
465 		error = msleep(&cu->cu_cwnd_wait, &cs->cs_lock,
466 		    cu->cu_waitflag, "rpccwnd", 0);
467 		if (error) {
468 			errp->re_errno = error;
469 			if (error == EINTR || error == ERESTART)
470 				errp->re_status = stat = RPC_INTR;
471 			else
472 				errp->re_status = stat = RPC_CANTSEND;
473 			goto out;
474 		}
475 	}
476 	cu->cu_sent += CWNDSCALE;
477 
478 	TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
479 	mtx_unlock(&cs->cs_lock);
480 
481 	/*
482 	 * sosend consumes mreq.
483 	 */
484 	error = sosend(cu->cu_socket, sa, NULL, mreq, NULL, 0, curthread);
485 	mreq = NULL;
486 
487 	/*
488 	 * sub-optimal code appears here because we have
489 	 * some clock time to spare while the packets are in flight.
490 	 * (We assume that this is actually only executed once.)
491 	 */
492 	reply_msg.acpted_rply.ar_verf.oa_flavor = AUTH_NULL;
493 	reply_msg.acpted_rply.ar_verf.oa_base = cr->cr_verf;
494 	reply_msg.acpted_rply.ar_verf.oa_length = 0;
495 	reply_msg.acpted_rply.ar_results.where = NULL;
496 	reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
497 
498 	mtx_lock(&cs->cs_lock);
499 	if (error) {
500 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
501 		errp->re_errno = error;
502 		errp->re_status = stat = RPC_CANTSEND;
503 		cu->cu_sent -= CWNDSCALE;
504 		if (cu->cu_cwnd_wait) {
505 			cu->cu_cwnd_wait = FALSE;
506 			wakeup(&cu->cu_cwnd_wait);
507 		}
508 		goto out;
509 	}
510 
511 	/*
512 	 * Check to see if we got an upcall while waiting for the
513 	 * lock.
514 	 */
515 	if (cr->cr_error) {
516 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
517 		errp->re_errno = cr->cr_error;
518 		errp->re_status = stat = RPC_CANTRECV;
519 		cu->cu_sent -= CWNDSCALE;
520 		if (cu->cu_cwnd_wait) {
521 			cu->cu_cwnd_wait = FALSE;
522 			wakeup(&cu->cu_cwnd_wait);
523 		}
524 		goto out;
525 	}
526 	if (cr->cr_mrep) {
527 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
528 		cu->cu_sent -= CWNDSCALE;
529 		if (cu->cu_cwnd_wait) {
530 			cu->cu_cwnd_wait = FALSE;
531 			wakeup(&cu->cu_cwnd_wait);
532 		}
533 		goto got_reply;
534 	}
535 
536 	/*
537 	 * Hack to provide rpc-based message passing
538 	 */
539 	if (timeout == 0) {
540 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
541 		errp->re_status = stat = RPC_TIMEDOUT;
542 		cu->cu_sent -= CWNDSCALE;
543 		if (cu->cu_cwnd_wait) {
544 			cu->cu_cwnd_wait = FALSE;
545 			wakeup(&cu->cu_cwnd_wait);
546 		}
547 		goto out;
548 	}
549 
550 get_reply:
551 	for (;;) {
552 		/* Decide how long to wait. */
553 		if (next_sendtime < timeout)
554 			tv = next_sendtime;
555 		else
556 			tv = timeout;
557 		tv -= time_waited;
558 
559 		if (tv > 0) {
560 			if (cu->cu_closing || cu->cu_closed) {
561 				error = 0;
562 				cr->cr_error = ESHUTDOWN;
563 			} else {
564 				error = msleep(cr, &cs->cs_lock,
565 				    cu->cu_waitflag, cu->cu_waitchan, tv);
566 			}
567 		} else {
568 			error = EWOULDBLOCK;
569 		}
570 
571 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
572 		cu->cu_sent -= CWNDSCALE;
573 		if (cu->cu_cwnd_wait) {
574 			cu->cu_cwnd_wait = FALSE;
575 			wakeup(&cu->cu_cwnd_wait);
576 		}
577 
578 		if (!error) {
579 			/*
580 			 * We were woken up by the upcall.  If the
581 			 * upcall had a receive error, report that,
582 			 * otherwise we have a reply.
583 			 */
584 			if (cr->cr_error) {
585 				errp->re_errno = cr->cr_error;
586 				errp->re_status = stat = RPC_CANTRECV;
587 				goto out;
588 			}
589 
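			/*
			 * Got a reply: open the congestion window
			 * additively, by roughly one CWNDSCALE unit
			 * (i.e. one more in-flight request) per
			 * window's worth of replies, capped at MAXCWND.
			 */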
590 			cu->cu_cwnd += (CWNDSCALE * CWNDSCALE
591 			    + cu->cu_cwnd / 2) / cu->cu_cwnd;
592 			if (cu->cu_cwnd > MAXCWND)
593 				cu->cu_cwnd = MAXCWND;
594 
595 			if (rt) {
596 				/*
597 				 * Add one to the time since a tick
598 				 * count of N means that the actual
599 				 * time taken was somewhere between N
600 				 * and N+1.
601 				 */
602 				rtt = ticks - starttime + 1;
603 
604 				/*
605 				 * Update our estimate of the round
606 				 * trip time using roughly the
607 				 * algorithm described in RFC
608 				 * 2988. Given an RTT sample R:
609 				 *
610 				 * RTTVAR = (1-beta) * RTTVAR + beta * |SRTT-R|
611 				 * SRTT = (1-alpha) * SRTT + alpha * R
612 				 *
613 				 * where alpha = 0.125 and beta = 0.25.
614 				 *
615 				 * The initial retransmit timeout is
616 				 * SRTT + 4*RTTVAR and doubles on each
 617 				 * retransmission.
618 				 */
619 				if (rt->rt_srtt == 0) {
620 					rt->rt_srtt = rtt;
621 					rt->rt_deviate = rtt / 2;
622 				} else {
623 					int32_t error = rtt - rt->rt_srtt;
624 					rt->rt_srtt += error / 8;
625 					error = abs(error) - rt->rt_deviate;
626 					rt->rt_deviate += error / 4;
627 				}
628 				rt->rt_rtxcur = rt->rt_srtt + 4*rt->rt_deviate;
629 			}
630 
631 			break;
632 		}
633 
634 		/*
635 		 * The sleep returned an error so our request is still
636 		 * on the list. If we got EWOULDBLOCK, we may want to
637 		 * re-send the request.
638 		 */
639 		if (error != EWOULDBLOCK) {
640 			errp->re_errno = error;
641 			if (error == EINTR || error == ERESTART)
642 				errp->re_status = stat = RPC_INTR;
643 			else
644 				errp->re_status = stat = RPC_CANTRECV;
645 			goto out;
646 		}
647 
648 		time_waited = ticks - starttime;
649 
650 		/* Check for timeout. */
651 		if (time_waited > timeout) {
652 			errp->re_errno = EWOULDBLOCK;
653 			errp->re_status = stat = RPC_TIMEDOUT;
654 			goto out;
655 		}
656 
657 		/* Retransmit if necessary. */
658 		if (time_waited >= next_sendtime) {
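			/*
			 * We timed out waiting for this reply, so halve
			 * the congestion window (multiplicative
			 * decrease), but never below one request.
			 */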
659 			cu->cu_cwnd /= 2;
660 			if (cu->cu_cwnd < CWNDSCALE)
661 				cu->cu_cwnd = CWNDSCALE;
662 			if (ext && ext->rc_feedback) {
663 				mtx_unlock(&cs->cs_lock);
664 				if (retrans == 0)
665 					ext->rc_feedback(FEEDBACK_REXMIT1,
666 					    proc, ext->rc_feedback_arg);
667 				else
668 					ext->rc_feedback(FEEDBACK_REXMIT2,
669 					    proc, ext->rc_feedback_arg);
670 				mtx_lock(&cs->cs_lock);
671 			}
672 			if (cu->cu_closing || cu->cu_closed) {
673 				errp->re_errno = ESHUTDOWN;
674 				errp->re_status = stat = RPC_CANTRECV;
675 				goto out;
676 			}
677 			retrans++;
678 			/* update retransmit_time */
679 			if (retransmit_time < RPC_MAX_BACKOFF * hz)
680 				retransmit_time = 2 * retransmit_time;
681 			next_sendtime += retransmit_time;
682 			goto send_again;
683 		}
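		/*
		 * Not yet time to retransmit.  The request was removed
		 * from the pending list at the top of this loop, so
		 * re-reserve a congestion window slot and re-queue it
		 * before sleeping again.
		 */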
684 		cu->cu_sent += CWNDSCALE;
685 		TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
686 	}
687 
688 got_reply:
689 	/*
690 	 * Now decode and validate the response. We need to drop the
691 	 * lock since xdr_replymsg may end up sleeping in malloc.
692 	 */
693 	mtx_unlock(&cs->cs_lock);
694 
695 	if (ext && ext->rc_feedback)
696 		ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg);
697 
698 	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
699 	ok = xdr_replymsg(&xdrs, &reply_msg);
700 	cr->cr_mrep = NULL;
701 
702 	if (ok) {
703 		if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
704 		    (reply_msg.acpted_rply.ar_stat == SUCCESS))
705 			errp->re_status = stat = RPC_SUCCESS;
706 		else
707 			stat = _seterr_reply(&reply_msg, &(cu->cu_error));
708 
709 		if (errp->re_status == RPC_SUCCESS) {
710 			results = xdrmbuf_getall(&xdrs);
711 			if (! AUTH_VALIDATE(auth, xid,
712 				&reply_msg.acpted_rply.ar_verf,
713 				&results)) {
714 				errp->re_status = stat = RPC_AUTHERROR;
715 				errp->re_why = AUTH_INVALIDRESP;
716 				if (retrans &&
717 				    auth->ah_cred.oa_flavor == RPCSEC_GSS) {
718 					/*
 719 					 * If we retransmitted, it is
 720 					 * possible that we will
721 					 * receive a reply for one of
722 					 * the earlier transmissions
723 					 * (which will use an older
724 					 * RPCSEC_GSS sequence
725 					 * number). In this case, just
726 					 * go back and listen for a
727 					 * new reply. We could keep a
728 					 * record of all the seq
729 					 * numbers we have transmitted
730 					 * so far so that we could
731 					 * accept a reply for any of
732 					 * them here.
733 					 */
734 					XDR_DESTROY(&xdrs);
735 					mtx_lock(&cs->cs_lock);
736 					cu->cu_sent += CWNDSCALE;
737 					TAILQ_INSERT_TAIL(&cs->cs_pending,
738 					    cr, cr_link);
739 					cr->cr_mrep = NULL;
740 					goto get_reply;
741 				}
742 			} else {
743 				*resultsp = results;
744 			}
745 		}		/* end successful completion */
746 		/*
 747 		 * If unsuccessful AND the error is an authentication error,
748 		 * then refresh credentials and try again, else break
749 		 */
750 		else if (stat == RPC_AUTHERROR)
751 			/* maybe our credentials need to be refreshed ... */
752 			if (nrefreshes > 0 &&
753 			    AUTH_REFRESH(auth, &reply_msg)) {
754 				nrefreshes--;
755 				XDR_DESTROY(&xdrs);
756 				mtx_lock(&cs->cs_lock);
757 				goto call_again;
758 			}
759 		/* end of unsuccessful completion */
760 	}	/* end of valid reply message */
761 	else {
762 		errp->re_status = stat = RPC_CANTDECODERES;
763 
764 	}
765 	XDR_DESTROY(&xdrs);
766 	mtx_lock(&cs->cs_lock);
767 out:
768 	mtx_assert(&cs->cs_lock, MA_OWNED);
769 
770 	if (mreq)
771 		m_freem(mreq);
772 	if (cr->cr_mrep)
773 		m_freem(cr->cr_mrep);
774 
775 	cu->cu_threads--;
776 	if (cu->cu_closing)
777 		wakeup(cu);
778 
779 	mtx_unlock(&cs->cs_lock);
780 
781 	if (auth && stat != RPC_SUCCESS)
782 		AUTH_VALIDATE(auth, xid, NULL, NULL);
783 
784 	free(cr, M_RPC);
785 
786 	return (stat);
787 }
788 
789 static void
790 clnt_dg_geterr(CLIENT *cl, struct rpc_err *errp)
791 {
792 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
793 
794 	*errp = cu->cu_error;
795 }
796 
797 static bool_t
798 clnt_dg_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
799 {
800 	XDR xdrs;
801 	bool_t dummy;
802 
803 	xdrs.x_op = XDR_FREE;
804 	dummy = (*xdr_res)(&xdrs, res_ptr);
805 
806 	return (dummy);
807 }
808 
809 /*ARGSUSED*/
810 static void
811 clnt_dg_abort(CLIENT *h)
812 {
813 }
814 
815 static bool_t
816 clnt_dg_control(CLIENT *cl, u_int request, void *info)
817 {
818 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
819 	struct cu_socket *cs;
820 	struct sockaddr *addr;
821 
822 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
823 	mtx_lock(&cs->cs_lock);
824 
825 	switch (request) {
826 	case CLSET_FD_CLOSE:
827 		cu->cu_closeit = TRUE;
828 		mtx_unlock(&cs->cs_lock);
829 		return (TRUE);
830 	case CLSET_FD_NCLOSE:
831 		cu->cu_closeit = FALSE;
832 		mtx_unlock(&cs->cs_lock);
833 		return (TRUE);
834 	}
835 
836 	/* for other requests which use info */
837 	if (info == NULL) {
838 		mtx_unlock(&cs->cs_lock);
839 		return (FALSE);
840 	}
841 	switch (request) {
842 	case CLSET_TIMEOUT:
843 		if (time_not_ok((struct timeval *)info)) {
844 			mtx_unlock(&cs->cs_lock);
845 			return (FALSE);
846 		}
847 		cu->cu_total = *(struct timeval *)info;
848 		break;
849 	case CLGET_TIMEOUT:
850 		*(struct timeval *)info = cu->cu_total;
851 		break;
852 	case CLSET_RETRY_TIMEOUT:
853 		if (time_not_ok((struct timeval *)info)) {
854 			mtx_unlock(&cs->cs_lock);
855 			return (FALSE);
856 		}
857 		cu->cu_wait = *(struct timeval *)info;
858 		break;
859 	case CLGET_RETRY_TIMEOUT:
860 		*(struct timeval *)info = cu->cu_wait;
861 		break;
862 	case CLGET_SVC_ADDR:
863 		/*
 864 		 * Slightly different semantics from userland - we use
 865 		 * a sockaddr instead of a netbuf.
866 		 */
867 		memcpy(info, &cu->cu_raddr, cu->cu_raddr.ss_len);
868 		break;
869 	case CLSET_SVC_ADDR:		/* set to new address */
870 		addr = (struct sockaddr *)info;
871 		(void) memcpy(&cu->cu_raddr, addr, addr->sa_len);
872 		break;
873 	case CLGET_XID:
874 		*(uint32_t *)info = cu->cu_xid;
875 		break;
876 
877 	case CLSET_XID:
878 		/* This will set the xid of the NEXT call */
879 		/* decrement by 1 as clnt_dg_call() increments once */
880 		cu->cu_xid = *(uint32_t *)info - 1;
881 		break;
882 
883 	case CLGET_VERS:
884 		/*
885 		 * This RELIES on the information that, in the call body,
886 		 * the version number field is the fifth field from the
 887 		 * beginning of the RPC header. MUST be changed if the
888 		 * call_struct is changed
889 		 */
890 		*(uint32_t *)info =
891 		    ntohl(*(uint32_t *)(void *)(cu->cu_mcallc +
892 		    4 * BYTES_PER_XDR_UNIT));
893 		break;
894 
895 	case CLSET_VERS:
896 		*(uint32_t *)(void *)(cu->cu_mcallc + 4 * BYTES_PER_XDR_UNIT)
897 			= htonl(*(uint32_t *)info);
898 		break;
899 
900 	case CLGET_PROG:
901 		/*
902 		 * This RELIES on the information that, in the call body,
903 		 * the program number field is the fourth field from the
 904 		 * beginning of the RPC header. MUST be changed if the
905 		 * call_struct is changed
906 		 */
907 		*(uint32_t *)info =
908 		    ntohl(*(uint32_t *)(void *)(cu->cu_mcallc +
909 		    3 * BYTES_PER_XDR_UNIT));
910 		break;
911 
912 	case CLSET_PROG:
913 		*(uint32_t *)(void *)(cu->cu_mcallc + 3 * BYTES_PER_XDR_UNIT)
914 			= htonl(*(uint32_t *)info);
915 		break;
916 	case CLSET_ASYNC:
917 		cu->cu_async = *(int *)info;
918 		break;
919 	case CLSET_CONNECT:
920 		cu->cu_connect = *(int *)info;
921 		break;
922 	case CLSET_WAITCHAN:
923 		cu->cu_waitchan = (const char *)info;
924 		break;
925 	case CLGET_WAITCHAN:
926 		*(const char **) info = cu->cu_waitchan;
927 		break;
928 	case CLSET_INTERRUPTIBLE:
929 		if (*(int *) info)
930 			cu->cu_waitflag = PCATCH;
931 		else
932 			cu->cu_waitflag = 0;
933 		break;
934 	case CLGET_INTERRUPTIBLE:
935 		if (cu->cu_waitflag)
936 			*(int *) info = TRUE;
937 		else
938 			*(int *) info = FALSE;
939 		break;
940 	default:
941 		mtx_unlock(&cs->cs_lock);
942 		return (FALSE);
943 	}
944 	mtx_unlock(&cs->cs_lock);
945 	return (TRUE);
946 }
947 
948 static void
949 clnt_dg_close(CLIENT *cl)
950 {
951 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
952 	struct cu_socket *cs;
953 	struct cu_request *cr;
954 
955 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
956 	mtx_lock(&cs->cs_lock);
957 
958 	if (cu->cu_closed) {
959 		mtx_unlock(&cs->cs_lock);
960 		return;
961 	}
962 
963 	if (cu->cu_closing) {
964 		while (cu->cu_closing)
965 			msleep(cu, &cs->cs_lock, 0, "rpcclose", 0);
966 		KASSERT(cu->cu_closed, ("client should be closed"));
967 		mtx_unlock(&cs->cs_lock);
968 		return;
969 	}
970 
971 	/*
972 	 * Abort any pending requests and wait until everyone
 973 	 * has finished with clnt_dg_call().
974 	 */
975 	cu->cu_closing = TRUE;
976 	TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
977 		if (cr->cr_client == cl) {
978 			cr->cr_xid = 0;
979 			cr->cr_error = ESHUTDOWN;
980 			wakeup(cr);
981 		}
982 	}
983 
984 	while (cu->cu_threads)
985 		msleep(cu, &cs->cs_lock, 0, "rpcclose", 0);
986 
987 	cu->cu_closing = FALSE;
988 	cu->cu_closed = TRUE;
989 
990 	mtx_unlock(&cs->cs_lock);
991 	wakeup(cu);
992 }
993 
994 static void
995 clnt_dg_destroy(CLIENT *cl)
996 {
997 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
998 	struct cu_socket *cs;
999 	struct socket *so = NULL;
1000 	bool_t lastsocketref;
1001 
1002 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
1003 	clnt_dg_close(cl);
1004 
1005 	SOCKBUF_LOCK(&cu->cu_socket->so_rcv);
1006 	mtx_lock(&cs->cs_lock);
1007 
1008 	cs->cs_refs--;
1009 	if (cs->cs_refs == 0) {
1010 		mtx_unlock(&cs->cs_lock);
1011 		soupcall_clear(cu->cu_socket, SO_RCV);
1012 		clnt_dg_upcallsdone(cu->cu_socket, cs);
1013 		SOCKBUF_UNLOCK(&cu->cu_socket->so_rcv);
1014 		mtx_destroy(&cs->cs_lock);
1015 		mem_free(cs, sizeof(*cs));
1016 		lastsocketref = TRUE;
1017 	} else {
1018 		mtx_unlock(&cs->cs_lock);
1019 		SOCKBUF_UNLOCK(&cu->cu_socket->so_rcv);
1020 		lastsocketref = FALSE;
1021 	}
1022 
1023 	if (cu->cu_closeit && lastsocketref) {
1024 		so = cu->cu_socket;
1025 		cu->cu_socket = NULL;
1026 	}
1027 
1028 	if (so)
1029 		soclose(so);
1030 
1031 	if (cl->cl_netid && cl->cl_netid[0])
1032 		mem_free(cl->cl_netid, strlen(cl->cl_netid) +1);
1033 	if (cl->cl_tp && cl->cl_tp[0])
1034 		mem_free(cl->cl_tp, strlen(cl->cl_tp) +1);
1035 	mem_free(cu, sizeof (*cu));
1036 	mem_free(cl, sizeof (CLIENT));
1037 }
1038 
1039 /*
 1040  * Make sure that the time is not garbage.  A value of -1 is allowed.
1041  */
1042 static bool_t
1043 time_not_ok(struct timeval *t)
1044 {
1045 	return (t->tv_sec < -1 || t->tv_sec > 100000000 ||
1046 		t->tv_usec < -1 || t->tv_usec > 1000000);
1047 }
1048 
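/*
 * Socket receive upcall.  Called with the socket's receive buffer lock
 * held; drains every complete datagram queued on the socket, matches
 * the XID at the front of each reply against the list of pending
 * requests and wakes the thread sleeping in clnt_dg_call().  Replies
 * that match no pending request are dropped as presumed duplicates.
 */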
1049 int
1050 clnt_dg_soupcall(struct socket *so, void *arg, int waitflag)
1051 {
1052 	struct cu_socket *cs = (struct cu_socket *) arg;
1053 	struct uio uio;
1054 	struct mbuf *m;
1055 	struct mbuf *control;
1056 	struct cu_request *cr;
1057 	int error, rcvflag, foundreq;
1058 	uint32_t xid;
1059 
1060 	cs->cs_upcallrefs++;
1061 	uio.uio_resid = 1000000000;
1062 	uio.uio_td = curthread;
1063 	do {
1064 		SOCKBUF_UNLOCK(&so->so_rcv);
1065 		m = NULL;
1066 		control = NULL;
1067 		rcvflag = MSG_DONTWAIT;
1068 		error = soreceive(so, NULL, &uio, &m, &control, &rcvflag);
1069 		if (control)
1070 			m_freem(control);
1071 		SOCKBUF_LOCK(&so->so_rcv);
1072 
1073 		if (error == EWOULDBLOCK)
1074 			break;
1075 
1076 		/*
1077 		 * If there was an error, wake up all pending
1078 		 * requests.
1079 		 */
1080 		if (error) {
1081 			mtx_lock(&cs->cs_lock);
1082 			TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
1083 				cr->cr_xid = 0;
1084 				cr->cr_error = error;
1085 				wakeup(cr);
1086 			}
1087 			mtx_unlock(&cs->cs_lock);
1088 			break;
1089 		}
1090 
1091 		/*
1092 		 * The XID is in the first uint32_t of the reply.
1093 		 */
1094 		if (m->m_len < sizeof(xid) && m_length(m, NULL) < sizeof(xid)) {
1095 			/*
1096 			 * Should never happen.
1097 			 */
1098 			m_freem(m);
1099 			continue;
1100 		}
1101 
1102 		m_copydata(m, 0, sizeof(xid), (char *)&xid);
1103 		xid = ntohl(xid);
1104 
1105 		/*
1106 		 * Attempt to match this reply with a pending request.
1107 		 */
1108 		mtx_lock(&cs->cs_lock);
1109 		foundreq = 0;
1110 		TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
1111 			if (cr->cr_xid == xid) {
1112 				/*
1113 				 * This one matches. We leave the
1114 				 * reply mbuf in cr->cr_mrep. Set the
1115 				 * XID to zero so that we will ignore
1116 				 * any duplicated replies that arrive
1117 				 * before clnt_dg_call removes it from
1118 				 * the queue.
1119 				 */
1120 				cr->cr_xid = 0;
1121 				cr->cr_mrep = m;
1122 				cr->cr_error = 0;
1123 				foundreq = 1;
1124 				wakeup(cr);
1125 				break;
1126 			}
1127 		}
1128 		mtx_unlock(&cs->cs_lock);
1129 
1130 		/*
1131 		 * If we didn't find the matching request, just drop
 1132 		 * it - it's probably a repeated reply.
1133 		 */
1134 		if (!foundreq)
1135 			m_freem(m);
1136 	} while (m);
1137 	cs->cs_upcallrefs--;
1138 	if (cs->cs_upcallrefs < 0)
1139 		panic("rpcdg upcall refcnt");
1140 	if (cs->cs_upcallrefs == 0)
1141 		wakeup(&cs->cs_upcallrefs);
1142 	return (SU_OK);
1143 }
1144 
1145 /*
1146  * Wait for all upcalls in progress to complete.
1147  */
1148 static void
1149 clnt_dg_upcallsdone(struct socket *so, struct cu_socket *cs)
1150 {
1151 
1152 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1153 
1154 	while (cs->cs_upcallrefs > 0)
1155 		(void) msleep(&cs->cs_upcallrefs, SOCKBUF_MTX(&so->so_rcv), 0,
1156 		    "rpcdgup", 0);
1157 }
1158