xref: /freebsd/sys/rpc/clnt_dg.c (revision a6e527f893df2cbbd941839a93e50ae39ac0db55)
1 /*	$NetBSD: clnt_dg.c,v 1.4 2000/07/14 08:40:41 fvdl Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  * Copyright (c) 2009, Sun Microsystems, Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  * - Redistributions of source code must retain the above copyright notice,
12  *   this list of conditions and the following disclaimer.
13  * - Redistributions in binary form must reproduce the above copyright notice,
14  *   this list of conditions and the following disclaimer in the documentation
15  *   and/or other materials provided with the distribution.
16  * - Neither the name of Sun Microsystems, Inc. nor the names of its
17  *   contributors may be used to endorse or promote products derived
18  *   from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 /*
33  * Copyright (c) 1986-1991 by Sun Microsystems Inc.
34  */
35 
36 /*
37  * Implements a connectionless client side RPC.
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/jail.h>
43 #include <sys/kernel.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/pcpu.h>
49 #include <sys/proc.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/time.h>
53 #include <sys/uio.h>
54 
55 #include <net/vnet.h>
56 
57 #include <rpc/rpc.h>
58 #include <rpc/rpc_com.h>
59 
60 
/*
 * RPC_MAX_BACKOFF bounds the exponential retransmit back-off: the
 * retransmit interval is only doubled while it is still below
 * RPC_MAX_BACKOFF * hz ticks.
 */
#ifndef _FREEFALL_CONFIG
#define	RPC_MAX_BACKOFF		30 /* seconds */
#else
/*
 * Disable RPC exponential back-off for FreeBSD.org systems.
 */
#define	RPC_MAX_BACKOFF		1 /* second */
#endif
69 
70 static bool_t time_not_ok(struct timeval *);
71 static enum clnt_stat clnt_dg_call(CLIENT *, struct rpc_callextra *,
72     rpcproc_t, struct mbuf *, struct mbuf **, struct timeval);
73 static void clnt_dg_geterr(CLIENT *, struct rpc_err *);
74 static bool_t clnt_dg_freeres(CLIENT *, xdrproc_t, void *);
75 static void clnt_dg_abort(CLIENT *);
76 static bool_t clnt_dg_control(CLIENT *, u_int, void *);
77 static void clnt_dg_close(CLIENT *);
78 static void clnt_dg_destroy(CLIENT *);
79 static int clnt_dg_soupcall(struct socket *so, void *arg, int waitflag);
80 
81 static const struct clnt_ops clnt_dg_ops = {
82 	.cl_call =	clnt_dg_call,
83 	.cl_abort =	clnt_dg_abort,
84 	.cl_geterr =	clnt_dg_geterr,
85 	.cl_freeres =	clnt_dg_freeres,
86 	.cl_close =	clnt_dg_close,
87 	.cl_destroy =	clnt_dg_destroy,
88 	.cl_control =	clnt_dg_control
89 };
90 
91 static volatile uint32_t rpc_xid = 0;
92 
93 /*
94  * A pending RPC request which awaits a reply. Requests which have
95  * received their reply will have cr_xid set to zero and cr_mrep to
96  * the mbuf chain of the reply.
97  */
98 struct cu_request {
99 	TAILQ_ENTRY(cu_request) cr_link;
100 	CLIENT			*cr_client;	/* owner */
101 	uint32_t		cr_xid;		/* XID of request */
102 	struct mbuf		*cr_mrep;	/* reply received by upcall */
103 	int			cr_error;	/* any error from upcall */
104 	char			cr_verf[MAX_AUTH_BYTES]; /* reply verf */
105 };
106 
107 TAILQ_HEAD(cu_request_list, cu_request);
108 
109 #define MCALL_MSG_SIZE 24
110 
111 /*
112  * This structure is pointed to by the socket buffer's sb_upcallarg
113  * member. It is separate from the client private data to facilitate
114  * multiple clients sharing the same socket. The cs_lock mutex is used
115  * to protect all fields of this structure, the socket's receive
116  * buffer lock is used to ensure that exactly one of these
117  * structures is installed on the socket.
118  */
119 struct cu_socket {
120 	struct mtx		cs_lock;
121 	int			cs_refs;	/* Count of clients */
122 	struct cu_request_list	cs_pending;	/* Requests awaiting replies */
123 	int			cs_upcallrefs;	/* Refcnt of upcalls in prog.*/
124 };
125 
126 static void clnt_dg_upcallsdone(struct socket *, struct cu_socket *);
127 
128 /*
129  * Private data kept per client handle
130  */
131 struct cu_data {
132 	int			cu_threads;	/* # threads in clnt_vc_call */
133 	bool_t			cu_closing;	/* TRUE if we are closing */
134 	bool_t			cu_closed;	/* TRUE if we are closed */
135 	struct socket		*cu_socket;	/* connection socket */
136 	bool_t			cu_closeit;	/* opened by library */
137 	struct sockaddr_storage	cu_raddr;	/* remote address */
138 	int			cu_rlen;
139 	struct timeval		cu_wait;	/* retransmit interval */
140 	struct timeval		cu_total;	/* total time for the call */
141 	struct rpc_err		cu_error;
142 	uint32_t		cu_xid;
143 	char			cu_mcallc[MCALL_MSG_SIZE]; /* marshalled callmsg */
144 	size_t			cu_mcalllen;
145 	size_t			cu_sendsz;	/* send size */
146 	size_t			cu_recvsz;	/* recv size */
147 	int			cu_async;
148 	int			cu_connect;	/* Use connect(). */
149 	int			cu_connected;	/* Have done connect(). */
150 	const char		*cu_waitchan;
151 	int			cu_waitflag;
152 	int			cu_cwnd;	/* congestion window */
153 	int			cu_sent;	/* number of in-flight RPCs */
154 	bool_t			cu_cwnd_wait;
155 };
156 
157 #define CWNDSCALE	256
158 #define MAXCWND		(32 * CWNDSCALE)
159 
160 /*
161  * Connection less client creation returns with client handle parameters.
162  * Default options are set, which the user can change using clnt_control().
163  * fd should be open and bound.
164  * NB: The rpch->cl_auth is initialized to null authentication.
165  * 	Caller may wish to set this something more useful.
166  *
167  * sendsz and recvsz are the maximum allowable packet sizes that can be
168  * sent and received. Normally they are the same, but they can be
169  * changed to improve the program efficiency and buffer allocation.
170  * If they are 0, use the transport default.
171  *
172  * If svcaddr is NULL, returns NULL.
173  */
174 CLIENT *
clnt_dg_create(struct socket * so,struct sockaddr * svcaddr,rpcprog_t program,rpcvers_t version,size_t sendsz,size_t recvsz)175 clnt_dg_create(
176 	struct socket *so,
177 	struct sockaddr *svcaddr,	/* servers address */
178 	rpcprog_t program,		/* program number */
179 	rpcvers_t version,		/* version number */
180 	size_t sendsz,			/* buffer recv size */
181 	size_t recvsz)			/* buffer send size */
182 {
183 	CLIENT *cl = NULL;		/* client handle */
184 	struct cu_data *cu = NULL;	/* private data */
185 	struct cu_socket *cs = NULL;
186 	struct sockbuf *sb;
187 	struct timeval now;
188 	struct rpc_msg call_msg;
189 	struct __rpc_sockinfo si;
190 	XDR xdrs;
191 	int error;
192 	uint32_t newxid;
193 
194 	if (svcaddr == NULL) {
195 		rpc_createerr.cf_stat = RPC_UNKNOWNADDR;
196 		return (NULL);
197 	}
198 
199 	if (!__rpc_socket2sockinfo(so, &si)) {
200 		rpc_createerr.cf_stat = RPC_TLIERROR;
201 		rpc_createerr.cf_error.re_errno = 0;
202 		return (NULL);
203 	}
204 
205 	/*
206 	 * Find the receive and the send size
207 	 */
208 	sendsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)sendsz);
209 	recvsz = __rpc_get_t_size(si.si_af, si.si_proto, (int)recvsz);
210 	if ((sendsz == 0) || (recvsz == 0)) {
211 		rpc_createerr.cf_stat = RPC_TLIERROR; /* XXX */
212 		rpc_createerr.cf_error.re_errno = 0;
213 		return (NULL);
214 	}
215 
216 	cl = mem_alloc(sizeof (CLIENT));
217 
218 	/*
219 	 * Should be multiple of 4 for XDR.
220 	 */
221 	sendsz = rounddown(sendsz + 3, 4);
222 	recvsz = rounddown(recvsz + 3, 4);
223 	cu = mem_alloc(sizeof (*cu));
224 	cu->cu_threads = 0;
225 	cu->cu_closing = FALSE;
226 	cu->cu_closed = FALSE;
227 	(void) memcpy(&cu->cu_raddr, svcaddr, (size_t)svcaddr->sa_len);
228 	cu->cu_rlen = svcaddr->sa_len;
229 	/* Other values can also be set through clnt_control() */
230 	cu->cu_wait.tv_sec = 3;	/* heuristically chosen */
231 	cu->cu_wait.tv_usec = 0;
232 	cu->cu_total.tv_sec = -1;
233 	cu->cu_total.tv_usec = -1;
234 	cu->cu_sendsz = sendsz;
235 	cu->cu_recvsz = recvsz;
236 	cu->cu_async = FALSE;
237 	cu->cu_connect = FALSE;
238 	cu->cu_connected = FALSE;
239 	cu->cu_waitchan = "rpcrecv";
240 	cu->cu_waitflag = 0;
241 	cu->cu_cwnd = MAXCWND / 2;
242 	cu->cu_sent = 0;
243 	cu->cu_cwnd_wait = FALSE;
244 	(void) getmicrotime(&now);
245 	/* Clip at 28bits so that it will not wrap around. */
246 	newxid = __RPC_GETXID(&now) & 0xfffffff;
247 	atomic_cmpset_32(&rpc_xid, 0, newxid);
248 	call_msg.rm_xid = atomic_fetchadd_32(&rpc_xid, 1);
249 	call_msg.rm_call.cb_prog = program;
250 	call_msg.rm_call.cb_vers = version;
251 	xdrmem_create(&xdrs, cu->cu_mcallc, MCALL_MSG_SIZE, XDR_ENCODE);
252 	if (! xdr_callhdr(&xdrs, &call_msg)) {
253 		rpc_createerr.cf_stat = RPC_CANTENCODEARGS;  /* XXX */
254 		rpc_createerr.cf_error.re_errno = 0;
255 		goto err2;
256 	}
257 	cu->cu_mcalllen = XDR_GETPOS(&xdrs);
258 
259 	/*
260 	 * By default, closeit is always FALSE. It is users responsibility
261 	 * to do a close on it, else the user may use clnt_control
262 	 * to let clnt_destroy do it for him/her.
263 	 */
264 	cu->cu_closeit = FALSE;
265 	cu->cu_socket = so;
266 	error = soreserve(so, (u_long)sendsz, (u_long)recvsz);
267 	if (error != 0) {
268 		rpc_createerr.cf_stat = RPC_FAILED;
269 		rpc_createerr.cf_error.re_errno = error;
270 		goto err2;
271 	}
272 
273 	sb = &so->so_rcv;
274 	SOCK_RECVBUF_LOCK(so);
275 recheck_socket:
276 	if (sb->sb_upcall) {
277 		if (sb->sb_upcall != clnt_dg_soupcall) {
278 			SOCK_RECVBUF_UNLOCK(so);
279 			printf("clnt_dg_create(): socket already has an incompatible upcall\n");
280 			goto err2;
281 		}
282 		cs = (struct cu_socket *) sb->sb_upcallarg;
283 		mtx_lock(&cs->cs_lock);
284 		cs->cs_refs++;
285 		mtx_unlock(&cs->cs_lock);
286 	} else {
287 		/*
288 		 * We are the first on this socket - allocate the
289 		 * structure and install it in the socket.
290 		 */
291 		SOCK_RECVBUF_UNLOCK(so);
292 		cs = mem_alloc(sizeof(*cs));
293 		SOCK_RECVBUF_LOCK(so);
294 		if (sb->sb_upcall) {
295 			/*
296 			 * We have lost a race with some other client.
297 			 */
298 			mem_free(cs, sizeof(*cs));
299 			goto recheck_socket;
300 		}
301 		mtx_init(&cs->cs_lock, "cs->cs_lock", NULL, MTX_DEF);
302 		cs->cs_refs = 1;
303 		cs->cs_upcallrefs = 0;
304 		TAILQ_INIT(&cs->cs_pending);
305 		soupcall_set(so, SO_RCV, clnt_dg_soupcall, cs);
306 	}
307 	SOCK_RECVBUF_UNLOCK(so);
308 
309 	cl->cl_refs = 1;
310 	cl->cl_ops = &clnt_dg_ops;
311 	cl->cl_private = (caddr_t)(void *)cu;
312 	cl->cl_auth = authnone_create();
313 	cl->cl_tp = NULL;
314 	cl->cl_netid = NULL;
315 	return (cl);
316 err2:
317 	mem_free(cl, sizeof (CLIENT));
318 	mem_free(cu, sizeof (*cu));
319 
320 	return (NULL);
321 }
322 
323 static enum clnt_stat
clnt_dg_call(CLIENT * cl,struct rpc_callextra * ext,rpcproc_t proc,struct mbuf * args,struct mbuf ** resultsp,struct timeval utimeout)324 clnt_dg_call(
325 	CLIENT		*cl,		/* client handle */
326 	struct rpc_callextra *ext,	/* call metadata */
327 	rpcproc_t	proc,		/* procedure number */
328 	struct mbuf	*args,		/* pointer to args */
329 	struct mbuf	**resultsp,	/* pointer to results */
330 	struct timeval	utimeout)	/* seconds to wait before giving up */
331 {
332 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
333 	struct cu_socket *cs;
334 	struct rpc_timers *rt;
335 	AUTH *auth;
336 	struct rpc_err *errp;
337 	enum clnt_stat stat;
338 	XDR xdrs;
339 	struct rpc_msg reply_msg;
340 	bool_t ok;
341 	int retrans;			/* number of re-transmits so far */
342 	int nrefreshes = 2;		/* number of times to refresh cred */
343 	struct timeval *tvp;
344 	int timeout;
345 	int retransmit_time;
346 	int next_sendtime, starttime, rtt, time_waited, tv = 0;
347 	struct sockaddr *sa;
348 	uint32_t xid = 0;
349 	struct mbuf *mreq = NULL, *results;
350 	struct cu_request *cr;
351 	int error;
352 
353 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
354 	cr = malloc(sizeof(struct cu_request), M_RPC, M_WAITOK);
355 
356 	mtx_lock(&cs->cs_lock);
357 
358 	if (cu->cu_closing || cu->cu_closed) {
359 		mtx_unlock(&cs->cs_lock);
360 		free(cr, M_RPC);
361 		return (RPC_CANTSEND);
362 	}
363 	cu->cu_threads++;
364 
365 	if (ext) {
366 		auth = ext->rc_auth;
367 		errp = &ext->rc_err;
368 	} else {
369 		auth = cl->cl_auth;
370 		errp = &cu->cu_error;
371 	}
372 
373 	cr->cr_client = cl;
374 	cr->cr_mrep = NULL;
375 	cr->cr_error = 0;
376 
377 	if (cu->cu_total.tv_usec == -1) {
378 		tvp = &utimeout; /* use supplied timeout */
379 	} else {
380 		tvp = &cu->cu_total; /* use default timeout */
381 	}
382 	if (tvp->tv_sec || tvp->tv_usec)
383 		timeout = tvtohz(tvp);
384 	else
385 		timeout = 0;
386 
387 	if (cu->cu_connect && !cu->cu_connected) {
388 		mtx_unlock(&cs->cs_lock);
389 		error = soconnect(cu->cu_socket,
390 		    (struct sockaddr *)&cu->cu_raddr, curthread);
391 		mtx_lock(&cs->cs_lock);
392 		if (error) {
393 			errp->re_errno = error;
394 			errp->re_status = stat = RPC_CANTSEND;
395 			goto out;
396 		}
397 		cu->cu_connected = 1;
398 	}
399 	if (cu->cu_connected)
400 		sa = NULL;
401 	else
402 		sa = (struct sockaddr *)&cu->cu_raddr;
403 	time_waited = 0;
404 	retrans = 0;
405 	if (ext && ext->rc_timers) {
406 		rt = ext->rc_timers;
407 		if (!rt->rt_rtxcur)
408 			rt->rt_rtxcur = tvtohz(&cu->cu_wait);
409 		retransmit_time = next_sendtime = rt->rt_rtxcur;
410 	} else {
411 		rt = NULL;
412 		retransmit_time = next_sendtime = tvtohz(&cu->cu_wait);
413 	}
414 
415 	starttime = ticks;
416 
417 call_again:
418 	mtx_assert(&cs->cs_lock, MA_OWNED);
419 
420 	xid = atomic_fetchadd_32(&rpc_xid, 1);
421 
422 send_again:
423 	mtx_unlock(&cs->cs_lock);
424 
425 	mreq = m_gethdr(M_WAITOK, MT_DATA);
426 	KASSERT(cu->cu_mcalllen <= MHLEN, ("RPC header too big"));
427 	bcopy(cu->cu_mcallc, mreq->m_data, cu->cu_mcalllen);
428 	mreq->m_len = cu->cu_mcalllen;
429 
430 	/*
431 	 * The XID is the first thing in the request.
432 	 */
433 	*mtod(mreq, uint32_t *) = htonl(xid);
434 
435 	xdrmbuf_create(&xdrs, mreq, XDR_ENCODE);
436 
437 	if (cu->cu_async == TRUE && args == NULL)
438 		goto get_reply;
439 
440 	if ((! XDR_PUTINT32(&xdrs, &proc)) ||
441 	    (! AUTH_MARSHALL(auth, xid, &xdrs,
442 		m_copym(args, 0, M_COPYALL, M_WAITOK)))) {
443 		errp->re_status = stat = RPC_CANTENCODEARGS;
444 		mtx_lock(&cs->cs_lock);
445 		goto out;
446 	}
447 	mreq->m_pkthdr.len = m_length(mreq, NULL);
448 
449 	cr->cr_xid = xid;
450 	mtx_lock(&cs->cs_lock);
451 
452 	/*
453 	 * Try to get a place in the congestion window.
454 	 */
455 	while (cu->cu_sent >= cu->cu_cwnd) {
456 		cu->cu_cwnd_wait = TRUE;
457 		error = msleep(&cu->cu_cwnd_wait, &cs->cs_lock,
458 		    cu->cu_waitflag, "rpccwnd", 0);
459 		if (error) {
460 			errp->re_errno = error;
461 			if (error == EINTR || error == ERESTART)
462 				errp->re_status = stat = RPC_INTR;
463 			else
464 				errp->re_status = stat = RPC_CANTSEND;
465 			goto out;
466 		}
467 	}
468 	cu->cu_sent += CWNDSCALE;
469 
470 	TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
471 	mtx_unlock(&cs->cs_lock);
472 
473 	/*
474 	 * sosend consumes mreq.
475 	 */
476 	error = sosend(cu->cu_socket, sa, NULL, mreq, NULL, 0, curthread);
477 	mreq = NULL;
478 
479 	/*
480 	 * sub-optimal code appears here because we have
481 	 * some clock time to spare while the packets are in flight.
482 	 * (We assume that this is actually only executed once.)
483 	 */
484 	reply_msg.acpted_rply.ar_verf.oa_flavor = AUTH_NULL;
485 	reply_msg.acpted_rply.ar_verf.oa_base = cr->cr_verf;
486 	reply_msg.acpted_rply.ar_verf.oa_length = 0;
487 	reply_msg.acpted_rply.ar_results.where = NULL;
488 	reply_msg.acpted_rply.ar_results.proc = (xdrproc_t)xdr_void;
489 
490 	mtx_lock(&cs->cs_lock);
491 	if (error) {
492 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
493 		errp->re_errno = error;
494 		errp->re_status = stat = RPC_CANTSEND;
495 		cu->cu_sent -= CWNDSCALE;
496 		if (cu->cu_cwnd_wait) {
497 			cu->cu_cwnd_wait = FALSE;
498 			wakeup(&cu->cu_cwnd_wait);
499 		}
500 		goto out;
501 	}
502 
503 	/*
504 	 * Check to see if we got an upcall while waiting for the
505 	 * lock.
506 	 */
507 	if (cr->cr_error) {
508 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
509 		errp->re_errno = cr->cr_error;
510 		errp->re_status = stat = RPC_CANTRECV;
511 		cu->cu_sent -= CWNDSCALE;
512 		if (cu->cu_cwnd_wait) {
513 			cu->cu_cwnd_wait = FALSE;
514 			wakeup(&cu->cu_cwnd_wait);
515 		}
516 		goto out;
517 	}
518 	if (cr->cr_mrep) {
519 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
520 		cu->cu_sent -= CWNDSCALE;
521 		if (cu->cu_cwnd_wait) {
522 			cu->cu_cwnd_wait = FALSE;
523 			wakeup(&cu->cu_cwnd_wait);
524 		}
525 		goto got_reply;
526 	}
527 
528 	/*
529 	 * Hack to provide rpc-based message passing
530 	 */
531 	if (timeout == 0) {
532 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
533 		errp->re_status = stat = RPC_TIMEDOUT;
534 		cu->cu_sent -= CWNDSCALE;
535 		if (cu->cu_cwnd_wait) {
536 			cu->cu_cwnd_wait = FALSE;
537 			wakeup(&cu->cu_cwnd_wait);
538 		}
539 		goto out;
540 	}
541 
542 get_reply:
543 	for (;;) {
544 		/* Decide how long to wait. */
545 		if (next_sendtime < timeout)
546 			tv = next_sendtime;
547 		else
548 			tv = timeout;
549 		tv -= time_waited;
550 
551 		if (tv > 0) {
552 			if (cu->cu_closing || cu->cu_closed) {
553 				error = 0;
554 				cr->cr_error = ESHUTDOWN;
555 			} else {
556 				error = msleep(cr, &cs->cs_lock,
557 				    cu->cu_waitflag, cu->cu_waitchan, tv);
558 			}
559 		} else {
560 			error = EWOULDBLOCK;
561 		}
562 
563 		TAILQ_REMOVE(&cs->cs_pending, cr, cr_link);
564 		cu->cu_sent -= CWNDSCALE;
565 		if (cu->cu_cwnd_wait) {
566 			cu->cu_cwnd_wait = FALSE;
567 			wakeup(&cu->cu_cwnd_wait);
568 		}
569 
570 		if (!error) {
571 			/*
572 			 * We were woken up by the upcall.  If the
573 			 * upcall had a receive error, report that,
574 			 * otherwise we have a reply.
575 			 */
576 			if (cr->cr_error) {
577 				errp->re_errno = cr->cr_error;
578 				errp->re_status = stat = RPC_CANTRECV;
579 				goto out;
580 			}
581 
582 			cu->cu_cwnd += (CWNDSCALE * CWNDSCALE
583 			    + cu->cu_cwnd / 2) / cu->cu_cwnd;
584 			if (cu->cu_cwnd > MAXCWND)
585 				cu->cu_cwnd = MAXCWND;
586 
587 			if (rt) {
588 				/*
589 				 * Add one to the time since a tick
590 				 * count of N means that the actual
591 				 * time taken was somewhere between N
592 				 * and N+1.
593 				 */
594 				rtt = ticks - starttime + 1;
595 
596 				/*
597 				 * Update our estimate of the round
598 				 * trip time using roughly the
599 				 * algorithm described in RFC
600 				 * 2988. Given an RTT sample R:
601 				 *
602 				 * RTTVAR = (1-beta) * RTTVAR + beta * |SRTT-R|
603 				 * SRTT = (1-alpha) * SRTT + alpha * R
604 				 *
605 				 * where alpha = 0.125 and beta = 0.25.
606 				 *
607 				 * The initial retransmit timeout is
608 				 * SRTT + 4*RTTVAR and doubles on each
609 				 * retransmision.
610 				 */
611 				if (rt->rt_srtt == 0) {
612 					rt->rt_srtt = rtt;
613 					rt->rt_deviate = rtt / 2;
614 				} else {
615 					int32_t error = rtt - rt->rt_srtt;
616 					rt->rt_srtt += error / 8;
617 					error = abs(error) - rt->rt_deviate;
618 					rt->rt_deviate += error / 4;
619 				}
620 				rt->rt_rtxcur = rt->rt_srtt + 4*rt->rt_deviate;
621 			}
622 
623 			break;
624 		}
625 
626 		/*
627 		 * The sleep returned an error so our request is still
628 		 * on the list. If we got EWOULDBLOCK, we may want to
629 		 * re-send the request.
630 		 */
631 		if (error != EWOULDBLOCK) {
632 			errp->re_errno = error;
633 			if (error == EINTR || error == ERESTART)
634 				errp->re_status = stat = RPC_INTR;
635 			else
636 				errp->re_status = stat = RPC_CANTRECV;
637 			goto out;
638 		}
639 
640 		time_waited = ticks - starttime;
641 
642 		/* Check for timeout. */
643 		if (time_waited > timeout) {
644 			errp->re_errno = EWOULDBLOCK;
645 			errp->re_status = stat = RPC_TIMEDOUT;
646 			goto out;
647 		}
648 
649 		/* Retransmit if necessary. */
650 		if (time_waited >= next_sendtime) {
651 			cu->cu_cwnd /= 2;
652 			if (cu->cu_cwnd < CWNDSCALE)
653 				cu->cu_cwnd = CWNDSCALE;
654 			if (ext && ext->rc_feedback) {
655 				mtx_unlock(&cs->cs_lock);
656 				if (retrans == 0)
657 					ext->rc_feedback(FEEDBACK_REXMIT1,
658 					    proc, ext->rc_feedback_arg);
659 				else
660 					ext->rc_feedback(FEEDBACK_REXMIT2,
661 					    proc, ext->rc_feedback_arg);
662 				mtx_lock(&cs->cs_lock);
663 			}
664 			if (cu->cu_closing || cu->cu_closed) {
665 				errp->re_errno = ESHUTDOWN;
666 				errp->re_status = stat = RPC_CANTRECV;
667 				goto out;
668 			}
669 			retrans++;
670 			/* update retransmit_time */
671 			if (retransmit_time < RPC_MAX_BACKOFF * hz)
672 				retransmit_time = 2 * retransmit_time;
673 			next_sendtime += retransmit_time;
674 			goto send_again;
675 		}
676 		cu->cu_sent += CWNDSCALE;
677 		TAILQ_INSERT_TAIL(&cs->cs_pending, cr, cr_link);
678 	}
679 
680 got_reply:
681 	/*
682 	 * Now decode and validate the response. We need to drop the
683 	 * lock since xdr_replymsg may end up sleeping in malloc.
684 	 */
685 	mtx_unlock(&cs->cs_lock);
686 
687 	if (ext && ext->rc_feedback)
688 		ext->rc_feedback(FEEDBACK_OK, proc, ext->rc_feedback_arg);
689 
690 	xdrmbuf_create(&xdrs, cr->cr_mrep, XDR_DECODE);
691 	ok = xdr_replymsg(&xdrs, &reply_msg);
692 	cr->cr_mrep = NULL;
693 
694 	if (ok) {
695 		if ((reply_msg.rm_reply.rp_stat == MSG_ACCEPTED) &&
696 		    (reply_msg.acpted_rply.ar_stat == SUCCESS))
697 			errp->re_status = stat = RPC_SUCCESS;
698 		else
699 			stat = _seterr_reply(&reply_msg, &(cu->cu_error));
700 
701 		if (errp->re_status == RPC_SUCCESS) {
702 			results = xdrmbuf_getall(&xdrs);
703 			if (! AUTH_VALIDATE(auth, xid,
704 				&reply_msg.acpted_rply.ar_verf,
705 				&results)) {
706 				errp->re_status = stat = RPC_AUTHERROR;
707 				errp->re_why = AUTH_INVALIDRESP;
708 				if (retrans &&
709 				    auth->ah_cred.oa_flavor == RPCSEC_GSS) {
710 					/*
711 					 * If we retransmitted, its
712 					 * possible that we will
713 					 * receive a reply for one of
714 					 * the earlier transmissions
715 					 * (which will use an older
716 					 * RPCSEC_GSS sequence
717 					 * number). In this case, just
718 					 * go back and listen for a
719 					 * new reply. We could keep a
720 					 * record of all the seq
721 					 * numbers we have transmitted
722 					 * so far so that we could
723 					 * accept a reply for any of
724 					 * them here.
725 					 */
726 					XDR_DESTROY(&xdrs);
727 					mtx_lock(&cs->cs_lock);
728 					cu->cu_sent += CWNDSCALE;
729 					TAILQ_INSERT_TAIL(&cs->cs_pending,
730 					    cr, cr_link);
731 					cr->cr_mrep = NULL;
732 					goto get_reply;
733 				}
734 			} else {
735 				*resultsp = results;
736 			}
737 		}		/* end successful completion */
738 		/*
739 		 * If unsuccessful AND error is an authentication error
740 		 * then refresh credentials and try again, else break
741 		 */
742 		else if (stat == RPC_AUTHERROR) {
743 			/* maybe our credentials need to be refreshed ... */
744 			CURVNET_SET_QUIET(TD_TO_VNET(curthread));
745 			if (nrefreshes > 0 &&
746 			    AUTH_REFRESH(auth, &reply_msg)) {
747 				CURVNET_RESTORE();
748 				nrefreshes--;
749 				XDR_DESTROY(&xdrs);
750 				mtx_lock(&cs->cs_lock);
751 				goto call_again;
752 			}
753 			CURVNET_RESTORE();
754 		}
755 		/* end of unsuccessful completion */
756 	}	/* end of valid reply message */
757 	else {
758 		errp->re_status = stat = RPC_CANTDECODERES;
759 
760 	}
761 	XDR_DESTROY(&xdrs);
762 	mtx_lock(&cs->cs_lock);
763 out:
764 	mtx_assert(&cs->cs_lock, MA_OWNED);
765 
766 	if (mreq)
767 		m_freem(mreq);
768 	if (cr->cr_mrep)
769 		m_freem(cr->cr_mrep);
770 
771 	cu->cu_threads--;
772 	if (cu->cu_closing)
773 		wakeup(cu);
774 
775 	mtx_unlock(&cs->cs_lock);
776 
777 	if (auth && stat != RPC_SUCCESS)
778 		AUTH_VALIDATE(auth, xid, NULL, NULL);
779 
780 	free(cr, M_RPC);
781 
782 	return (stat);
783 }
784 
785 static void
clnt_dg_geterr(CLIENT * cl,struct rpc_err * errp)786 clnt_dg_geterr(CLIENT *cl, struct rpc_err *errp)
787 {
788 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
789 
790 	*errp = cu->cu_error;
791 }
792 
793 static bool_t
clnt_dg_freeres(CLIENT * cl,xdrproc_t xdr_res,void * res_ptr)794 clnt_dg_freeres(CLIENT *cl, xdrproc_t xdr_res, void *res_ptr)
795 {
796 	XDR xdrs;
797 	bool_t dummy;
798 
799 	xdrs.x_op = XDR_FREE;
800 	dummy = (*xdr_res)(&xdrs, res_ptr);
801 
802 	return (dummy);
803 }
804 
805 /*ARGSUSED*/
806 static void
clnt_dg_abort(CLIENT * h)807 clnt_dg_abort(CLIENT *h)
808 {
809 }
810 
811 static bool_t
clnt_dg_control(CLIENT * cl,u_int request,void * info)812 clnt_dg_control(CLIENT *cl, u_int request, void *info)
813 {
814 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
815 	struct cu_socket *cs;
816 	struct sockaddr *addr;
817 
818 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
819 	mtx_lock(&cs->cs_lock);
820 
821 	switch (request) {
822 	case CLSET_FD_CLOSE:
823 		cu->cu_closeit = TRUE;
824 		mtx_unlock(&cs->cs_lock);
825 		return (TRUE);
826 	case CLSET_FD_NCLOSE:
827 		cu->cu_closeit = FALSE;
828 		mtx_unlock(&cs->cs_lock);
829 		return (TRUE);
830 	}
831 
832 	/* for other requests which use info */
833 	if (info == NULL) {
834 		mtx_unlock(&cs->cs_lock);
835 		return (FALSE);
836 	}
837 	switch (request) {
838 	case CLSET_TIMEOUT:
839 		if (time_not_ok((struct timeval *)info)) {
840 			mtx_unlock(&cs->cs_lock);
841 			return (FALSE);
842 		}
843 		cu->cu_total = *(struct timeval *)info;
844 		break;
845 	case CLGET_TIMEOUT:
846 		*(struct timeval *)info = cu->cu_total;
847 		break;
848 	case CLSET_RETRY_TIMEOUT:
849 		if (time_not_ok((struct timeval *)info)) {
850 			mtx_unlock(&cs->cs_lock);
851 			return (FALSE);
852 		}
853 		cu->cu_wait = *(struct timeval *)info;
854 		break;
855 	case CLGET_RETRY_TIMEOUT:
856 		*(struct timeval *)info = cu->cu_wait;
857 		break;
858 	case CLGET_SVC_ADDR:
859 		/*
860 		 * Slightly different semantics to userland - we use
861 		 * sockaddr instead of netbuf.
862 		 */
863 		memcpy(info, &cu->cu_raddr, cu->cu_raddr.ss_len);
864 		break;
865 	case CLSET_SVC_ADDR:		/* set to new address */
866 		addr = (struct sockaddr *)info;
867 		(void) memcpy(&cu->cu_raddr, addr, addr->sa_len);
868 		break;
869 	case CLGET_XID:
870 		*(uint32_t *)info = atomic_load_32(&rpc_xid);
871 		break;
872 
873 	case CLSET_XID:
874 		/* This will set the xid of the NEXT call */
875 		/* decrement by 1 as clnt_dg_call() increments once */
876 		atomic_store_32(&rpc_xid, *(uint32_t *)info - 1);
877 		break;
878 
879 	case CLGET_VERS:
880 		/*
881 		 * This RELIES on the information that, in the call body,
882 		 * the version number field is the fifth field from the
883 		 * beginning of the RPC header. MUST be changed if the
884 		 * call_struct is changed
885 		 */
886 		*(uint32_t *)info =
887 		    ntohl(*(uint32_t *)(void *)(cu->cu_mcallc +
888 		    4 * BYTES_PER_XDR_UNIT));
889 		break;
890 
891 	case CLSET_VERS:
892 		*(uint32_t *)(void *)(cu->cu_mcallc + 4 * BYTES_PER_XDR_UNIT)
893 			= htonl(*(uint32_t *)info);
894 		break;
895 
896 	case CLGET_PROG:
897 		/*
898 		 * This RELIES on the information that, in the call body,
899 		 * the program number field is the fourth field from the
900 		 * beginning of the RPC header. MUST be changed if the
901 		 * call_struct is changed
902 		 */
903 		*(uint32_t *)info =
904 		    ntohl(*(uint32_t *)(void *)(cu->cu_mcallc +
905 		    3 * BYTES_PER_XDR_UNIT));
906 		break;
907 
908 	case CLSET_PROG:
909 		*(uint32_t *)(void *)(cu->cu_mcallc + 3 * BYTES_PER_XDR_UNIT)
910 			= htonl(*(uint32_t *)info);
911 		break;
912 	case CLSET_ASYNC:
913 		cu->cu_async = *(int *)info;
914 		break;
915 	case CLSET_CONNECT:
916 		cu->cu_connect = *(int *)info;
917 		break;
918 	case CLSET_WAITCHAN:
919 		cu->cu_waitchan = (const char *)info;
920 		break;
921 	case CLGET_WAITCHAN:
922 		*(const char **) info = cu->cu_waitchan;
923 		break;
924 	case CLSET_INTERRUPTIBLE:
925 		if (*(int *) info)
926 			cu->cu_waitflag = PCATCH;
927 		else
928 			cu->cu_waitflag = 0;
929 		break;
930 	case CLGET_INTERRUPTIBLE:
931 		if (cu->cu_waitflag)
932 			*(int *) info = TRUE;
933 		else
934 			*(int *) info = FALSE;
935 		break;
936 	default:
937 		mtx_unlock(&cs->cs_lock);
938 		return (FALSE);
939 	}
940 	mtx_unlock(&cs->cs_lock);
941 	return (TRUE);
942 }
943 
944 static void
clnt_dg_close(CLIENT * cl)945 clnt_dg_close(CLIENT *cl)
946 {
947 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
948 	struct cu_socket *cs;
949 	struct cu_request *cr;
950 
951 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
952 	mtx_lock(&cs->cs_lock);
953 
954 	if (cu->cu_closed) {
955 		mtx_unlock(&cs->cs_lock);
956 		return;
957 	}
958 
959 	if (cu->cu_closing) {
960 		while (cu->cu_closing)
961 			msleep(cu, &cs->cs_lock, 0, "rpcclose", 0);
962 		KASSERT(cu->cu_closed, ("client should be closed"));
963 		mtx_unlock(&cs->cs_lock);
964 		return;
965 	}
966 
967 	/*
968 	 * Abort any pending requests and wait until everyone
969 	 * has finished with clnt_vc_call.
970 	 */
971 	cu->cu_closing = TRUE;
972 	TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
973 		if (cr->cr_client == cl) {
974 			cr->cr_xid = 0;
975 			cr->cr_error = ESHUTDOWN;
976 			wakeup(cr);
977 		}
978 	}
979 
980 	while (cu->cu_threads)
981 		msleep(cu, &cs->cs_lock, 0, "rpcclose", 0);
982 
983 	cu->cu_closing = FALSE;
984 	cu->cu_closed = TRUE;
985 
986 	mtx_unlock(&cs->cs_lock);
987 	wakeup(cu);
988 }
989 
990 static void
clnt_dg_destroy(CLIENT * cl)991 clnt_dg_destroy(CLIENT *cl)
992 {
993 	struct cu_data *cu = (struct cu_data *)cl->cl_private;
994 	struct cu_socket *cs;
995 	struct socket *so = NULL;
996 	bool_t lastsocketref;
997 
998 	cs = cu->cu_socket->so_rcv.sb_upcallarg;
999 	clnt_dg_close(cl);
1000 
1001 	SOCK_RECVBUF_LOCK(cu->cu_socket);
1002 	mtx_lock(&cs->cs_lock);
1003 
1004 	cs->cs_refs--;
1005 	if (cs->cs_refs == 0) {
1006 		mtx_unlock(&cs->cs_lock);
1007 		soupcall_clear(cu->cu_socket, SO_RCV);
1008 		clnt_dg_upcallsdone(cu->cu_socket, cs);
1009 		SOCK_RECVBUF_UNLOCK(cu->cu_socket);
1010 		mtx_destroy(&cs->cs_lock);
1011 		mem_free(cs, sizeof(*cs));
1012 		lastsocketref = TRUE;
1013 	} else {
1014 		mtx_unlock(&cs->cs_lock);
1015 		SOCK_RECVBUF_UNLOCK(cu->cu_socket);
1016 		lastsocketref = FALSE;
1017 	}
1018 
1019 	if (cu->cu_closeit && lastsocketref) {
1020 		so = cu->cu_socket;
1021 		cu->cu_socket = NULL;
1022 	}
1023 
1024 	if (so)
1025 		soclose(so);
1026 
1027 	if (cl->cl_netid && cl->cl_netid[0])
1028 		mem_free(cl->cl_netid, strlen(cl->cl_netid) +1);
1029 	if (cl->cl_tp && cl->cl_tp[0])
1030 		mem_free(cl->cl_tp, strlen(cl->cl_tp) +1);
1031 	mem_free(cu, sizeof (*cu));
1032 	mem_free(cl, sizeof (CLIENT));
1033 }
1034 
1035 /*
1036  * Make sure that the time is not garbage.  -1 value is allowed.
1037  */
1038 static bool_t
time_not_ok(struct timeval * t)1039 time_not_ok(struct timeval *t)
1040 {
1041 	return (t->tv_sec < -1 || t->tv_sec > 100000000 ||
1042 		t->tv_usec < -1 || t->tv_usec > 1000000);
1043 }
1044 
1045 int
clnt_dg_soupcall(struct socket * so,void * arg,int waitflag)1046 clnt_dg_soupcall(struct socket *so, void *arg, int waitflag)
1047 {
1048 	struct cu_socket *cs = (struct cu_socket *) arg;
1049 	struct uio uio;
1050 	struct mbuf *m;
1051 	struct mbuf *control;
1052 	struct cu_request *cr;
1053 	int error, rcvflag, foundreq;
1054 	uint32_t xid;
1055 
1056 	cs->cs_upcallrefs++;
1057 	uio.uio_resid = 1000000000;
1058 	uio.uio_td = curthread;
1059 	do {
1060 		SOCK_RECVBUF_UNLOCK(so);
1061 		m = NULL;
1062 		control = NULL;
1063 		rcvflag = MSG_DONTWAIT;
1064 		error = soreceive(so, NULL, &uio, &m, &control, &rcvflag);
1065 		if (control)
1066 			m_freem(control);
1067 		SOCK_RECVBUF_LOCK(so);
1068 
1069 		if (error == EWOULDBLOCK)
1070 			break;
1071 
1072 		/*
1073 		 * If there was an error, wake up all pending
1074 		 * requests.
1075 		 */
1076 		if (error) {
1077 			mtx_lock(&cs->cs_lock);
1078 			TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
1079 				cr->cr_xid = 0;
1080 				cr->cr_error = error;
1081 				wakeup(cr);
1082 			}
1083 			mtx_unlock(&cs->cs_lock);
1084 			break;
1085 		}
1086 
1087 		/*
1088 		 * The XID is in the first uint32_t of the reply.
1089 		 */
1090 		if (m->m_len < sizeof(xid) && m_length(m, NULL) < sizeof(xid)) {
1091 			/*
1092 			 * Should never happen.
1093 			 */
1094 			m_freem(m);
1095 			continue;
1096 		}
1097 
1098 		m_copydata(m, 0, sizeof(xid), (char *)&xid);
1099 		xid = ntohl(xid);
1100 
1101 		/*
1102 		 * Attempt to match this reply with a pending request.
1103 		 */
1104 		mtx_lock(&cs->cs_lock);
1105 		foundreq = 0;
1106 		TAILQ_FOREACH(cr, &cs->cs_pending, cr_link) {
1107 			if (cr->cr_xid == xid) {
1108 				/*
1109 				 * This one matches. We leave the
1110 				 * reply mbuf in cr->cr_mrep. Set the
1111 				 * XID to zero so that we will ignore
1112 				 * any duplicated replies that arrive
1113 				 * before clnt_dg_call removes it from
1114 				 * the queue.
1115 				 */
1116 				cr->cr_xid = 0;
1117 				cr->cr_mrep = m;
1118 				cr->cr_error = 0;
1119 				foundreq = 1;
1120 				wakeup(cr);
1121 				break;
1122 			}
1123 		}
1124 		mtx_unlock(&cs->cs_lock);
1125 
1126 		/*
1127 		 * If we didn't find the matching request, just drop
1128 		 * it - its probably a repeated reply.
1129 		 */
1130 		if (!foundreq)
1131 			m_freem(m);
1132 	} while (m);
1133 	cs->cs_upcallrefs--;
1134 	if (cs->cs_upcallrefs < 0)
1135 		panic("rpcdg upcall refcnt");
1136 	if (cs->cs_upcallrefs == 0)
1137 		wakeup(&cs->cs_upcallrefs);
1138 	return (SU_OK);
1139 }
1140 
1141 /*
1142  * Wait for all upcalls in progress to complete.
1143  */
1144 static void
clnt_dg_upcallsdone(struct socket * so,struct cu_socket * cs)1145 clnt_dg_upcallsdone(struct socket *so, struct cu_socket *cs)
1146 {
1147 
1148 	SOCK_RECVBUF_LOCK_ASSERT(so);
1149 
1150 	while (cs->cs_upcallrefs > 0)
1151 		(void) msleep(&cs->cs_upcallrefs, SOCKBUF_MTX(&so->so_rcv), 0,
1152 		    "rpcdgup", 0);
1153 }
1154