xref: /freebsd/sys/rpc/svc_vc.c (revision bc5304a006238115291e7568583632889dffbab9)
1 /*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-3-Clause
5  *
6  * Copyright (c) 2009, Sun Microsystems, Inc.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions are met:
11  * - Redistributions of source code must retain the above copyright notice,
12  *   this list of conditions and the following disclaimer.
13  * - Redistributions in binary form must reproduce the above copyright notice,
14  *   this list of conditions and the following disclaimer in the documentation
15  *   and/or other materials provided with the distribution.
16  * - Neither the name of Sun Microsystems, Inc. nor the names of its
17  *   contributors may be used to endorse or promote products derived
18  *   from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #if defined(LIBC_SCCS) && !defined(lint)
34 static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
35 static char *sccsid = "@(#)svc_tcp.c	2.2 88/08/01 4.0 RPCSRC";
36 #endif
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 /*
41  * svc_vc.c, Server side for Connection Oriented based RPC.
42  *
43  * Actually implements two flavors of transporter -
 * a tcp rendezvouser (a listener and connection establisher)
45  * and a record/tcp stream.
46  */
47 
48 #include "opt_kern_tls.h"
49 
50 #include <sys/param.h>
51 #include <sys/limits.h>
52 #include <sys/lock.h>
53 #include <sys/kernel.h>
54 #include <sys/ktls.h>
55 #include <sys/malloc.h>
56 #include <sys/mbuf.h>
57 #include <sys/mutex.h>
58 #include <sys/proc.h>
59 #include <sys/protosw.h>
60 #include <sys/queue.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/sx.h>
64 #include <sys/systm.h>
65 #include <sys/uio.h>
66 
67 #include <net/vnet.h>
68 
69 #include <netinet/tcp.h>
70 
71 #include <rpc/rpc.h>
72 #include <rpc/rpcsec_tls.h>
73 
74 #include <rpc/krpc.h>
75 #include <rpc/rpc_com.h>
76 
77 #include <security/mac/mac_framework.h>
78 
/*
 * Forward declarations of the file-local transport methods and helpers.
 * The methods are grouped by the xp_ops table that references them:
 * rendezvous (listening socket), connected stream, and backchannel.
 */

/* Rendezvous (listening) transport methods. */
static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
static void svc_vc_rendezvous_destroy(SVCXPRT *);
/* Stub that always returns FALSE; used as xp_reply of the rendezvous ops. */
static bool_t svc_vc_null(void);
/* Connected stream transport methods. */
static void svc_vc_destroy(SVCXPRT *);
static enum xprt_stat svc_vc_stat(SVCXPRT *);
static bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
    struct sockaddr *, struct mbuf *, uint32_t *seq);
static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
    void *in);
/* Backchannel transport methods. */
static void svc_vc_backchannel_destroy(SVCXPRT *);
static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
    struct sockaddr **, struct mbuf **);
static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
    struct sockaddr *, struct mbuf *, uint32_t *);
static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
    void *in);
/* Internal helpers and socket upcalls. */
static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
    struct sockaddr *raddr);
static int svc_vc_accept(struct socket *head, struct socket **sop);
static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
107 
108 static const struct xp_ops svc_vc_rendezvous_ops = {
109 	.xp_recv =	svc_vc_rendezvous_recv,
110 	.xp_stat =	svc_vc_rendezvous_stat,
111 	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
112 		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
113 	.xp_destroy =	svc_vc_rendezvous_destroy,
114 	.xp_control =	svc_vc_rendezvous_control
115 };
116 
117 static const struct xp_ops svc_vc_ops = {
118 	.xp_recv =	svc_vc_recv,
119 	.xp_stat =	svc_vc_stat,
120 	.xp_ack =	svc_vc_ack,
121 	.xp_reply =	svc_vc_reply,
122 	.xp_destroy =	svc_vc_destroy,
123 	.xp_control =	svc_vc_control
124 };
125 
126 static const struct xp_ops svc_vc_backchannel_ops = {
127 	.xp_recv =	svc_vc_backchannel_recv,
128 	.xp_stat =	svc_vc_backchannel_stat,
129 	.xp_reply =	svc_vc_backchannel_reply,
130 	.xp_destroy =	svc_vc_backchannel_destroy,
131 	.xp_control =	svc_vc_backchannel_control
132 };
133 
134 /*
135  * Usage:
136  *	xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
137  *
138  * Creates, registers, and returns a (rpc) tcp based transporter.
139  * Once *xprt is initialized, it is registered as a transporter
140  * see (svc.h, xprt_register).  This routine returns
141  * a NULL if a problem occurred.
142  *
143  * The filedescriptor passed in is expected to refer to a bound, but
144  * not yet connected socket.
145  *
146  * Since streams do buffered io similar to stdio, the caller can specify
147  * how big the send and receive buffers are via the second and third parms;
148  * 0 => use the system default.
149  */
150 SVCXPRT *
151 svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
152     size_t recvsize)
153 {
154 	SVCXPRT *xprt;
155 	struct sockaddr* sa;
156 	int error;
157 
158 	SOCK_LOCK(so);
159 	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
160 		SOCK_UNLOCK(so);
161 		CURVNET_SET(so->so_vnet);
162 		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
163 		CURVNET_RESTORE();
164 		if (error)
165 			return (NULL);
166 		xprt = svc_vc_create_conn(pool, so, sa);
167 		free(sa, M_SONAME);
168 		return (xprt);
169 	}
170 	SOCK_UNLOCK(so);
171 
172 	xprt = svc_xprt_alloc();
173 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
174 	xprt->xp_pool = pool;
175 	xprt->xp_socket = so;
176 	xprt->xp_p1 = NULL;
177 	xprt->xp_p2 = NULL;
178 	xprt->xp_ops = &svc_vc_rendezvous_ops;
179 
180 	CURVNET_SET(so->so_vnet);
181 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
182 	CURVNET_RESTORE();
183 	if (error) {
184 		goto cleanup_svc_vc_create;
185 	}
186 
187 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
188 	free(sa, M_SONAME);
189 
190 	xprt_register(xprt);
191 
192 	solisten(so, -1, curthread);
193 
194 	SOLISTEN_LOCK(so);
195 	xprt->xp_upcallset = 1;
196 	solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
197 	SOLISTEN_UNLOCK(so);
198 
199 	return (xprt);
200 
201 cleanup_svc_vc_create:
202 	sx_destroy(&xprt->xp_lock);
203 	svc_xprt_free(xprt);
204 
205 	return (NULL);
206 }
207 
208 /*
209  * Create a new transport for a socket optained via soaccept().
210  */
211 SVCXPRT *
212 svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
213 {
214 	SVCXPRT *xprt;
215 	struct cf_conn *cd;
216 	struct sockaddr* sa = NULL;
217 	struct sockopt opt;
218 	int one = 1;
219 	int error;
220 
221 	bzero(&opt, sizeof(struct sockopt));
222 	opt.sopt_dir = SOPT_SET;
223 	opt.sopt_level = SOL_SOCKET;
224 	opt.sopt_name = SO_KEEPALIVE;
225 	opt.sopt_val = &one;
226 	opt.sopt_valsize = sizeof(one);
227 	error = sosetopt(so, &opt);
228 	if (error) {
229 		return (NULL);
230 	}
231 
232 	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
233 		bzero(&opt, sizeof(struct sockopt));
234 		opt.sopt_dir = SOPT_SET;
235 		opt.sopt_level = IPPROTO_TCP;
236 		opt.sopt_name = TCP_NODELAY;
237 		opt.sopt_val = &one;
238 		opt.sopt_valsize = sizeof(one);
239 		error = sosetopt(so, &opt);
240 		if (error) {
241 			return (NULL);
242 		}
243 	}
244 
245 	cd = mem_alloc(sizeof(*cd));
246 	cd->strm_stat = XPRT_IDLE;
247 
248 	xprt = svc_xprt_alloc();
249 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
250 	xprt->xp_pool = pool;
251 	xprt->xp_socket = so;
252 	xprt->xp_p1 = cd;
253 	xprt->xp_p2 = NULL;
254 	xprt->xp_ops = &svc_vc_ops;
255 
256 	/*
257 	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
258 	 * has a 5 minute timer, server has a 6 minute timer.
259 	 */
260 	xprt->xp_idletimeout = 6 * 60;
261 
262 	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
263 
264 	CURVNET_SET(so->so_vnet);
265 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
266 	CURVNET_RESTORE();
267 	if (error)
268 		goto cleanup_svc_vc_create;
269 
270 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
271 	free(sa, M_SONAME);
272 
273 	xprt_register(xprt);
274 
275 	SOCKBUF_LOCK(&so->so_rcv);
276 	xprt->xp_upcallset = 1;
277 	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
278 	SOCKBUF_UNLOCK(&so->so_rcv);
279 
280 	/*
281 	 * Throw the transport into the active list in case it already
282 	 * has some data buffered.
283 	 */
284 	sx_xlock(&xprt->xp_lock);
285 	xprt_active(xprt);
286 	sx_xunlock(&xprt->xp_lock);
287 
288 	return (xprt);
289 cleanup_svc_vc_create:
290 	sx_destroy(&xprt->xp_lock);
291 	svc_xprt_free(xprt);
292 	mem_free(cd, sizeof(*cd));
293 
294 	return (NULL);
295 }
296 
297 /*
298  * Create a new transport for a backchannel on a clnt_vc socket.
299  */
300 SVCXPRT *
301 svc_vc_create_backchannel(SVCPOOL *pool)
302 {
303 	SVCXPRT *xprt = NULL;
304 	struct cf_conn *cd = NULL;
305 
306 	cd = mem_alloc(sizeof(*cd));
307 	cd->strm_stat = XPRT_IDLE;
308 
309 	xprt = svc_xprt_alloc();
310 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
311 	xprt->xp_pool = pool;
312 	xprt->xp_socket = NULL;
313 	xprt->xp_p1 = cd;
314 	xprt->xp_p2 = NULL;
315 	xprt->xp_ops = &svc_vc_backchannel_ops;
316 	return (xprt);
317 }
318 
319 /*
320  * This does all of the accept except the final call to soaccept. The
321  * caller will call soaccept after dropping its locks (soaccept may
322  * call malloc).
323  */
324 int
325 svc_vc_accept(struct socket *head, struct socket **sop)
326 {
327 	struct socket *so;
328 	int error = 0;
329 	short nbio;
330 
331 	/* XXXGL: shouldn't that be an assertion? */
332 	if (!SOLISTENING(head)) {
333 		error = EINVAL;
334 		goto done;
335 	}
336 #ifdef MAC
337 	error = mac_socket_check_accept(curthread->td_ucred, head);
338 	if (error != 0)
339 		goto done;
340 #endif
341 	/*
342 	 * XXXGL: we want non-blocking semantics.  The socket could be a
343 	 * socket created by kernel as well as socket shared with userland,
344 	 * so we can't be sure about presense of SS_NBIO.  We also shall not
345 	 * toggle it on the socket, since that may surprise userland.  So we
346 	 * set SS_NBIO only temporarily.
347 	 */
348 	SOLISTEN_LOCK(head);
349 	nbio = head->so_state & SS_NBIO;
350 	head->so_state |= SS_NBIO;
351 	error = solisten_dequeue(head, &so, 0);
352 	head->so_state &= (nbio & ~SS_NBIO);
353 	if (error)
354 		goto done;
355 
356 	so->so_state |= nbio;
357 	*sop = so;
358 
359 	/* connection has been removed from the listen queue */
360 	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
361 done:
362 	return (error);
363 }
364 
365 /*ARGSUSED*/
366 static bool_t
367 svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
368     struct sockaddr **addrp, struct mbuf **mp)
369 {
370 	struct socket *so = NULL;
371 	struct sockaddr *sa = NULL;
372 	int error;
373 	SVCXPRT *new_xprt;
374 
375 	/*
376 	 * The socket upcall calls xprt_active() which will eventually
377 	 * cause the server to call us here. We attempt to accept a
378 	 * connection from the socket and turn it into a new
379 	 * transport. If the accept fails, we have drained all pending
380 	 * connections so we call xprt_inactive().
381 	 */
382 	sx_xlock(&xprt->xp_lock);
383 
384 	error = svc_vc_accept(xprt->xp_socket, &so);
385 
386 	if (error == EWOULDBLOCK) {
387 		/*
388 		 * We must re-test for new connections after taking
389 		 * the lock to protect us in the case where a new
390 		 * connection arrives after our call to accept fails
391 		 * with EWOULDBLOCK.
392 		 */
393 		SOLISTEN_LOCK(xprt->xp_socket);
394 		if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
395 			xprt_inactive_self(xprt);
396 		SOLISTEN_UNLOCK(xprt->xp_socket);
397 		sx_xunlock(&xprt->xp_lock);
398 		return (FALSE);
399 	}
400 
401 	if (error) {
402 		SOLISTEN_LOCK(xprt->xp_socket);
403 		if (xprt->xp_upcallset) {
404 			xprt->xp_upcallset = 0;
405 			soupcall_clear(xprt->xp_socket, SO_RCV);
406 		}
407 		SOLISTEN_UNLOCK(xprt->xp_socket);
408 		xprt_inactive_self(xprt);
409 		sx_xunlock(&xprt->xp_lock);
410 		return (FALSE);
411 	}
412 
413 	sx_xunlock(&xprt->xp_lock);
414 
415 	sa = NULL;
416 	error = soaccept(so, &sa);
417 
418 	if (error) {
419 		/*
420 		 * XXX not sure if I need to call sofree or soclose here.
421 		 */
422 		if (sa)
423 			free(sa, M_SONAME);
424 		return (FALSE);
425 	}
426 
427 	/*
428 	 * svc_vc_create_conn will call xprt_register - we don't need
429 	 * to do anything with the new connection except derefence it.
430 	 */
431 	new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
432 	if (!new_xprt) {
433 		soclose(so);
434 	} else {
435 		SVC_RELEASE(new_xprt);
436 	}
437 
438 	free(sa, M_SONAME);
439 
440 	return (FALSE); /* there is never an rpc msg to be processed */
441 }
442 
443 /*ARGSUSED*/
444 static enum xprt_stat
445 svc_vc_rendezvous_stat(SVCXPRT *xprt)
446 {
447 
448 	return (XPRT_IDLE);
449 }
450 
451 static void
452 svc_vc_destroy_common(SVCXPRT *xprt)
453 {
454 	enum clnt_stat stat;
455 	uint32_t reterr;
456 
457 	if (xprt->xp_socket) {
458 		if ((xprt->xp_tls & (RPCTLS_FLAGS_HANDSHAKE |
459 		    RPCTLS_FLAGS_HANDSHFAIL)) != 0) {
460 			if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
461 				/*
462 				 * If the upcall fails, the socket has
463 				 * probably been closed via the rpctlssd
464 				 * daemon having crashed or been
465 				 * restarted, so just ignore returned stat.
466 				 */
467 				stat = rpctls_srv_disconnect(xprt->xp_sslsec,
468 				    xprt->xp_sslusec, xprt->xp_sslrefno,
469 				    &reterr);
470 			}
471 			/* Must sorele() to get rid of reference. */
472 			CURVNET_SET(xprt->xp_socket->so_vnet);
473 			SOCK_LOCK(xprt->xp_socket);
474 			sorele(xprt->xp_socket);
475 			CURVNET_RESTORE();
476 		} else
477 			(void)soclose(xprt->xp_socket);
478 	}
479 
480 	if (xprt->xp_netid)
481 		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
482 	svc_xprt_free(xprt);
483 }
484 
485 static void
486 svc_vc_rendezvous_destroy(SVCXPRT *xprt)
487 {
488 
489 	SOLISTEN_LOCK(xprt->xp_socket);
490 	if (xprt->xp_upcallset) {
491 		xprt->xp_upcallset = 0;
492 		solisten_upcall_set(xprt->xp_socket, NULL, NULL);
493 	}
494 	SOLISTEN_UNLOCK(xprt->xp_socket);
495 
496 	svc_vc_destroy_common(xprt);
497 }
498 
499 static void
500 svc_vc_destroy(SVCXPRT *xprt)
501 {
502 	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
503 	CLIENT *cl = (CLIENT *)xprt->xp_p2;
504 
505 	SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
506 	if (xprt->xp_upcallset) {
507 		xprt->xp_upcallset = 0;
508 		if (xprt->xp_socket->so_rcv.sb_upcall != NULL)
509 			soupcall_clear(xprt->xp_socket, SO_RCV);
510 	}
511 	SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
512 
513 	if (cl != NULL)
514 		CLNT_RELEASE(cl);
515 
516 	svc_vc_destroy_common(xprt);
517 
518 	if (cd->mreq)
519 		m_freem(cd->mreq);
520 	if (cd->mpending)
521 		m_freem(cd->mpending);
522 	mem_free(cd, sizeof(*cd));
523 }
524 
525 static void
526 svc_vc_backchannel_destroy(SVCXPRT *xprt)
527 {
528 	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
529 	struct mbuf *m, *m2;
530 
531 	svc_xprt_free(xprt);
532 	m = cd->mreq;
533 	while (m != NULL) {
534 		m2 = m;
535 		m = m->m_nextpkt;
536 		m_freem(m2);
537 	}
538 	mem_free(cd, sizeof(*cd));
539 }
540 
541 /*ARGSUSED*/
542 static bool_t
543 svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
544 {
545 	return (FALSE);
546 }
547 
548 static bool_t
549 svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
550 {
551 
552 	return (FALSE);
553 }
554 
555 static bool_t
556 svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
557 {
558 
559 	return (FALSE);
560 }
561 
562 static enum xprt_stat
563 svc_vc_stat(SVCXPRT *xprt)
564 {
565 	struct cf_conn *cd;
566 
567 	cd = (struct cf_conn *)(xprt->xp_p1);
568 
569 	if (cd->strm_stat == XPRT_DIED)
570 		return (XPRT_DIED);
571 
572 	if (cd->mreq != NULL && cd->resid == 0 && cd->eor)
573 		return (XPRT_MOREREQS);
574 
575 	if (soreadable(xprt->xp_socket))
576 		return (XPRT_MOREREQS);
577 
578 	return (XPRT_IDLE);
579 }
580 
581 static bool_t
582 svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
583 {
584 
585 	*ack = atomic_load_acq_32(&xprt->xp_snt_cnt);
586 	*ack -= sbused(&xprt->xp_socket->so_snd);
587 	return (TRUE);
588 }
589 
590 static enum xprt_stat
591 svc_vc_backchannel_stat(SVCXPRT *xprt)
592 {
593 	struct cf_conn *cd;
594 
595 	cd = (struct cf_conn *)(xprt->xp_p1);
596 
597 	if (cd->mreq != NULL)
598 		return (XPRT_MOREREQS);
599 
600 	return (XPRT_IDLE);
601 }
602 
603 /*
604  * If we have an mbuf chain in cd->mpending, try to parse a record from it,
605  * leaving the result in cd->mreq. If we don't have a complete record, leave
606  * the partial result in cd->mreq and try to read more from the socket.
607  */
608 static int
609 svc_vc_process_pending(SVCXPRT *xprt)
610 {
611 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
612 	struct socket *so = xprt->xp_socket;
613 	struct mbuf *m;
614 
615 	/*
616 	 * If cd->resid is non-zero, we have part of the
617 	 * record already, otherwise we are expecting a record
618 	 * marker.
619 	 */
620 	if (!cd->resid && cd->mpending) {
621 		/*
622 		 * See if there is enough data buffered to
623 		 * make up a record marker. Make sure we can
624 		 * handle the case where the record marker is
625 		 * split across more than one mbuf.
626 		 */
627 		size_t n = 0;
628 		uint32_t header;
629 
630 		m = cd->mpending;
631 		while (n < sizeof(uint32_t) && m) {
632 			n += m->m_len;
633 			m = m->m_next;
634 		}
635 		if (n < sizeof(uint32_t)) {
636 			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
637 			return (FALSE);
638 		}
639 		m_copydata(cd->mpending, 0, sizeof(header),
640 		    (char *)&header);
641 		header = ntohl(header);
642 		cd->eor = (header & 0x80000000) != 0;
643 		cd->resid = header & 0x7fffffff;
644 		m_adj(cd->mpending, sizeof(uint32_t));
645 	}
646 
647 	/*
648 	 * Start pulling off mbufs from cd->mpending
649 	 * until we either have a complete record or
650 	 * we run out of data. We use m_split to pull
651 	 * data - it will pull as much as possible and
652 	 * split the last mbuf if necessary.
653 	 */
654 	while (cd->mpending && cd->resid) {
655 		m = cd->mpending;
656 		if (cd->mpending->m_next
657 		    || cd->mpending->m_len > cd->resid)
658 			cd->mpending = m_split(cd->mpending,
659 			    cd->resid, M_WAITOK);
660 		else
661 			cd->mpending = NULL;
662 		if (cd->mreq)
663 			m_last(cd->mreq)->m_next = m;
664 		else
665 			cd->mreq = m;
666 		while (m) {
667 			cd->resid -= m->m_len;
668 			m = m->m_next;
669 		}
670 	}
671 
672 	/*
673 	 * Block receive upcalls if we have more data pending,
674 	 * otherwise report our need.
675 	 */
676 	if (cd->mpending)
677 		so->so_rcv.sb_lowat = INT_MAX;
678 	else
679 		so->so_rcv.sb_lowat =
680 		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
681 	return (TRUE);
682 }
683 
/*
 * xp_recv method of a connection transport.
 *
 * Under xp_lock, loops between parsing buffered data (cd->mpending) into
 * records and reading more data from the socket without blocking:
 *
 *  - When a complete record (cd->mreq set, cd->resid == 0, cd->eor) is
 *    available and a backchannel client is attached (xp_p2 != NULL), a
 *    message whose direction word is REPLY is handed to clnt_bck_svccall()
 *    and the loop continues.
 *  - Any other complete record is decoded with xdr_callmsg() into *msg.
 *    On success *addrp is set to NULL, *mp receives the remaining mbufs
 *    from xdrmbuf_getall() and TRUE is returned; a decode failure returns
 *    FALSE.
 *  - If no complete record is available, soreceive() is called.
 *    EWOULDBLOCK returns FALSE (marking the transport inactive if the
 *    socket is really not readable); ENXIO on a TLS connection triggers an
 *    upcall to the daemon; any other error or EOF marks the stream
 *    XPRT_DIED and returns FALSE.
 */
static bool_t
svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
    struct sockaddr **addrp, struct mbuf **mp)
{
	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
	struct uio uio;
	struct mbuf *m, *ctrl;
	struct socket* so = xprt->xp_socket;
	XDR xdrs;
	int error, rcvflag;
	uint32_t reterr, xid_plus_direction[2];
	struct cmsghdr *cmsg;
	struct tls_get_record tgr;
	enum clnt_stat ret;

	/*
	 * Serialise access to the socket and our own record parsing
	 * state.
	 */
	sx_xlock(&xprt->xp_lock);

	for (;;) {
		/* If we have no request ready, check pending queue. */
		while (cd->mpending &&
		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
			if (!svc_vc_process_pending(xprt))
				break;
		}

		/* Process and return complete request in cd->mreq. */
		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {

			/*
			 * Now, check for a backchannel reply.
			 * The XID is in the first uint32_t of the reply
			 * and the message direction is the second one.
			 */
			if ((cd->mreq->m_len >= sizeof(xid_plus_direction) ||
			    m_length(cd->mreq, NULL) >=
			    sizeof(xid_plus_direction)) &&
			    xprt->xp_p2 != NULL) {
				m_copydata(cd->mreq, 0,
				    sizeof(xid_plus_direction),
				    (char *)xid_plus_direction);
				xid_plus_direction[0] =
				    ntohl(xid_plus_direction[0]);
				xid_plus_direction[1] =
				    ntohl(xid_plus_direction[1]);
				/* Check message direction. */
				if (xid_plus_direction[1] == REPLY) {
					/* The record is passed on; forget it here. */
					clnt_bck_svccall(xprt->xp_p2,
					    cd->mreq,
					    xid_plus_direction[0]);
					cd->mreq = NULL;
					continue;
				}
			}

			/* The XDR stream now refers to the request mbufs. */
			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
			cd->mreq = NULL;

			/* Check for next request in a pending queue. */
			svc_vc_process_pending(xprt);
			if (cd->mreq == NULL || cd->resid != 0) {
				SOCKBUF_LOCK(&so->so_rcv);
				if (!soreadable(so))
					xprt_inactive_self(xprt);
				SOCKBUF_UNLOCK(&so->so_rcv);
			}

			/* Decoding happens outside the transport lock. */
			sx_xunlock(&xprt->xp_lock);

			if (! xdr_callmsg(&xdrs, msg)) {
				XDR_DESTROY(&xdrs);
				return (FALSE);
			}

			*addrp = NULL;
			*mp = xdrmbuf_getall(&xdrs);
			XDR_DESTROY(&xdrs);

			return (TRUE);
		}

		/*
		 * If receiving is disabled so that a TLS handshake can be
		 * done by the rpctlssd daemon, return FALSE here.
		 */
		rcvflag = MSG_DONTWAIT;
		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0)
			rcvflag |= MSG_TLSAPPDATA;
tryagain:
		if (xprt->xp_dontrcv) {
			sx_xunlock(&xprt->xp_lock);
			return (FALSE);
		}

		/*
		 * The socket upcall calls xprt_active() which will eventually
		 * cause the server to call us here. We attempt to
		 * read as much as possible from the socket and put
		 * the result in cd->mpending. If the read fails,
		 * we have drained both cd->mpending and the socket so
		 * we can call xprt_inactive().
		 */
		uio.uio_resid = 1000000000;
		uio.uio_td = curthread;
		ctrl = m = NULL;
		error = soreceive(so, NULL, &uio, &m, &ctrl, &rcvflag);

		if (error == EWOULDBLOCK) {
			/*
			 * We must re-test for readability after
			 * taking the lock to protect us in the case
			 * where a new packet arrives on the socket
			 * after our call to soreceive fails with
			 * EWOULDBLOCK.
			 */
			SOCKBUF_LOCK(&so->so_rcv);
			if (!soreadable(so))
				xprt_inactive_self(xprt);
			SOCKBUF_UNLOCK(&so->so_rcv);
			sx_xunlock(&xprt->xp_lock);
			return (FALSE);
		}

		/*
		 * A return of ENXIO indicates that there is a
		 * non-application data record at the head of the
		 * socket's receive queue, for TLS connections.
		 * This record needs to be handled in userland
		 * via an SSL_read() call, so do an upcall to the daemon.
		 */
		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0 &&
		    error == ENXIO) {
			/* Disable reception. */
			xprt->xp_dontrcv = TRUE;
			/* The lock is not held across the daemon upcall. */
			sx_xunlock(&xprt->xp_lock);
			ret = rpctls_srv_handlerecord(xprt->xp_sslsec,
			    xprt->xp_sslusec, xprt->xp_sslrefno,
			    &reterr);
			sx_xlock(&xprt->xp_lock);
			xprt->xp_dontrcv = FALSE;
			if (ret != RPC_SUCCESS || reterr != RPCTLSERR_OK) {
				/*
				 * All we can do is soreceive() it and
				 * then toss it.
				 */
				rcvflag = MSG_DONTWAIT;
				goto tryagain;
			}
			sx_xunlock(&xprt->xp_lock);
			xprt_active(xprt);   /* Harmless if already active. */
			return (FALSE);
		}

		if (error) {
			/* Any other error: stop upcalls and mark the stream dead. */
			SOCKBUF_LOCK(&so->so_rcv);
			if (xprt->xp_upcallset) {
				xprt->xp_upcallset = 0;
				soupcall_clear(so, SO_RCV);
			}
			SOCKBUF_UNLOCK(&so->so_rcv);
			xprt_inactive_self(xprt);
			cd->strm_stat = XPRT_DIED;
			sx_xunlock(&xprt->xp_lock);
			return (FALSE);
		}

		if (!m) {
			/*
			 * EOF - the other end has closed the socket.
			 */
			xprt_inactive_self(xprt);
			cd->strm_stat = XPRT_DIED;
			sx_xunlock(&xprt->xp_lock);
			return (FALSE);
		}

		/* Process any record header(s). */
		if (ctrl != NULL) {
			cmsg = mtod(ctrl, struct cmsghdr *);
			if (cmsg->cmsg_type == TLS_GET_RECORD &&
			    cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) {
				memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
				/*
				 * This should have been handled by
				 * the rpctls_svc_handlerecord()
				 * upcall.  If not, all we can do is
				 * toss it away.
				 */
				if (tgr.tls_type != TLS_RLTYPE_APP) {
					m_freem(m);
					m_free(ctrl);
					rcvflag = MSG_DONTWAIT | MSG_TLSAPPDATA;
					goto tryagain;
				}
			}
			m_free(ctrl);
		}

		/* Queue the new data behind anything not yet parsed. */
		if (cd->mpending)
			m_last(cd->mpending)->m_next = m;
		else
			cd->mpending = m;
	}
}
891 
892 static bool_t
893 svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
894     struct sockaddr **addrp, struct mbuf **mp)
895 {
896 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
897 	struct ct_data *ct;
898 	struct mbuf *m;
899 	XDR xdrs;
900 
901 	sx_xlock(&xprt->xp_lock);
902 	ct = (struct ct_data *)xprt->xp_p2;
903 	if (ct == NULL) {
904 		sx_xunlock(&xprt->xp_lock);
905 		return (FALSE);
906 	}
907 	mtx_lock(&ct->ct_lock);
908 	m = cd->mreq;
909 	if (m == NULL) {
910 		xprt_inactive_self(xprt);
911 		mtx_unlock(&ct->ct_lock);
912 		sx_xunlock(&xprt->xp_lock);
913 		return (FALSE);
914 	}
915 	cd->mreq = m->m_nextpkt;
916 	mtx_unlock(&ct->ct_lock);
917 	sx_xunlock(&xprt->xp_lock);
918 
919 	xdrmbuf_create(&xdrs, m, XDR_DECODE);
920 	if (! xdr_callmsg(&xdrs, msg)) {
921 		XDR_DESTROY(&xdrs);
922 		return (FALSE);
923 	}
924 	*addrp = NULL;
925 	*mp = xdrmbuf_getall(&xdrs);
926 	XDR_DESTROY(&xdrs);
927 	return (TRUE);
928 }
929 
930 static bool_t
931 svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
932     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
933 {
934 	XDR xdrs;
935 	struct mbuf *mrep;
936 	bool_t stat = TRUE;
937 	int error, len, maxextsiz;
938 #ifdef KERN_TLS
939 	u_int maxlen;
940 #endif
941 
942 	/*
943 	 * Leave space for record mark.
944 	 */
945 	mrep = m_gethdr(M_WAITOK, MT_DATA);
946 	mrep->m_data += sizeof(uint32_t);
947 
948 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
949 
950 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
951 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
952 		if (!xdr_replymsg(&xdrs, msg))
953 			stat = FALSE;
954 		else
955 			xdrmbuf_append(&xdrs, m);
956 	} else {
957 		stat = xdr_replymsg(&xdrs, msg);
958 	}
959 
960 	if (stat) {
961 		m_fixhdr(mrep);
962 
963 		/*
964 		 * Prepend a record marker containing the reply length.
965 		 */
966 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
967 		len = mrep->m_pkthdr.len;
968 		*mtod(mrep, uint32_t *) =
969 			htonl(0x80000000 | (len - sizeof(uint32_t)));
970 
971 		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
972 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
973 			/*
974 			 * Copy the mbuf chain to a chain of
975 			 * ext_pgs mbuf(s) as required by KERN_TLS.
976 			 */
977 			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
978 #ifdef KERN_TLS
979 			if (rpctls_getinfo(&maxlen, false, false))
980 				maxextsiz = min(maxextsiz, maxlen);
981 #endif
982 			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
983 		}
984 		atomic_add_32(&xprt->xp_snd_cnt, len);
985 		/*
986 		 * sosend consumes mreq.
987 		 */
988 		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
989 		    0, curthread);
990 		if (!error) {
991 			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
992 			if (seq)
993 				*seq = xprt->xp_snd_cnt;
994 			stat = TRUE;
995 		} else
996 			atomic_subtract_32(&xprt->xp_snd_cnt, len);
997 	} else {
998 		m_freem(mrep);
999 	}
1000 
1001 	XDR_DESTROY(&xdrs);
1002 
1003 	return (stat);
1004 }
1005 
1006 static bool_t
1007 svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
1008     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
1009 {
1010 	struct ct_data *ct;
1011 	XDR xdrs;
1012 	struct mbuf *mrep;
1013 	bool_t stat = TRUE;
1014 	int error, maxextsiz;
1015 #ifdef KERN_TLS
1016 	u_int maxlen;
1017 #endif
1018 
1019 	/*
1020 	 * Leave space for record mark.
1021 	 */
1022 	mrep = m_gethdr(M_WAITOK, MT_DATA);
1023 	mrep->m_data += sizeof(uint32_t);
1024 
1025 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
1026 
1027 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
1028 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
1029 		if (!xdr_replymsg(&xdrs, msg))
1030 			stat = FALSE;
1031 		else
1032 			xdrmbuf_append(&xdrs, m);
1033 	} else {
1034 		stat = xdr_replymsg(&xdrs, msg);
1035 	}
1036 
1037 	if (stat) {
1038 		m_fixhdr(mrep);
1039 
1040 		/*
1041 		 * Prepend a record marker containing the reply length.
1042 		 */
1043 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
1044 		*mtod(mrep, uint32_t *) =
1045 			htonl(0x80000000 | (mrep->m_pkthdr.len
1046 				- sizeof(uint32_t)));
1047 
1048 		/* For RPC-over-TLS, copy mrep to a chain of ext_pgs. */
1049 		if ((xprt->xp_tls & RPCTLS_FLAGS_HANDSHAKE) != 0) {
1050 			/*
1051 			 * Copy the mbuf chain to a chain of
1052 			 * ext_pgs mbuf(s) as required by KERN_TLS.
1053 			 */
1054 			maxextsiz = TLS_MAX_MSG_SIZE_V10_2;
1055 #ifdef KERN_TLS
1056 			if (rpctls_getinfo(&maxlen, false, false))
1057 				maxextsiz = min(maxextsiz, maxlen);
1058 #endif
1059 			mrep = _rpc_copym_into_ext_pgs(mrep, maxextsiz);
1060 		}
1061 		sx_xlock(&xprt->xp_lock);
1062 		ct = (struct ct_data *)xprt->xp_p2;
1063 		if (ct != NULL)
1064 			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
1065 			    0, curthread);
1066 		else
1067 			error = EPIPE;
1068 		sx_xunlock(&xprt->xp_lock);
1069 		if (!error) {
1070 			stat = TRUE;
1071 		}
1072 	} else {
1073 		m_freem(mrep);
1074 	}
1075 
1076 	XDR_DESTROY(&xdrs);
1077 
1078 	return (stat);
1079 }
1080 
1081 static bool_t
1082 svc_vc_null()
1083 {
1084 
1085 	return (FALSE);
1086 }
1087 
1088 static int
1089 svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
1090 {
1091 	SVCXPRT *xprt = (SVCXPRT *) arg;
1092 
1093 	if (soreadable(xprt->xp_socket))
1094 		xprt_active(xprt);
1095 	return (SU_OK);
1096 }
1097 
1098 static int
1099 svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
1100 {
1101 	SVCXPRT *xprt = (SVCXPRT *) arg;
1102 
1103 	if (!TAILQ_EMPTY(&head->sol_comp))
1104 		xprt_active(xprt);
1105 	return (SU_OK);
1106 }
1107 
/*
 * NOTE(review): everything between #if 0 and #endif is compiled out.  The
 * code uses a file descriptor (transp->xp_fd) and getpeereid(), neither of
 * which is used elsewhere in this file - presumably a leftover from the
 * userland version of this transport; confirm before reviving it.
 */
#if 0
/*
 * Get the effective UID of the sending process. Used by rpcbind, keyserv
 * and rpc.yppasswdd on AF_LOCAL.
 */
int
__rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
	int sock, ret;
	gid_t egid;
	uid_t euid;
	struct sockaddr *sa;

	sock = transp->xp_fd;
	sa = (struct sockaddr *)transp->xp_rtaddr;
	if (sa->sa_family == AF_LOCAL) {
		ret = getpeereid(sock, &euid, &egid);
		if (ret == 0)
			*uid = euid;
		return (ret);
	} else
		return (-1);
}
#endif
1131