xref: /freebsd/sys/netinet/tcp_usrreq.c (revision dd0e6bb996dc46e1d91f3d9aef87979716479465)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1993
5  *	The Regents of the University of California.
6  * Copyright (c) 2006-2007 Robert N. M. Watson
7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Robert N. M. Watson under
11  * contract to Juniper Networks, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #include <sys/cdefs.h>
39 #include "opt_ddb.h"
40 #include "opt_inet.h"
41 #include "opt_inet6.h"
42 #include "opt_ipsec.h"
43 #include "opt_kern_tls.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/arb.h>
48 #include <sys/limits.h>
49 #include <sys/malloc.h>
50 #include <sys/refcount.h>
51 #include <sys/kernel.h>
52 #include <sys/ktls.h>
53 #include <sys/qmath.h>
54 #include <sys/sysctl.h>
55 #include <sys/mbuf.h>
56 #ifdef INET6
57 #include <sys/domain.h>
58 #endif /* INET6 */
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/protosw.h>
62 #include <sys/proc.h>
63 #include <sys/jail.h>
64 #include <sys/stats.h>
65 
66 #ifdef DDB
67 #include <ddb/ddb.h>
68 #endif
69 
70 #include <net/if.h>
71 #include <net/if_var.h>
72 #include <net/route.h>
73 #include <net/vnet.h>
74 
75 #include <netinet/in.h>
76 #include <netinet/in_kdtrace.h>
77 #include <netinet/in_pcb.h>
78 #include <netinet/in_rss.h>
79 #include <netinet/in_systm.h>
80 #include <netinet/in_var.h>
81 #include <netinet/ip.h>
82 #include <netinet/ip_var.h>
83 #ifdef INET6
84 #include <netinet/ip6.h>
85 #include <netinet6/in6_pcb.h>
86 #include <netinet6/in6_rss.h>
87 #include <netinet6/ip6_var.h>
88 #include <netinet6/scope6_var.h>
89 #endif
90 #include <netinet/tcp.h>
91 #include <netinet/tcp_fsm.h>
92 #include <netinet/tcp_seq.h>
93 #include <netinet/tcp_timer.h>
94 #include <netinet/tcp_var.h>
95 #include <netinet/tcp_log_buf.h>
96 #include <netinet/tcpip.h>
97 #include <netinet/cc/cc.h>
98 #include <netinet/tcp_fastopen.h>
99 #include <netinet/tcp_hpts.h>
100 #ifdef TCP_OFFLOAD
101 #include <netinet/tcp_offload.h>
102 #endif
103 #include <netipsec/ipsec_support.h>
104 
105 #include <vm/vm.h>
106 #include <vm/vm_param.h>
107 #include <vm/pmap.h>
108 #include <vm/vm_extern.h>
109 #include <vm/vm_map.h>
110 #include <vm/vm_page.h>
111 
112 /*
113  * TCP protocol interface to socket abstraction.
114  */
115 #ifdef INET
116 static int	tcp_connect(struct tcpcb *, struct sockaddr_in *,
117 		    struct thread *td);
118 #endif /* INET */
119 #ifdef INET6
120 static int	tcp6_connect(struct tcpcb *, struct sockaddr_in6 *,
121 		    struct thread *td);
122 #endif /* INET6 */
123 static void	tcp_disconnect(struct tcpcb *);
124 static void	tcp_usrclosed(struct tcpcb *);
125 static void	tcp_fill_info(const struct tcpcb *, struct tcp_info *);
126 
127 static int	tcp_pru_options_support(struct tcpcb *tp, int flags);
128 
129 static void
tcp_bblog_pru(struct tcpcb * tp,uint32_t pru,int error)130 tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error)
131 {
132 	struct tcp_log_buffer *lgb;
133 
134 	KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL"));
135 	INP_WLOCK_ASSERT(tptoinpcb(tp));
136 	if (tcp_bblogging_on(tp)) {
137 		lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error,
138 		    0, NULL, false, NULL, NULL, 0, NULL);
139 	} else {
140 		lgb = NULL;
141 	}
142 	if (lgb != NULL) {
143 		if (error >= 0) {
144 			lgb->tlb_errno = (uint32_t)error;
145 		}
146 		lgb->tlb_flex1 = pru;
147 	}
148 }
149 
150 /*
151  * TCP attaches to socket via pr_attach(), reserving space,
152  * and an internet control block.
153  */
static int
tcp_usr_attach(struct socket *so, int proto, struct thread *td)
{
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	int error;

	inp = sotoinpcb(so);
	KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));

	/* Reserve send and receive buffer space before allocating the PCB. */
	error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
	if (error)
		goto out;

	/* Allow both socket buffers to auto-tune their sizes. */
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	so->so_snd.sb_flags |= (SB_AUTOLOWAT | SB_AUTOSIZE);
	error = in_pcballoc(so, &V_tcbinfo);
	if (error)
		goto out;
	inp = sotoinpcb(so);
	tp = tcp_newtcpcb(inp, NULL);
	if (tp == NULL) {
		/* Undo the PCB allocation; the socket stays usable. */
		error = ENOBUFS;
		in_pcbfree(inp);
		goto out;
	}
	tp->t_state = TCPS_CLOSED;
	tcp_bblog_pru(tp, PRU_ATTACH, error);
	/* in_pcballoc() returned with the inpcb write-locked; release it. */
	INP_WUNLOCK(inp);
	TCPSTATES_INC(TCPS_CLOSED);
out:
	TCP_PROBE2(debug__user, tp, PRU_ATTACH);
	return (error);
}
188 
189 /*
190  * tcp_usr_detach is called when the socket layer loses its final reference
191  * to the socket, be it a file descriptor reference, a reference from TCP,
192  * etc.  At this point, there is only one case in which we will keep around
193  * inpcb state: time wait.
194  */
static void
tcp_usr_detach(struct socket *so)
{
	struct inpcb *inp;
	struct tcpcb *tp;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
	INP_WLOCK(inp);
	KASSERT(so->so_pcb == inp && inp->inp_socket == so,
		("%s: socket %p inp %p mismatch", __func__, so, inp));

	tp = intotcpcb(inp);

	/*
	 * By the time detach runs the connection must either have been
	 * dropped or never have left the embryonic (pre-SYN_SENT) state.
	 */
	KASSERT(inp->inp_flags & INP_DROPPED ||
	    tp->t_state < TCPS_SYN_SENT,
	    ("%s: inp %p not dropped or embryonic", __func__, inp));

	/* Tear down the tcpcb, then free the inpcb itself. */
	tcp_discardcb(tp);
	in_pcbfree(inp);
}
216 
217 #ifdef INET
218 /*
219  * Give the socket an address.
220  */
static int
tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in *sinp;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (EINVAL);
	}
	tp = intotcpcb(inp);

	sinp = (struct sockaddr_in *)nam;
	if (nam->sa_family != AF_INET) {
		/*
		 * Preserve compatibility with old programs.
		 * Accept AF_UNSPEC only when it amounts to binding to
		 * INADDR_ANY; anything else is rejected.
		 */
		if (nam->sa_family != AF_UNSPEC ||
		    nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
		    sinp->sin_addr.s_addr != INADDR_ANY) {
			error = EAFNOSUPPORT;
			goto out;
		}
		nam->sa_family = AF_INET;
	}
	if (nam->sa_len != sizeof(*sinp)) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
		error = EAFNOSUPPORT;
		goto out;
	}
	/* The pcbinfo hash write lock is required to install a binding. */
	INP_HASH_WLOCK(&V_tcbinfo);
	error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
	    td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
out:
	tcp_bblog_pru(tp, PRU_BIND, error);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);

	return (error);
}
274 #endif /* INET */
275 
276 #ifdef INET6
/*
 * Give an IPv6 socket an address; v4-mapped addresses are handed off to
 * the IPv4 bind path on dual-stack sockets.
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6;
	u_char vflagsav;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (EINVAL);
	}
	tp = intotcpcb(inp);

	/* Save inp_vflag so it can be restored if the bind fails. */
	vflagsav = inp->inp_vflag;

	sin6 = (struct sockaddr_in6 *)nam;
	if (nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (nam->sa_len != sizeof(*sin6)) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Must check for multicast addresses and disallow binding
	 * to them.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}

	INP_HASH_WLOCK(&V_tcbinfo);
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
#ifdef INET
	/*
	 * On a dual-stack (non-V6ONLY) socket, an unspecified address
	 * binds both families, and a v4-mapped address is converted and
	 * bound through the IPv4 path instead.
	 */
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
			inp->inp_vflag |= INP_IPV4;
		else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6);
			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
				error = EAFNOSUPPORT;
				INP_HASH_WUNLOCK(&V_tcbinfo);
				goto out;
			}
			inp->inp_vflag |= INP_IPV4;
			inp->inp_vflag &= ~INP_IPV6;
			error = in_pcbbind(inp, &sin, 0, td->td_ucred);
			INP_HASH_WUNLOCK(&V_tcbinfo);
			goto out;
		}
	}
#endif
	error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
	    td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
out:
	/* On failure, undo the INP_IPV4/INP_IPV6 flag edits made above. */
	if (error != 0)
		inp->inp_vflag = vflagsav;
	tcp_bblog_pru(tp, PRU_BIND, error);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);
	return (error);
}
350 #endif /* INET6 */
351 
352 #ifdef INET
353 /*
354  * Prepare to accept connections.
355  */
static int
tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	struct inpcb *inp;
	struct tcpcb *tp;
	int error = 0;
	bool already_listening;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (EINVAL);
	}
	tp = intotcpcb(inp);

	SOCK_LOCK(so);
	/* Remember whether listen() was already called on this socket. */
	already_listening = SOLISTENING(so);
	error = solisten_proto_check(so);
	if (error != 0) {
		SOCK_UNLOCK(so);
		goto out;
	}
	/* Bind to an ephemeral port if the socket is not yet bound. */
	if (inp->inp_lport == 0) {
		INP_HASH_WLOCK(&V_tcbinfo);
		error = in_pcbbind(inp, NULL,
		    V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
		INP_HASH_WUNLOCK(&V_tcbinfo);
	}
	if (error == 0) {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	} else {
		/* Roll back the listen-conversion started by the check. */
		solisten_proto_abort(so);
	}
	SOCK_UNLOCK(so);
	/* A repeated listen() only updates the backlog; skip the rest. */
	if (already_listening)
		goto out;

	if (error == 0)
		in_pcblisten(inp);
	if (tp->t_flags & TF_FASTOPEN)
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

out:
	tcp_bblog_pru(tp, PRU_LISTEN, error);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
411 #endif /* INET */
412 
413 #ifdef INET6
/*
 * Prepare an IPv6 (possibly dual-stack) socket to accept connections.
 */
static int
tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	struct inpcb *inp;
	struct tcpcb *tp;
	u_char vflagsav;
	int error = 0;
	bool already_listening;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (EINVAL);
	}
	tp = intotcpcb(inp);

	/* Save inp_vflag so it can be restored if the bind fails. */
	vflagsav = inp->inp_vflag;

	SOCK_LOCK(so);
	/* Remember whether listen() was already called on this socket. */
	already_listening = SOLISTENING(so);
	error = solisten_proto_check(so);
	if (error != 0) {
		SOCK_UNLOCK(so);
		goto out;
	}
	INP_HASH_WLOCK(&V_tcbinfo);
	/* Bind to an ephemeral port if the socket is not yet bound. */
	if (inp->inp_lport == 0) {
		inp->inp_vflag &= ~INP_IPV4;
		/* Non-V6ONLY sockets also listen for IPv4 connections. */
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, NULL,
		    V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
	}
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error == 0) {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	} else {
		/* Roll back the listen-conversion started by the check. */
		solisten_proto_abort(so);
	}
	SOCK_UNLOCK(so);
	/* A repeated listen() only updates the backlog; skip the rest. */
	if (already_listening)
		goto out;

	if (error == 0)
		in_pcblisten(inp);
	if (tp->t_flags & TF_FASTOPEN)
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

	/* On failure, undo the INP_IPV4 flag edits made above. */
	if (error != 0)
		inp->inp_vflag = vflagsav;

out:
	tcp_bblog_pru(tp, PRU_LISTEN, error);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
478 #endif /* INET6 */
479 
480 #ifdef INET
481 /*
482  * Initiate connection to peer.
483  * Create a template for use in transmissions on this connection.
484  * Enter SYN_SENT state, and mark socket as connecting.
485  * Start keep-alive timer, and seed output sequence space.
486  * Send initial segment on connection.
487  */
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct epoch_tracker et;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in *sinp;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNREFUSED);
	}
	tp = intotcpcb(inp);

	/* Validate the destination address before committing anything. */
	sinp = (struct sockaddr_in *)nam;
	if (nam->sa_family != AF_INET) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (nam->sa_len != sizeof (*sinp)) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
		error = EACCES;
		goto out;
	}
	/* Enforce the jail's view of reachable IPv4 addresses. */
	if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
		goto out;
	if (SOLISTENING(so)) {
		error = EOPNOTSUPP;
		goto out;
	}
	/* tcp_connect() and tcp_output() must run inside the net epoch. */
	NET_EPOCH_ENTER(et);
	if ((error = tcp_connect(tp, sinp, td)) != 0)
		goto out_in_epoch;
#ifdef TCP_OFFLOAD
	/* Hand the connection to a TOE device if one will take it. */
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out_in_epoch;
#endif
	/* Arm the connection-establishment (keepinit) timer, send the SYN. */
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tcp_output(tp);
	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
out_in_epoch:
	NET_EPOCH_EXIT(et);
out:
	tcp_bblog_pru(tp, PRU_CONNECT, error);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
553 #endif /* INET */
554 
555 #ifdef INET6
/*
 * Initiate an IPv6 connection to a peer.  A v4-mapped destination on a
 * dual-stack socket is converted and connected through the IPv4 path.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct epoch_tracker et;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6;
	u_int8_t incflagsav;
	u_char vflagsav;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNREFUSED);
	}
	tp = intotcpcb(inp);

	/* Save flags so the implicit bind can be rolled back on failure. */
	vflagsav = inp->inp_vflag;
	incflagsav = inp->inp_inc.inc_flags;

	sin6 = (struct sockaddr_in6 *)nam;
	if (nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (nam->sa_len != sizeof (*sin6)) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (SOLISTENING(so)) {
		error = EOPNOTSUPP;
		goto out;
	}
#ifdef INET
	/*
	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
	 * therefore probably require the hash lock, which isn't held here.
	 * Is this a significant problem?
	 */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		/* A v4-mapped peer is invalid on a V6ONLY socket. */
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if ((inp->inp_vflag & INP_IPV4) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}

		/* Convert to a plain IPv4 address and re-run the checks. */
		in6_sin6_2_sin(&sin, sin6);
		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto out;
		}
		if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) {
			error = EACCES;
			goto out;
		}
		if ((error = prison_remote_ip4(td->td_ucred,
		    &sin.sin_addr)) != 0)
			goto out;
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		NET_EPOCH_ENTER(et);
		if ((error = tcp_connect(tp, &sin, td)) != 0)
			goto out_in_epoch;
#ifdef TCP_OFFLOAD
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto out_in_epoch;
#endif
		error = tcp_output(tp);
		goto out_in_epoch;
	} else {
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}
	}
#endif
	/* Enforce the jail's view of reachable IPv6 addresses. */
	if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
		goto out;
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_flags |= INC_ISIPV6;
	/* tcp6_connect() and tcp_output() must run inside the net epoch. */
	NET_EPOCH_ENTER(et);
	if ((error = tcp6_connect(tp, sin6, td)) != 0)
		goto out_in_epoch;
#ifdef TCP_OFFLOAD
	/* Hand the connection to a TOE device if one will take it. */
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out_in_epoch;
#endif
	/* Arm the connection-establishment (keepinit) timer, send the SYN. */
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tcp_output(tp);
out_in_epoch:
	NET_EPOCH_EXIT(et);
out:
	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
	/*
	 * If the implicit bind in the connect call fails, restore
	 * the flags we modified.
	 */
	if (error != 0 && inp->inp_lport == 0) {
		inp->inp_vflag = vflagsav;
		inp->inp_inc.inc_flags = incflagsav;
	}

	tcp_bblog_pru(tp, PRU_CONNECT, error);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
684 #endif /* INET6 */
685 
686 /*
687  * Initiate disconnect from peer.
688  * If connection never passed embryonic stage, just drop;
689  * else if don't need to let data drain, then can just drop anyways,
690  * else have to begin TCP shutdown process: mark socket disconnecting,
691  * drain unread data, state switch to reflect user close, and
692  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
693  * when peer sends FIN and acks ours.
694  *
695  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
696  */
static int
tcp_usr_disconnect(struct socket *so)
{
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	struct epoch_tracker et;

	/* tcp_disconnect() may transmit; enter the net epoch up front. */
	NET_EPOCH_ENTER(et);
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
	INP_WLOCK(inp);
	tp = intotcpcb(inp);

	/* A TIME_WAIT connection is already fully disconnected. */
	if (tp->t_state == TCPS_TIME_WAIT)
		goto out;
	tcp_disconnect(tp);
out:
	tcp_bblog_pru(tp, PRU_DISCONNECT, 0);
	TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
	INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	return (0);
}
720 
721 #ifdef INET
722 /*
723  * Accept a connection.  Essentially all the work is done at higher levels;
724  * just return the address of the peer, storing through addr.
725  */
static int
tcp_usr_accept(struct socket *so, struct sockaddr *sa)
{
	struct inpcb *inp;
	struct tcpcb *tp;
	int error = 0;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNABORTED);
	}
	tp = intotcpcb(inp);

	if (so->so_state & SS_ISDISCONNECTED)
		error = ECONNABORTED;
	else
		/* Copy the foreign endpoint out to the caller's buffer. */
		*(struct sockaddr_in *)sa = (struct sockaddr_in ){
			.sin_family = AF_INET,
			.sin_len = sizeof(struct sockaddr_in),
			.sin_port = inp->inp_fport,
			.sin_addr = inp->inp_faddr,
		};
	tcp_bblog_pru(tp, PRU_ACCEPT, error);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);

	return (error);
}
757 #endif /* INET */
758 
759 #ifdef INET6
/*
 * Return the peer address of an accepted IPv6 connection.  An IPv4 peer
 * on a dual-stack socket is reported as a v4-mapped IPv6 address.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr *sa)
{
	struct inpcb *inp;
	struct tcpcb *tp;
	int error = 0;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNABORTED);
	}
	tp = intotcpcb(inp);

	if (so->so_state & SS_ISDISCONNECTED) {
		error = ECONNABORTED;
	} else {
		if (inp->inp_vflag & INP_IPV4) {
			/* IPv4 peer: present it as a v4-mapped sockaddr_in6. */
			struct sockaddr_in sin = {
				.sin_family = AF_INET,
				.sin_len = sizeof(struct sockaddr_in),
				.sin_port = inp->inp_fport,
				.sin_addr = inp->inp_faddr,
			};
			in6_sin_2_v4mapsin6(&sin, (struct sockaddr_in6 *)sa);
		} else {
			*(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){
				.sin6_family = AF_INET6,
				.sin6_len = sizeof(struct sockaddr_in6),
				.sin6_port = inp->inp_fport,
				.sin6_addr = inp->in6p_faddr,
			};
			/* XXX: should catch errors */
			(void)sa6_recoverscope((struct sockaddr_in6 *)sa);
		}
	}

	tcp_bblog_pru(tp, PRU_ACCEPT, error);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);

	return (error);
}
805 #endif /* INET6 */
806 
807 /*
808  * Mark the connection as being incapable of further output.
809  */
static int
tcp_usr_shutdown(struct socket *so, enum shutdown_how how)
{
	struct epoch_tracker et;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	int error = 0;

	SOCK_LOCK(so);
	if (SOLISTENING(so)) {
		/* Shutting down the read side of a listener aborts it. */
		if (how != SHUT_WR) {
			so->so_error = ECONNABORTED;
			solisten_wakeup(so);	/* unlocks so */
		} else
			SOCK_UNLOCK(so);
		return (ENOTCONN);
	} else if ((so->so_state &
	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
		SOCK_UNLOCK(so);
		return (ENOTCONN);
	}
	SOCK_UNLOCK(so);

	switch (how) {
	case SHUT_RD:
		sorflush(so);
		break;
	case SHUT_RDWR:
		sorflush(so);
		/* FALLTHROUGH */
	case SHUT_WR:
		/*
		 * XXXGL: mimicing old soshutdown() here. But shouldn't we
		 * return ECONNRESEST for SHUT_RD as well?
		 */
		INP_WLOCK(inp);
		if (inp->inp_flags & INP_DROPPED) {
			INP_WUNLOCK(inp);
			return (ECONNRESET);
		}

		socantsendmore(so);
		NET_EPOCH_ENTER(et);
		tcp_usrclosed(tp);
		error = tcp_output_nodrop(tp);
		tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
		TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
		/* Releases the inpcb lock, dropping the connection if asked. */
		error = tcp_unlock_or_drop(tp, error);
		NET_EPOCH_EXIT(et);
	}
	wakeup(&so->so_timeo);

	return (error);
}
864 
865 /*
866  * After a receive, possibly send window update to peer.
867  */
static int
tcp_usr_rcvd(struct socket *so, int flags)
{
	struct epoch_tracker et;
	struct inpcb *inp;
	struct tcpcb *tp;
	int outrv = 0, error = 0;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNRESET);
	}
	tp = intotcpcb(inp);

	NET_EPOCH_ENTER(et);
	/*
	 * For passively-created TFO connections, don't attempt a window
	 * update while still in SYN_RECEIVED as this may trigger an early
	 * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
	 * application response data, or failing that, when the DELACK timer
	 * expires.
	 */
	if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED))
		goto out;
#ifdef TCP_OFFLOAD
	/* Offloaded connections notify the TOE device instead. */
	if (tp->t_flags & TF_TOE)
		tcp_offload_rcvd(tp);
	else
#endif
		outrv = tcp_output_nodrop(tp);
out:
	tcp_bblog_pru(tp, PRU_RCVD, error);
	TCP_PROBE2(debug__user, tp, PRU_RCVD);
	/* Releases the inpcb lock; may drop the connection per outrv. */
	(void) tcp_unlock_or_drop(tp, outrv);
	NET_EPOCH_EXIT(et);
	return (error);
}
908 
909 /*
910  * Do a send by putting data in output queue and updating urgent
911  * marker if URG set.  Possibly send more data.  Unlike the other
912  * pr_*() routines, the mbuf chains are our responsibility.  We
913  * must either enqueue them or free them.  The other pr_*() routines
914  * generally are caller-frees.
915  */
916 static int
tcp_usr_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)917 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
918     struct sockaddr *nam, struct mbuf *control, struct thread *td)
919 {
920 	struct epoch_tracker et;
921 	int error = 0;
922 	struct inpcb *inp;
923 	struct tcpcb *tp;
924 #ifdef INET
925 #ifdef INET6
926 	struct sockaddr_in sin;
927 #endif
928 	struct sockaddr_in *sinp;
929 #endif
930 #ifdef INET6
931 	struct sockaddr_in6 *sin6;
932 	int isipv6;
933 #endif
934 	u_int8_t incflagsav;
935 	u_char vflagsav;
936 	bool restoreflags;
937 
938 	inp = sotoinpcb(so);
939 	KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
940 	INP_WLOCK(inp);
941 	if (inp->inp_flags & INP_DROPPED) {
942 		if (m != NULL && (flags & PRUS_NOTREADY) == 0)
943 			m_freem(m);
944 		INP_WUNLOCK(inp);
945 		return (ECONNRESET);
946 	}
947 	tp = intotcpcb(inp);
948 
949 	vflagsav = inp->inp_vflag;
950 	incflagsav = inp->inp_inc.inc_flags;
951 	restoreflags = false;
952 
953 	NET_EPOCH_ENTER(et);
954 	if (control != NULL) {
955 		/* TCP doesn't do control messages (rights, creds, etc) */
956 		if (control->m_len > 0) {
957 			m_freem(control);
958 			error = EINVAL;
959 			goto out;
960 		}
961 		m_freem(control);	/* empty control, just free it */
962 	}
963 
964 	if ((flags & PRUS_OOB) != 0 &&
965 	    (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
966 		goto out;
967 
968 	if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
969 		if (tp->t_state == TCPS_LISTEN) {
970 			error = EINVAL;
971 			goto out;
972 		}
973 		switch (nam->sa_family) {
974 #ifdef INET
975 		case AF_INET:
976 			sinp = (struct sockaddr_in *)nam;
977 			if (sinp->sin_len != sizeof(struct sockaddr_in)) {
978 				error = EINVAL;
979 				goto out;
980 			}
981 			if ((inp->inp_vflag & INP_IPV6) != 0) {
982 				error = EAFNOSUPPORT;
983 				goto out;
984 			}
985 			if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
986 				error = EAFNOSUPPORT;
987 				goto out;
988 			}
989 			if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
990 				error = EACCES;
991 				goto out;
992 			}
993 			if ((error = prison_remote_ip4(td->td_ucred,
994 			    &sinp->sin_addr)))
995 				goto out;
996 #ifdef INET6
997 			isipv6 = 0;
998 #endif
999 			break;
1000 #endif /* INET */
1001 #ifdef INET6
1002 		case AF_INET6:
1003 			sin6 = (struct sockaddr_in6 *)nam;
1004 			if (sin6->sin6_len != sizeof(*sin6)) {
1005 				error = EINVAL;
1006 				goto out;
1007 			}
1008 			if ((inp->inp_vflag & INP_IPV6PROTO) == 0) {
1009 				error = EAFNOSUPPORT;
1010 				goto out;
1011 			}
1012 			if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
1013 				error = EAFNOSUPPORT;
1014 				goto out;
1015 			}
1016 			if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1017 #ifdef INET
1018 				if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
1019 					error = EINVAL;
1020 					goto out;
1021 				}
1022 				if ((inp->inp_vflag & INP_IPV4) == 0) {
1023 					error = EAFNOSUPPORT;
1024 					goto out;
1025 				}
1026 				restoreflags = true;
1027 				inp->inp_vflag &= ~INP_IPV6;
1028 				sinp = &sin;
1029 				in6_sin6_2_sin(sinp, sin6);
1030 				if (IN_MULTICAST(
1031 				    ntohl(sinp->sin_addr.s_addr))) {
1032 					error = EAFNOSUPPORT;
1033 					goto out;
1034 				}
1035 				if ((error = prison_remote_ip4(td->td_ucred,
1036 				    &sinp->sin_addr)))
1037 					goto out;
1038 				isipv6 = 0;
1039 #else /* !INET */
1040 				error = EAFNOSUPPORT;
1041 				goto out;
1042 #endif /* INET */
1043 			} else {
1044 				if ((inp->inp_vflag & INP_IPV6) == 0) {
1045 					error = EAFNOSUPPORT;
1046 					goto out;
1047 				}
1048 				restoreflags = true;
1049 				inp->inp_vflag &= ~INP_IPV4;
1050 				inp->inp_inc.inc_flags |= INC_ISIPV6;
1051 				if ((error = prison_remote_ip6(td->td_ucred,
1052 				    &sin6->sin6_addr)))
1053 					goto out;
1054 				isipv6 = 1;
1055 			}
1056 			break;
1057 #endif /* INET6 */
1058 		default:
1059 			error = EAFNOSUPPORT;
1060 			goto out;
1061 		}
1062 	}
1063 	if (!(flags & PRUS_OOB)) {
1064 		if (tp->t_acktime == 0)
1065 			tp->t_acktime = ticks;
1066 		sbappendstream(&so->so_snd, m, flags);
1067 		m = NULL;
1068 		if (nam && tp->t_state < TCPS_SYN_SENT) {
1069 			KASSERT(tp->t_state == TCPS_CLOSED,
1070 			    ("%s: tp %p is listening", __func__, tp));
1071 
1072 			/*
1073 			 * Do implied connect if not yet connected,
1074 			 * initialize window to default value, and
1075 			 * initialize maxseg using peer's cached MSS.
1076 			 */
1077 #ifdef INET6
1078 			if (isipv6)
1079 				error = tcp6_connect(tp, sin6, td);
1080 #endif /* INET6 */
1081 #if defined(INET6) && defined(INET)
1082 			else
1083 #endif
1084 #ifdef INET
1085 				error = tcp_connect(tp, sinp, td);
1086 #endif
1087 			/*
1088 			 * The bind operation in tcp_connect succeeded. We
1089 			 * no longer want to restore the flags if later
1090 			 * operations fail.
1091 			 */
1092 			if (error == 0 || inp->inp_lport != 0)
1093 				restoreflags = false;
1094 
1095 			if (error) {
1096 				/* m is freed if PRUS_NOTREADY is unset. */
1097 				sbflush(&so->so_snd);
1098 				goto out;
1099 			}
1100 			if (tp->t_flags & TF_FASTOPEN)
1101 				tcp_fastopen_connect(tp);
1102 			else {
1103 				tp->snd_wnd = TTCP_CLIENT_SND_WND;
1104 				tcp_mss(tp, -1);
1105 			}
1106 		}
1107 		if (flags & PRUS_EOF) {
1108 			/*
1109 			 * Close the send side of the connection after
1110 			 * the data is sent.
1111 			 */
1112 			socantsendmore(so);
1113 			tcp_usrclosed(tp);
1114 		}
1115 		if (TCPS_HAVEESTABLISHED(tp->t_state) &&
1116 		    ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
1117 		    (tp->t_fbyte_out == 0) &&
1118 		    (so->so_snd.sb_ccc > 0)) {
1119 			tp->t_fbyte_out = ticks;
1120 			if (tp->t_fbyte_out == 0)
1121 				tp->t_fbyte_out = 1;
1122 			if (tp->t_fbyte_out && tp->t_fbyte_in)
1123 				tp->t_flags2 |= TF2_FBYTES_COMPLETE;
1124 		}
1125 		if (!(inp->inp_flags & INP_DROPPED) &&
1126 		    !(flags & PRUS_NOTREADY)) {
1127 			if (flags & PRUS_MORETOCOME)
1128 				tp->t_flags |= TF_MORETOCOME;
1129 			error = tcp_output_nodrop(tp);
1130 			if (flags & PRUS_MORETOCOME)
1131 				tp->t_flags &= ~TF_MORETOCOME;
1132 		}
1133 	} else {
1134 		/*
1135 		 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
1136 		 */
1137 		SOCK_SENDBUF_LOCK(so);
1138 		if (sbspace(&so->so_snd) < -512) {
1139 			SOCK_SENDBUF_UNLOCK(so);
1140 			error = ENOBUFS;
1141 			goto out;
1142 		}
1143 		/*
1144 		 * According to RFC961 (Assigned Protocols),
1145 		 * the urgent pointer points to the last octet
1146 		 * of urgent data.  We continue, however,
1147 		 * to consider it to indicate the first octet
1148 		 * of data past the urgent section.
1149 		 * Otherwise, snd_up should be one lower.
1150 		 */
1151 		if (tp->t_acktime == 0)
1152 			tp->t_acktime = ticks;
1153 		sbappendstream_locked(&so->so_snd, m, flags);
1154 		SOCK_SENDBUF_UNLOCK(so);
1155 		m = NULL;
1156 		if (nam && tp->t_state < TCPS_SYN_SENT) {
1157 			/*
1158 			 * Do implied connect if not yet connected,
1159 			 * initialize window to default value, and
1160 			 * initialize maxseg using peer's cached MSS.
1161 			 */
1162 
1163 			/*
1164 			 * Not going to contemplate SYN|URG
1165 			 */
1166 			if (tp->t_flags & TF_FASTOPEN)
1167 				tp->t_flags &= ~TF_FASTOPEN;
1168 #ifdef INET6
1169 			if (isipv6)
1170 				error = tcp6_connect(tp, sin6, td);
1171 #endif /* INET6 */
1172 #if defined(INET6) && defined(INET)
1173 			else
1174 #endif
1175 #ifdef INET
1176 				error = tcp_connect(tp, sinp, td);
1177 #endif
1178 			/*
1179 			 * The bind operation in tcp_connect succeeded. We
1180 			 * no longer want to restore the flags if later
1181 			 * operations fail.
1182 			 */
1183 			if (error == 0 || inp->inp_lport != 0)
1184 				restoreflags = false;
1185 
1186 			if (error != 0) {
1187 				/* m is freed if PRUS_NOTREADY is unset. */
1188 				sbflush(&so->so_snd);
1189 				goto out;
1190 			}
1191 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
1192 			tcp_mss(tp, -1);
1193 		}
1194 		tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
1195 		if ((flags & PRUS_NOTREADY) == 0) {
1196 			tp->t_flags |= TF_FORCEDATA;
1197 			error = tcp_output_nodrop(tp);
1198 			tp->t_flags &= ~TF_FORCEDATA;
1199 		}
1200 	}
1201 	TCP_LOG_EVENT(tp, NULL,
1202 	    &inp->inp_socket->so_rcv,
1203 	    &inp->inp_socket->so_snd,
1204 	    TCP_LOG_USERSEND, error,
1205 	    0, NULL, false);
1206 
1207 out:
1208 	/*
1209 	 * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is
1210 	 * responsible for freeing memory.
1211 	 */
1212 	if (m != NULL && (flags & PRUS_NOTREADY) == 0)
1213 		m_freem(m);
1214 
1215 	/*
1216 	 * If the request was unsuccessful and we changed flags,
1217 	 * restore the original flags.
1218 	 */
1219 	if (error != 0 && restoreflags) {
1220 		inp->inp_vflag = vflagsav;
1221 		inp->inp_inc.inc_flags = incflagsav;
1222 	}
1223 	tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1224 		      ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error);
1225 	TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1226 		   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1227 	error = tcp_unlock_or_drop(tp, error);
1228 	NET_EPOCH_EXIT(et);
1229 	return (error);
1230 }
1231 
/*
 * Mark data previously appended to the send buffer as not-ready
 * (PRUS_NOTREADY) as ready for transmission, then attempt to transmit.
 * On ECONNRESET this function owns and frees the caller's mbufs.
 */
static int
tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
{
	struct epoch_tracker et;
	struct inpcb *inp;
	struct tcpcb *tp;
	int error;

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		/* Connection is gone; free the not-ready chain ourselves. */
		INP_WUNLOCK(inp);
		mb_free_notready(m, count);
		return (ECONNRESET);
	}
	tp = intotcpcb(inp);

	SOCK_SENDBUF_LOCK(so);
	error = sbready(&so->so_snd, m, count);
	SOCK_SENDBUF_UNLOCK(so);
	if (error) {
		INP_WUNLOCK(inp);
		return (error);
	}
	/* tcp_output_unlock() consumes the inpcb write lock. */
	NET_EPOCH_ENTER(et);
	error = tcp_output_unlock(tp);
	NET_EPOCH_EXIT(et);

	return (error);
}
1262 
1263 /*
1264  * Abort the TCP.  Drop the connection abruptly.
1265  */
1266 static void
tcp_usr_abort(struct socket * so)1267 tcp_usr_abort(struct socket *so)
1268 {
1269 	struct inpcb *inp;
1270 	struct tcpcb *tp;
1271 	struct epoch_tracker et;
1272 
1273 	inp = sotoinpcb(so);
1274 	KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
1275 
1276 	NET_EPOCH_ENTER(et);
1277 	INP_WLOCK(inp);
1278 	KASSERT(inp->inp_socket != NULL,
1279 	    ("tcp_usr_abort: inp_socket == NULL"));
1280 
1281 	/*
1282 	 * If we still have full TCP state, and we're not dropped, drop.
1283 	 */
1284 	if (!(inp->inp_flags & INP_DROPPED)) {
1285 		tp = intotcpcb(inp);
1286 		tp = tcp_drop(tp, ECONNABORTED);
1287 		if (tp == NULL)
1288 			goto dropped;
1289 		tcp_bblog_pru(tp, PRU_ABORT, 0);
1290 		TCP_PROBE2(debug__user, tp, PRU_ABORT);
1291 	}
1292 	if (!(inp->inp_flags & INP_DROPPED)) {
1293 		soref(so);
1294 		inp->inp_flags |= INP_SOCKREF;
1295 	}
1296 	INP_WUNLOCK(inp);
1297 dropped:
1298 	NET_EPOCH_EXIT(et);
1299 }
1300 
1301 /*
1302  * TCP socket is closed.  Start friendly disconnect.
1303  */
1304 static void
tcp_usr_close(struct socket * so)1305 tcp_usr_close(struct socket *so)
1306 {
1307 	struct inpcb *inp;
1308 	struct tcpcb *tp;
1309 	struct epoch_tracker et;
1310 
1311 	inp = sotoinpcb(so);
1312 	KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
1313 
1314 	NET_EPOCH_ENTER(et);
1315 	INP_WLOCK(inp);
1316 	KASSERT(inp->inp_socket != NULL,
1317 	    ("tcp_usr_close: inp_socket == NULL"));
1318 
1319 	/*
1320 	 * If we are still connected and we're not dropped, initiate
1321 	 * a disconnect.
1322 	 */
1323 	if (!(inp->inp_flags & INP_DROPPED)) {
1324 		tp = intotcpcb(inp);
1325 		if (tp->t_state != TCPS_TIME_WAIT) {
1326 			tp->t_flags |= TF_CLOSED;
1327 			tcp_disconnect(tp);
1328 			tcp_bblog_pru(tp, PRU_CLOSE, 0);
1329 			TCP_PROBE2(debug__user, tp, PRU_CLOSE);
1330 		}
1331 	}
1332 	if (!(inp->inp_flags & INP_DROPPED)) {
1333 		soref(so);
1334 		inp->inp_flags |= INP_SOCKREF;
1335 	}
1336 	INP_WUNLOCK(inp);
1337 	NET_EPOCH_EXIT(et);
1338 }
1339 
1340 static int
tcp_pru_options_support(struct tcpcb * tp,int flags)1341 tcp_pru_options_support(struct tcpcb *tp, int flags)
1342 {
1343 	/*
1344 	 * If the specific TCP stack has a pru_options
1345 	 * specified then it does not always support
1346 	 * all the PRU_XX options and we must ask it.
1347 	 * If the function is not specified then all
1348 	 * of the PRU_XX options are supported.
1349 	 */
1350 	int ret = 0;
1351 
1352 	if (tp->t_fb->tfb_pru_options) {
1353 		ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
1354 	}
1355 	return (ret);
1356 }
1357 
1358 /*
1359  * Receive out-of-band data.
1360  */
1361 static int
tcp_usr_rcvoob(struct socket * so,struct mbuf * m,int flags)1362 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1363 {
1364 	int error = 0;
1365 	struct inpcb *inp;
1366 	struct tcpcb *tp;
1367 
1368 	inp = sotoinpcb(so);
1369 	KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
1370 	INP_WLOCK(inp);
1371 	if (inp->inp_flags & INP_DROPPED) {
1372 		INP_WUNLOCK(inp);
1373 		return (ECONNRESET);
1374 	}
1375 	tp = intotcpcb(inp);
1376 
1377 	error = tcp_pru_options_support(tp, PRUS_OOB);
1378 	if (error) {
1379 		goto out;
1380 	}
1381 	if ((so->so_oobmark == 0 &&
1382 	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1383 	    so->so_options & SO_OOBINLINE ||
1384 	    tp->t_oobflags & TCPOOB_HADDATA) {
1385 		error = EINVAL;
1386 		goto out;
1387 	}
1388 	if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1389 		error = EWOULDBLOCK;
1390 		goto out;
1391 	}
1392 	m->m_len = 1;
1393 	*mtod(m, caddr_t) = tp->t_iobc;
1394 	if ((flags & MSG_PEEK) == 0)
1395 		tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1396 
1397 out:
1398 	tcp_bblog_pru(tp, PRU_RCVOOB, error);
1399 	TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
1400 	INP_WUNLOCK(inp);
1401 	return (error);
1402 }
1403 
#ifdef INET
/* User-request dispatch table for TCP over IPv4 sockets. */
struct protosw tcp_protosw = {
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
				    PR_CAPATTACH,
	.pr_ctloutput =		tcp_ctloutput,
	.pr_abort =		tcp_usr_abort,
	.pr_accept =		tcp_usr_accept,
	.pr_attach =		tcp_usr_attach,
	.pr_bind =		tcp_usr_bind,
	.pr_connect =		tcp_usr_connect,
	.pr_control =		in_control,
	.pr_detach =		tcp_usr_detach,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_listen =		tcp_usr_listen,
	.pr_peeraddr =		in_getpeeraddr,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,
	.pr_send =		tcp_usr_send,
	.pr_sendfile_wait =	sendfile_wait_generic,
	.pr_ready =		tcp_usr_ready,
	.pr_shutdown =		tcp_usr_shutdown,
	.pr_sockaddr =		in_getsockaddr,
	.pr_sosetlabel =	in_pcbsosetlabel,
	.pr_close =		tcp_usr_close,
};
#endif /* INET */
1432 
#ifdef INET6
/*
 * User-request dispatch table for TCP over IPv6 sockets; differs from
 * tcp_protosw only in the address-family specific entry points.
 */
struct protosw tcp6_protosw = {
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL |PR_WANTRCVD |
				    PR_CAPATTACH,
	.pr_ctloutput =		tcp_ctloutput,
	.pr_abort =		tcp_usr_abort,
	.pr_accept =		tcp6_usr_accept,
	.pr_attach =		tcp_usr_attach,
	.pr_bind =		tcp6_usr_bind,
	.pr_connect =		tcp6_usr_connect,
	.pr_control =		in6_control,
	.pr_detach =		tcp_usr_detach,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_listen =		tcp6_usr_listen,
	.pr_peeraddr =		in6_mapped_peeraddr,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,
	.pr_send =		tcp_usr_send,
	.pr_sendfile_wait =	sendfile_wait_generic,
	.pr_ready =		tcp_usr_ready,
	.pr_shutdown =		tcp_usr_shutdown,
	.pr_sockaddr =		in6_mapped_sockaddr,
	.pr_sosetlabel =	in_pcbsosetlabel,
	.pr_close =		tcp_usr_close,
};
#endif /* INET6 */
1461 
#ifdef INET
/*
 * Common subroutine to open a TCP connection to remote host specified
 * by struct sockaddr_in.  Call in_pcbconnect() to choose local host address
 * and assign a local port number and install the inpcb into the hash.
 * Initialize connection parameters and enter SYN-SENT state.
 *
 * Called with the inpcb write-locked and inside the network epoch;
 * returns with the lock still held.  Returns 0 or an errno.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct socket *so = tptosocket(tp);
	int error;

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	/* Refuse a connect on a socket already in use. */
	if (__predict_false((so->so_state &
	    (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING |
	    SS_ISDISCONNECTED)) != 0))
		return (EISCONN);
	/* SO_REUSEPORT_LB is a listen-side feature; reject for connect. */
	if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0))
		return (EOPNOTSUPP);

	INP_HASH_WLOCK(&V_tcbinfo);
	error = in_pcbconnect(inp, sin, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* set the hash on the connection */
	rss_proto_software_hash_v4(inp->inp_faddr, inp->inp_laddr,
	    inp->inp_fport, inp->inp_lport, IPPROTO_TCP,
	    &inp->inp_flowid, &inp->inp_flowtype);
	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	/* Pick the initial send sequence and seed timestamp offset. */
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if (tp->t_flags & TF_REQ_TSTMP)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET */
1516 
#ifdef INET6
/*
 * IPv6 counterpart of tcp_connect(): bind/connect the inpcb via
 * in6_pcbconnect(), set the RSS flow id, then enter SYN-SENT state.
 * Called with the inpcb write-locked and inside the network epoch;
 * returns with the lock still held.  Returns 0 or an errno.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct socket *so = tptosocket(tp);
	int error;

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	/* Refuse a connect on a socket already in use. */
	if (__predict_false((so->so_state &
	    (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING |
	    SS_ISDISCONNECTED)) != 0))
		return (EISCONN);
	/* SO_REUSEPORT_LB is a listen-side feature; reject for connect. */
	if (__predict_false((so->so_options & SO_REUSEPORT_LB) != 0))
		return (EOPNOTSUPP);

	INP_HASH_WLOCK(&V_tcbinfo);
	error = in6_pcbconnect(inp, sin6, td->td_ucred, true);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* set the hash on the connection */
	rss_proto_software_hash_v6(&inp->in6p_faddr,
	    &inp->in6p_laddr, inp->inp_fport, inp->inp_lport, IPPROTO_TCP,
	    &inp->inp_flowid, &inp->inp_flowtype);
	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	/* Pick the initial send sequence and seed timestamp offset. */
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if (tp->t_flags & TF_REQ_TSTMP)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1561 
1562 /*
1563  * Export TCP internal state information via a struct tcp_info, based on the
1564  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
1565  * (TCP state machine, etc).  We export all information using FreeBSD-native
1566  * constants -- for example, the numeric values for tcpi_state will differ
1567  * from Linux.
1568  */
1569 void
tcp_fill_info(const struct tcpcb * tp,struct tcp_info * ti)1570 tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti)
1571 {
1572 
1573 	INP_LOCK_ASSERT(tptoinpcb(tp));
1574 	bzero(ti, sizeof(*ti));
1575 
1576 	ti->tcpi_state = tp->t_state;
1577 	if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1578 		ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1579 	if (tp->t_flags & TF_SACK_PERMIT)
1580 		ti->tcpi_options |= TCPI_OPT_SACK;
1581 	if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1582 		ti->tcpi_options |= TCPI_OPT_WSCALE;
1583 		ti->tcpi_snd_wscale = tp->snd_scale;
1584 		ti->tcpi_rcv_wscale = tp->rcv_scale;
1585 	}
1586 	switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
1587 		case TF2_ECN_PERMIT:
1588 			ti->tcpi_options |= TCPI_OPT_ECN;
1589 			break;
1590 		case TF2_ACE_PERMIT:
1591 			/* FALLTHROUGH */
1592 		case TF2_ECN_PERMIT | TF2_ACE_PERMIT:
1593 			ti->tcpi_options |= TCPI_OPT_ACE;
1594 			break;
1595 		default:
1596 			break;
1597 	}
1598 	if (tp->t_flags & TF_FASTOPEN)
1599 		ti->tcpi_options |= TCPI_OPT_TFO;
1600 
1601 	ti->tcpi_rto = tp->t_rxtcur * tick;
1602 	ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
1603 	ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1604 	ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1605 
1606 	ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1607 	ti->tcpi_snd_cwnd = tp->snd_cwnd;
1608 
1609 	/*
1610 	 * FreeBSD-specific extension fields for tcp_info.
1611 	 */
1612 	ti->tcpi_rcv_space = tp->rcv_wnd;
1613 	ti->tcpi_rcv_nxt = tp->rcv_nxt;
1614 	ti->tcpi_snd_wnd = tp->snd_wnd;
1615 	ti->tcpi_snd_bwnd = 0;		/* Unused, kept for compat. */
1616 	ti->tcpi_snd_nxt = tp->snd_nxt;
1617 	ti->tcpi_snd_mss = tp->t_maxseg;
1618 	ti->tcpi_rcv_mss = tp->t_maxseg;
1619 	ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1620 	ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1621 	ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1622 	ti->tcpi_snd_una = tp->snd_una;
1623 	ti->tcpi_snd_max = tp->snd_max;
1624 	ti->tcpi_rcv_numsacks = tp->rcv_numsacks;
1625 	ti->tcpi_rcv_adv = tp->rcv_adv;
1626 	ti->tcpi_dupacks = tp->t_dupacks;
1627 	ti->tcpi_rttmin = tp->t_rttlow;
1628 #ifdef TCP_OFFLOAD
1629 	if (tp->t_flags & TF_TOE) {
1630 		ti->tcpi_options |= TCPI_OPT_TOE;
1631 		tcp_offload_tcp_info(tp, ti);
1632 	}
1633 #endif
1634 	/*
1635 	 * AccECN related counters.
1636 	 */
1637 	if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
1638 	    (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
1639 		/*
1640 		 * Internal counter starts at 5 for AccECN
1641 		 * but 0 for RFC3168 ECN.
1642 		 */
1643 		ti->tcpi_delivered_ce = tp->t_scep - 5;
1644 	else
1645 		ti->tcpi_delivered_ce = tp->t_scep;
1646 	ti->tcpi_received_ce = tp->t_rcep;
1647 }
1648 
1649 /*
1650  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
1651  * socket option arguments.  When it re-acquires the lock after the copy, it
1652  * has to revalidate that the connection is still valid for the socket
1653  * option.
1654  */
1655 #define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
1656 	INP_WLOCK(inp);							\
1657 	if (inp->inp_flags & INP_DROPPED) {				\
1658 		INP_WUNLOCK(inp);					\
1659 		cleanup;						\
1660 		return (ECONNRESET);					\
1661 	}								\
1662 	tp = intotcpcb(inp);						\
1663 } while(0)
1664 #define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
1665 
/*
 * Handle the SOPT_SET side of TCP socket options.  Entered with the
 * inpcb write-locked and not dropped; every return path releases the
 * lock, either directly or via the stack's tfb_tcp_ctloutput.
 */
int
tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
{
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	int error = 0;

	MPASS(sopt->sopt_dir == SOPT_SET);
	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("inp_flags == %x", inp->inp_flags));
	KASSERT(so != NULL, ("inp_socket == NULL"));

	if (sopt->sopt_level != IPPROTO_TCP) {
		/* Delegate non-TCP levels to the IP layer first. */
		INP_WUNLOCK(inp);
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6PROTO)
			error = ip6_ctloutput(so, sopt);
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET
			error = ip_ctloutput(so, sopt);
#endif
		/*
		 * When an IP-level socket option affects TCP, pass control
		 * down to stack tfb_tcp_ctloutput, otherwise return what
		 * IP level returned.
		 */
		switch (sopt->sopt_level) {
#ifdef INET6
		case IPPROTO_IPV6:
			if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
				return (error);
			switch (sopt->sopt_name) {
			case IPV6_TCLASS:
				/* Notify tcp stacks that care (e.g. RACK). */
				break;
			case IPV6_USE_MIN_MTU:
				/* Update t_maxseg accordingly. */
				break;
			default:
				return (error);
			}
			break;
#endif
#ifdef INET
		case IPPROTO_IP:
			switch (sopt->sopt_name) {
			case IP_TOS:
				/* TCP owns the ECN bits of the TOS byte. */
				inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
				break;
			case IP_TTL:
				/* Notify tcp stacks that care (e.g. RACK). */
				break;
			default:
				return (error);
			}
			break;
#endif
		default:
			return (error);
		}
		INP_WLOCK_RECHECK(inp);
	} else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
		/*
		 * Protect the TCP option TCP_FUNCTION_BLK so
		 * that a sub-function can *never* overwrite this.
		 */
		struct tcp_function_set fsn;
		struct tcp_function_block *blk;
		void *ptr = NULL;

		/* Lock must be dropped across the user copyin. */
		INP_WUNLOCK(inp);
		error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
		if (error)
			return (error);

		INP_WLOCK_RECHECK(inp);

		blk = find_and_ref_tcp_functions(&fsn);
		if (blk == NULL) {
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		if (tp->t_fb == blk) {
			/* You already have this */
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (0);
		}
		if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		/* Let the new stack veto the handoff. */
		error = (*blk->tfb_tcp_handoff_ok)(tp);
		if (error) {
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (error);
		}
		/*
		 * Stop any timers the current stack has running so the
		 * new stack takes ownership with a clean slate.
		 */
		if (tp->t_fb->tfb_tcp_timer_stop_all != NULL)
			tp->t_fb->tfb_tcp_timer_stop_all(tp);
		if (blk->tfb_tcp_fb_init) {
			error = (*blk->tfb_tcp_fb_init)(tp, &ptr);
			if (error) {
				/*
				 * Release the ref count the lookup
				 * acquired.
				 */
				refcount_release(&blk->tfb_refcnt);
				/*
				 * Now there is a chance that the
				 * init() function mucked with some
				 * things before it failed, such as
				 * hpts or inp_flags2 or timer granularity.
				 * It should not have, but let's give the old
				 * stack a chance to reset to a known good state.
				 */
				if (tp->t_fb->tfb_switch_failed) {
					(*tp->t_fb->tfb_switch_failed)(tp);
				}
			 	goto err_out;
			}
		}
		if (tp->t_fb->tfb_tcp_fb_fini) {
			struct epoch_tracker et;
			/*
			 * Tell the stack to cleanup with 0 i.e.
			 * the tcb is not going away.
			 */
			NET_EPOCH_ENTER(et);
			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
			NET_EPOCH_EXIT(et);
		}
		/*
		 * Release the old refcnt, the
		 * lookup acquired a ref on the
		 * new one already.
		 */
		refcount_release(&tp->t_fb->tfb_refcnt);
		/*
		 * Set in the new stack.
		 */
		tp->t_fb = blk;
		tp->t_fb_ptr = ptr;
#ifdef TCP_OFFLOAD
		if (tp->t_flags & TF_TOE) {
			/* Offloaded connections mirror the option change. */
			tcp_offload_ctloutput(tp, sopt->sopt_dir,
			     sopt->sopt_name);
		}
#endif
err_out:
		INP_WUNLOCK(inp);
		return (error);

	}

	/* Pass in the INP locked, callee must unlock it. */
	return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
}
1833 
/*
 * Handle the SOPT_GET side of TCP socket options.  Entered with the
 * inpcb write-locked and not dropped; every return path releases the
 * lock, either directly or via the stack's tfb_tcp_ctloutput.
 */
static int
tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
{
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	int error = 0;

	MPASS(sopt->sopt_dir == SOPT_GET);
	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("inp_flags == %x", inp->inp_flags));
	KASSERT(so != NULL, ("inp_socket == NULL"));

	if (sopt->sopt_level != IPPROTO_TCP) {
		/* Non-TCP levels are answered entirely by the IP layer. */
		INP_WUNLOCK(inp);
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6PROTO)
			error = ip6_ctloutput(so, sopt);
#endif /* INET6 */
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET
			error = ip_ctloutput(so, sopt);
#endif
		return (error);
	}
	if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
	     (sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
		/* Report the name of the connection's current TCP stack. */
		struct tcp_function_set fsn;

		if (sopt->sopt_name == TCP_FUNCTION_ALIAS) {
			memset(&fsn, 0, sizeof(fsn));
			find_tcp_function_alias(tp->t_fb, &fsn);
		} else {
			/* strncpy may not terminate; do it explicitly. */
			strncpy(fsn.function_set_name,
			    tp->t_fb->tfb_tcp_block_name,
			    TCP_FUNCTION_NAME_LEN_MAX);
			fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
		}
		fsn.pcbcnt = tp->t_fb->tfb_refcnt;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &fsn, sizeof fsn);
		return (error);
	}

	/* Pass in the INP locked, callee must unlock it. */
	return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
}
1883 
1884 int
tcp_ctloutput(struct socket * so,struct sockopt * sopt)1885 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1886 {
1887 	struct	inpcb *inp;
1888 
1889 	inp = sotoinpcb(so);
1890 	KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1891 
1892 	INP_WLOCK(inp);
1893 	if (inp->inp_flags & INP_DROPPED) {
1894 		INP_WUNLOCK(inp);
1895 		return (ECONNRESET);
1896 	}
1897 	if (sopt->sopt_dir == SOPT_SET)
1898 		return (tcp_ctloutput_set(inp, sopt));
1899 	else if (sopt->sopt_dir == SOPT_GET)
1900 		return (tcp_ctloutput_get(inp, sopt));
1901 	else
1902 		panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
1903 }
1904 
1905 /*
1906  * If this assert becomes untrue, we need to change the size of the buf
1907  * variable in tcp_default_ctloutput().
1908  */
1909 #ifdef CTASSERT
1910 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
1911 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
1912 #endif
1913 
1914 extern struct cc_algo newreno_cc_algo;
1915 
/*
 * Switch the connection to a different congestion control module
 * (TCP_CONGESTION-style set).  Looks up the algorithm by name,
 * pre-allocates its private state while unlocked, then atomically (under
 * the inpcb lock) tears down the old CC and attaches the new one.
 * Returns 0 on success or an errno (ESRCH if no such algorithm,
 * ECONNRESET if the connection went away during the copyin).
 */
static int
tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
{
	struct cc_algo *algo;
	void *ptr = NULL;
	struct tcpcb *tp;
	struct cc_var cc_mem;
	char	buf[TCP_CA_NAME_MAX];
	size_t mem_sz;
	int error;

	/* Drop the lock across the user copyin. */
	INP_WUNLOCK(inp);
	error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
	if (error)
		return(error);
	/*
	 * NOTE(review): assumes sooptcopyin() clamps sopt_valsize to the
	 * buffer size passed in, keeping this index in bounds -- confirm.
	 */
	buf[sopt->sopt_valsize] = '\0';
	CC_LIST_RLOCK();
	STAILQ_FOREACH(algo, &cc_list, entries) {
		if (strncmp(buf, algo->name,
			    TCP_CA_NAME_MAX) == 0) {
			if (algo->flags & CC_MODULE_BEING_REMOVED) {
				/* We can't "see" modules being unloaded */
				continue;
			}
			break;
		}
	}
	if (algo == NULL) {
		CC_LIST_RUNLOCK();
		return(ESRCH);
	}
	/*
	 * With a reference the algorithm cannot be removed
	 * so we hold a reference through the change process.
	 */
	cc_refer(algo);
	CC_LIST_RUNLOCK();
	if (algo->cb_init != NULL) {
		/* We can now pre-get the memory for the CC */
		mem_sz = (*algo->cc_data_sz)();
		if (mem_sz == 0) {
			/* Jumps into the else branch below: no state needed. */
			goto no_mem_needed;
		}
		ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
	} else {
no_mem_needed:
		mem_sz = 0;
		ptr = NULL;
	}
	/*
	 * Make sure it's all clean and zero and also get
	 * back the inp lock.
	 */
	memset(&cc_mem, 0, sizeof(cc_mem));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		/* Connection died while we were unlocked; undo everything. */
		INP_WUNLOCK(inp);
		if (ptr)
			free(ptr, M_CC_MEM);
		/* Release our temp reference */
		CC_LIST_RLOCK();
		cc_release(algo);
		CC_LIST_RUNLOCK();
		return (ECONNRESET);
	}
	tp = intotcpcb(inp);
	if (ptr != NULL)
		memset(ptr, 0, mem_sz);
	cc_mem.tp = tp;
	/*
	 * We once again hold a write lock over the tcb so it's
	 * safe to do these things without ordering concerns.
	 * Note here we init into stack memory.
	 */
	if (algo->cb_init != NULL)
		error = algo->cb_init(&cc_mem, ptr);
	else
		error = 0;
	/*
	 * The CC algorithms, when given their memory
	 * should not fail we could in theory have a
	 * KASSERT here.
	 */
	if (error == 0) {
		/*
		 * Touchdown, lets go ahead and move the
		 * connection to the new CC module by
		 * copying in the cc_mem after we call
		 * the old ones cleanup (if any).
		 */
		if (CC_ALGO(tp)->cb_destroy != NULL)
			CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
		/* Detach the old CC from the tcpcb  */
		cc_detach(tp);
		/* Copy in our temp memory that was inited */
		memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
		/* Now attach the new, which takes a reference */
		cc_attach(tp, algo);
		/* Ok now are we where we have gotten past any conn_init? */
		if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
			/* Yep run the connection init for the new CC */
			CC_ALGO(tp)->conn_init(&tp->t_ccv);
		}
	} else if (ptr)
		free(ptr, M_CC_MEM);
	INP_WUNLOCK(inp);
	/* Now lets release our temp reference */
	CC_LIST_RLOCK();
	cc_release(algo);
	CC_LIST_RUNLOCK();
	return (error);
}
2028 
2029 int
tcp_default_ctloutput(struct tcpcb * tp,struct sockopt * sopt)2030 tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt)
2031 {
2032 	struct inpcb *inp = tptoinpcb(tp);
2033 	int	error, opt, optval;
2034 	u_int	ui;
2035 	struct	tcp_info ti;
2036 #ifdef KERN_TLS
2037 	struct tls_enable tls;
2038 	struct socket *so = inp->inp_socket;
2039 #endif
2040 	char	*pbuf, buf[TCP_LOG_ID_LEN];
2041 #ifdef STATS
2042 	struct statsblob *sbp;
2043 #endif
2044 	size_t	len;
2045 
2046 	INP_WLOCK_ASSERT(inp);
2047 	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
2048 	    ("inp_flags == %x", inp->inp_flags));
2049 	KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
2050 
2051 	switch (sopt->sopt_level) {
2052 #ifdef INET6
2053 	case IPPROTO_IPV6:
2054 		MPASS(inp->inp_vflag & INP_IPV6PROTO);
2055 		switch (sopt->sopt_name) {
2056 		case IPV6_USE_MIN_MTU:
2057 			tcp6_use_min_mtu(tp);
2058 			/* FALLTHROUGH */
2059 		}
2060 		INP_WUNLOCK(inp);
2061 		return (0);
2062 #endif
2063 #ifdef INET
2064 	case IPPROTO_IP:
2065 		INP_WUNLOCK(inp);
2066 		return (0);
2067 #endif
2068 	}
2069 
2070 	/*
2071 	 * For TCP_CCALGOOPT forward the control to CC module, for both
2072 	 * SOPT_SET and SOPT_GET.
2073 	 */
2074 	switch (sopt->sopt_name) {
2075 	case TCP_CCALGOOPT:
2076 		INP_WUNLOCK(inp);
2077 		if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
2078 			return (EINVAL);
2079 		pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
2080 		error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
2081 		    sopt->sopt_valsize);
2082 		if (error) {
2083 			free(pbuf, M_TEMP);
2084 			return (error);
2085 		}
2086 		INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
2087 		if (CC_ALGO(tp)->ctl_output != NULL)
2088 			error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf);
2089 		else
2090 			error = ENOENT;
2091 		INP_WUNLOCK(inp);
2092 		if (error == 0 && sopt->sopt_dir == SOPT_GET)
2093 			error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
2094 		free(pbuf, M_TEMP);
2095 		return (error);
2096 	}
2097 
2098 	switch (sopt->sopt_dir) {
2099 	case SOPT_SET:
2100 		switch (sopt->sopt_name) {
2101 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2102 		case TCP_MD5SIG:
2103 			INP_WUNLOCK(inp);
2104 			if (!TCPMD5_ENABLED())
2105 				return (ENOPROTOOPT);
2106 			error = TCPMD5_PCBCTL(inp, sopt);
2107 			if (error)
2108 				return (error);
2109 			INP_WLOCK_RECHECK(inp);
2110 			goto unlock_and_done;
2111 #endif /* IPSEC */
2112 
2113 		case TCP_NODELAY:
2114 		case TCP_NOOPT:
2115 			INP_WUNLOCK(inp);
2116 			error = sooptcopyin(sopt, &optval, sizeof optval,
2117 			    sizeof optval);
2118 			if (error)
2119 				return (error);
2120 
2121 			INP_WLOCK_RECHECK(inp);
2122 			switch (sopt->sopt_name) {
2123 			case TCP_NODELAY:
2124 				opt = TF_NODELAY;
2125 				break;
2126 			case TCP_NOOPT:
2127 				opt = TF_NOOPT;
2128 				break;
2129 			default:
2130 				opt = 0; /* dead code to fool gcc */
2131 				break;
2132 			}
2133 
2134 			if (optval)
2135 				tp->t_flags |= opt;
2136 			else
2137 				tp->t_flags &= ~opt;
2138 unlock_and_done:
2139 #ifdef TCP_OFFLOAD
2140 			if (tp->t_flags & TF_TOE) {
2141 				tcp_offload_ctloutput(tp, sopt->sopt_dir,
2142 				    sopt->sopt_name);
2143 			}
2144 #endif
2145 			INP_WUNLOCK(inp);
2146 			break;
2147 
2148 		case TCP_NOPUSH:
2149 			INP_WUNLOCK(inp);
2150 			error = sooptcopyin(sopt, &optval, sizeof optval,
2151 			    sizeof optval);
2152 			if (error)
2153 				return (error);
2154 
2155 			INP_WLOCK_RECHECK(inp);
2156 			if (optval)
2157 				tp->t_flags |= TF_NOPUSH;
2158 			else if (tp->t_flags & TF_NOPUSH) {
2159 				tp->t_flags &= ~TF_NOPUSH;
2160 				if (TCPS_HAVEESTABLISHED(tp->t_state)) {
2161 					struct epoch_tracker et;
2162 
2163 					NET_EPOCH_ENTER(et);
2164 					error = tcp_output_nodrop(tp);
2165 					NET_EPOCH_EXIT(et);
2166 				}
2167 			}
2168 			goto unlock_and_done;
2169 
2170 		case TCP_REMOTE_UDP_ENCAPS_PORT:
2171 			INP_WUNLOCK(inp);
2172 			error = sooptcopyin(sopt, &optval, sizeof optval,
2173 			    sizeof optval);
2174 			if (error)
2175 				return (error);
2176 			if ((optval < TCP_TUNNELING_PORT_MIN) ||
2177 			    (optval > TCP_TUNNELING_PORT_MAX)) {
2178 				/* Its got to be in range */
2179 				return (EINVAL);
2180 			}
2181 			if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
2182 				/* You have to have enabled a UDP tunneling port first */
2183 				return (EINVAL);
2184 			}
2185 			INP_WLOCK_RECHECK(inp);
2186 			if (tp->t_state != TCPS_CLOSED) {
2187 				/* You can't change after you are connected */
2188 				error = EINVAL;
2189 			} else {
2190 				/* Ok we are all good set the port */
2191 				tp->t_port = htons(optval);
2192 			}
2193 			goto unlock_and_done;
2194 
2195 		case TCP_MAXSEG:
2196 			INP_WUNLOCK(inp);
2197 			error = sooptcopyin(sopt, &optval, sizeof optval,
2198 			    sizeof optval);
2199 			if (error)
2200 				return (error);
2201 
2202 			INP_WLOCK_RECHECK(inp);
2203 			if (optval > 0 && optval <= tp->t_maxseg &&
2204 			    optval + 40 >= V_tcp_minmss) {
2205 				tp->t_maxseg = optval;
2206 				if (tp->t_maxseg < V_tcp_mssdflt) {
2207 					/*
2208 					 * The MSS is so small we should not process incoming
2209 					 * SACK's since we are subject to attack in such a
2210 					 * case.
2211 					 */
2212 					tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
2213 				} else {
2214 					tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
2215 				}
2216 			} else
2217 				error = EINVAL;
2218 			goto unlock_and_done;
2219 
2220 		case TCP_INFO:
2221 			INP_WUNLOCK(inp);
2222 			error = EINVAL;
2223 			break;
2224 
2225 		case TCP_STATS:
2226 			INP_WUNLOCK(inp);
2227 #ifdef STATS
2228 			error = sooptcopyin(sopt, &optval, sizeof optval,
2229 			    sizeof optval);
2230 			if (error)
2231 				return (error);
2232 
2233 			if (optval > 0)
2234 				sbp = stats_blob_alloc(
2235 				    V_tcp_perconn_stats_dflt_tpl, 0);
2236 			else
2237 				sbp = NULL;
2238 
2239 			INP_WLOCK_RECHECK(inp);
2240 			if ((tp->t_stats != NULL && sbp == NULL) ||
2241 			    (tp->t_stats == NULL && sbp != NULL)) {
2242 				struct statsblob *t = tp->t_stats;
2243 				tp->t_stats = sbp;
2244 				sbp = t;
2245 			}
2246 			INP_WUNLOCK(inp);
2247 
2248 			stats_blob_destroy(sbp);
2249 #else
2250 			return (EOPNOTSUPP);
2251 #endif /* !STATS */
2252 			break;
2253 
2254 		case TCP_CONGESTION:
2255 			error = tcp_set_cc_mod(inp, sopt);
2256 			break;
2257 
2258 		case TCP_REUSPORT_LB_NUMA:
2259 			INP_WUNLOCK(inp);
2260 			error = sooptcopyin(sopt, &optval, sizeof(optval),
2261 			    sizeof(optval));
2262 			INP_WLOCK_RECHECK(inp);
2263 			if (!error)
2264 				error = in_pcblbgroup_numa(inp, optval);
2265 			INP_WUNLOCK(inp);
2266 			break;
2267 
2268 #ifdef KERN_TLS
2269 		case TCP_TXTLS_ENABLE:
2270 			INP_WUNLOCK(inp);
2271 			error = ktls_copyin_tls_enable(sopt, &tls);
2272 			if (error != 0)
2273 				break;
2274 			error = ktls_enable_tx(so, &tls);
2275 			ktls_cleanup_tls_enable(&tls);
2276 			break;
2277 		case TCP_TXTLS_MODE:
2278 			INP_WUNLOCK(inp);
2279 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2280 			if (error != 0)
2281 				return (error);
2282 
2283 			INP_WLOCK_RECHECK(inp);
2284 			error = ktls_set_tx_mode(so, ui);
2285 			INP_WUNLOCK(inp);
2286 			break;
2287 		case TCP_RXTLS_ENABLE:
2288 			INP_WUNLOCK(inp);
2289 			error = ktls_copyin_tls_enable(sopt, &tls);
2290 			if (error != 0)
2291 				break;
2292 			error = ktls_enable_rx(so, &tls);
2293 			ktls_cleanup_tls_enable(&tls);
2294 			break;
2295 #endif
2296 		case TCP_MAXUNACKTIME:
2297 		case TCP_KEEPIDLE:
2298 		case TCP_KEEPINTVL:
2299 		case TCP_KEEPINIT:
2300 			INP_WUNLOCK(inp);
2301 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2302 			if (error)
2303 				return (error);
2304 
2305 			if (ui > (UINT_MAX / hz)) {
2306 				error = EINVAL;
2307 				break;
2308 			}
2309 			ui *= hz;
2310 
2311 			INP_WLOCK_RECHECK(inp);
2312 			switch (sopt->sopt_name) {
2313 			case TCP_MAXUNACKTIME:
2314 				tp->t_maxunacktime = ui;
2315 				break;
2316 
2317 			case TCP_KEEPIDLE:
2318 				tp->t_keepidle = ui;
2319 				/*
2320 				 * XXX: better check current remaining
2321 				 * timeout and "merge" it with new value.
2322 				 */
2323 				if ((tp->t_state > TCPS_LISTEN) &&
2324 				    (tp->t_state <= TCPS_CLOSING))
2325 					tcp_timer_activate(tp, TT_KEEP,
2326 					    TP_KEEPIDLE(tp));
2327 				break;
2328 			case TCP_KEEPINTVL:
2329 				tp->t_keepintvl = ui;
2330 				if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2331 				    (TP_MAXIDLE(tp) > 0))
2332 					tcp_timer_activate(tp, TT_2MSL,
2333 					    TP_MAXIDLE(tp));
2334 				break;
2335 			case TCP_KEEPINIT:
2336 				tp->t_keepinit = ui;
2337 				if (tp->t_state == TCPS_SYN_RECEIVED ||
2338 				    tp->t_state == TCPS_SYN_SENT)
2339 					tcp_timer_activate(tp, TT_KEEP,
2340 					    TP_KEEPINIT(tp));
2341 				break;
2342 			}
2343 			goto unlock_and_done;
2344 
2345 		case TCP_KEEPCNT:
2346 			INP_WUNLOCK(inp);
2347 			error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2348 			if (error)
2349 				return (error);
2350 
2351 			INP_WLOCK_RECHECK(inp);
2352 			tp->t_keepcnt = ui;
2353 			if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2354 			    (TP_MAXIDLE(tp) > 0))
2355 				tcp_timer_activate(tp, TT_2MSL,
2356 				    TP_MAXIDLE(tp));
2357 			goto unlock_and_done;
2358 
2359 		case TCP_FASTOPEN: {
2360 			struct tcp_fastopen tfo_optval;
2361 
2362 			INP_WUNLOCK(inp);
2363 			if (!V_tcp_fastopen_client_enable &&
2364 			    !V_tcp_fastopen_server_enable)
2365 				return (EPERM);
2366 
2367 			error = sooptcopyin(sopt, &tfo_optval,
2368 				    sizeof(tfo_optval), sizeof(int));
2369 			if (error)
2370 				return (error);
2371 
2372 			INP_WLOCK_RECHECK(inp);
2373 			if ((tp->t_state != TCPS_CLOSED) &&
2374 			    (tp->t_state != TCPS_LISTEN)) {
2375 				error = EINVAL;
2376 				goto unlock_and_done;
2377 			}
2378 			if (tfo_optval.enable) {
2379 				if (tp->t_state == TCPS_LISTEN) {
2380 					if (!V_tcp_fastopen_server_enable) {
2381 						error = EPERM;
2382 						goto unlock_and_done;
2383 					}
2384 
2385 					if (tp->t_tfo_pending == NULL)
2386 						tp->t_tfo_pending =
2387 						    tcp_fastopen_alloc_counter();
2388 				} else {
2389 					/*
2390 					 * If a pre-shared key was provided,
2391 					 * stash it in the client cookie
2392 					 * field of the tcpcb for use during
2393 					 * connect.
2394 					 */
2395 					if (sopt->sopt_valsize ==
2396 					    sizeof(tfo_optval)) {
2397 						memcpy(tp->t_tfo_cookie.client,
2398 						       tfo_optval.psk,
2399 						       TCP_FASTOPEN_PSK_LEN);
2400 						tp->t_tfo_client_cookie_len =
2401 						    TCP_FASTOPEN_PSK_LEN;
2402 					}
2403 				}
2404 				tp->t_flags |= TF_FASTOPEN;
2405 			} else
2406 				tp->t_flags &= ~TF_FASTOPEN;
2407 			goto unlock_and_done;
2408 		}
2409 
2410 #ifdef TCP_BLACKBOX
2411 		case TCP_LOG:
2412 			INP_WUNLOCK(inp);
2413 			error = sooptcopyin(sopt, &optval, sizeof optval,
2414 			    sizeof optval);
2415 			if (error)
2416 				return (error);
2417 
2418 			INP_WLOCK_RECHECK(inp);
2419 			error = tcp_log_state_change(tp, optval);
2420 			goto unlock_and_done;
2421 
2422 		case TCP_LOGBUF:
2423 			INP_WUNLOCK(inp);
2424 			error = EINVAL;
2425 			break;
2426 
2427 		case TCP_LOGID:
2428 			INP_WUNLOCK(inp);
2429 			error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
2430 			if (error)
2431 				break;
2432 			buf[sopt->sopt_valsize] = '\0';
2433 			INP_WLOCK_RECHECK(inp);
2434 			error = tcp_log_set_id(tp, buf);
2435 			/* tcp_log_set_id() unlocks the INP. */
2436 			break;
2437 
2438 		case TCP_LOGDUMP:
2439 		case TCP_LOGDUMPID:
2440 			INP_WUNLOCK(inp);
2441 			error =
2442 			    sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
2443 			if (error)
2444 				break;
2445 			buf[sopt->sopt_valsize] = '\0';
2446 			INP_WLOCK_RECHECK(inp);
2447 			if (sopt->sopt_name == TCP_LOGDUMP) {
2448 				error = tcp_log_dump_tp_logbuf(tp, buf,
2449 				    M_WAITOK, true);
2450 				INP_WUNLOCK(inp);
2451 			} else {
2452 				tcp_log_dump_tp_bucket_logbufs(tp, buf);
2453 				/*
2454 				 * tcp_log_dump_tp_bucket_logbufs() drops the
2455 				 * INP lock.
2456 				 */
2457 			}
2458 			break;
2459 #endif
2460 
2461 		default:
2462 			INP_WUNLOCK(inp);
2463 			error = ENOPROTOOPT;
2464 			break;
2465 		}
2466 		break;
2467 
2468 	case SOPT_GET:
2469 		tp = intotcpcb(inp);
2470 		switch (sopt->sopt_name) {
2471 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2472 		case TCP_MD5SIG:
2473 			INP_WUNLOCK(inp);
2474 			if (!TCPMD5_ENABLED())
2475 				return (ENOPROTOOPT);
2476 			error = TCPMD5_PCBCTL(inp, sopt);
2477 			break;
2478 #endif
2479 
2480 		case TCP_NODELAY:
2481 			optval = tp->t_flags & TF_NODELAY;
2482 			INP_WUNLOCK(inp);
2483 			error = sooptcopyout(sopt, &optval, sizeof optval);
2484 			break;
2485 		case TCP_MAXSEG:
2486 			optval = tp->t_maxseg;
2487 			INP_WUNLOCK(inp);
2488 			error = sooptcopyout(sopt, &optval, sizeof optval);
2489 			break;
2490 		case TCP_REMOTE_UDP_ENCAPS_PORT:
2491 			optval = ntohs(tp->t_port);
2492 			INP_WUNLOCK(inp);
2493 			error = sooptcopyout(sopt, &optval, sizeof optval);
2494 			break;
2495 		case TCP_NOOPT:
2496 			optval = tp->t_flags & TF_NOOPT;
2497 			INP_WUNLOCK(inp);
2498 			error = sooptcopyout(sopt, &optval, sizeof optval);
2499 			break;
2500 		case TCP_NOPUSH:
2501 			optval = tp->t_flags & TF_NOPUSH;
2502 			INP_WUNLOCK(inp);
2503 			error = sooptcopyout(sopt, &optval, sizeof optval);
2504 			break;
2505 		case TCP_INFO:
2506 			tcp_fill_info(tp, &ti);
2507 			INP_WUNLOCK(inp);
2508 			error = sooptcopyout(sopt, &ti, sizeof ti);
2509 			break;
2510 		case TCP_STATS:
2511 			{
2512 #ifdef STATS
2513 			int nheld;
2514 			TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
2515 
2516 			error = 0;
2517 			socklen_t outsbsz = sopt->sopt_valsize;
2518 			if (tp->t_stats == NULL)
2519 				error = ENOENT;
2520 			else if (outsbsz >= tp->t_stats->cursz)
2521 				outsbsz = tp->t_stats->cursz;
2522 			else if (outsbsz >= sizeof(struct statsblob))
2523 				outsbsz = sizeof(struct statsblob);
2524 			else
2525 				error = EINVAL;
2526 			INP_WUNLOCK(inp);
2527 			if (error)
2528 				break;
2529 
2530 			sbp = sopt->sopt_val;
2531 			nheld = atop(round_page(((vm_offset_t)sbp) +
2532 			    (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
2533 			vm_page_t ma[nheld];
2534 			if (vm_fault_quick_hold_pages(
2535 			    &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
2536 			    outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
2537 			    nheld) < 0) {
2538 				error = EFAULT;
2539 				break;
2540 			}
2541 
2542 			if ((error = copyin_nofault(&(sbp->flags), &sbflags,
2543 			    SIZEOF_MEMBER(struct statsblob, flags))))
2544 				goto unhold;
2545 
2546 			INP_WLOCK_RECHECK(inp);
2547 			error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
2548 			    sbflags | SB_CLONE_USRDSTNOFAULT);
2549 			INP_WUNLOCK(inp);
2550 			sopt->sopt_valsize = outsbsz;
2551 unhold:
2552 			vm_page_unhold_pages(ma, nheld);
2553 #else
2554 			INP_WUNLOCK(inp);
2555 			error = EOPNOTSUPP;
2556 #endif /* !STATS */
2557 			break;
2558 			}
2559 		case TCP_CONGESTION:
2560 			len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
2561 			INP_WUNLOCK(inp);
2562 			error = sooptcopyout(sopt, buf, len + 1);
2563 			break;
2564 		case TCP_MAXUNACKTIME:
2565 		case TCP_KEEPIDLE:
2566 		case TCP_KEEPINTVL:
2567 		case TCP_KEEPINIT:
2568 		case TCP_KEEPCNT:
2569 			switch (sopt->sopt_name) {
2570 			case TCP_MAXUNACKTIME:
2571 				ui = TP_MAXUNACKTIME(tp) / hz;
2572 				break;
2573 			case TCP_KEEPIDLE:
2574 				ui = TP_KEEPIDLE(tp) / hz;
2575 				break;
2576 			case TCP_KEEPINTVL:
2577 				ui = TP_KEEPINTVL(tp) / hz;
2578 				break;
2579 			case TCP_KEEPINIT:
2580 				ui = TP_KEEPINIT(tp) / hz;
2581 				break;
2582 			case TCP_KEEPCNT:
2583 				ui = TP_KEEPCNT(tp);
2584 				break;
2585 			}
2586 			INP_WUNLOCK(inp);
2587 			error = sooptcopyout(sopt, &ui, sizeof(ui));
2588 			break;
2589 		case TCP_FASTOPEN:
2590 			optval = tp->t_flags & TF_FASTOPEN;
2591 			INP_WUNLOCK(inp);
2592 			error = sooptcopyout(sopt, &optval, sizeof optval);
2593 			break;
2594 #ifdef TCP_BLACKBOX
2595 		case TCP_LOG:
2596 			optval = tcp_get_bblog_state(tp);
2597 			INP_WUNLOCK(inp);
2598 			error = sooptcopyout(sopt, &optval, sizeof(optval));
2599 			break;
2600 		case TCP_LOGBUF:
2601 			/* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
2602 			error = tcp_log_getlogbuf(sopt, tp);
2603 			break;
2604 		case TCP_LOGID:
2605 			len = tcp_log_get_id(tp, buf);
2606 			INP_WUNLOCK(inp);
2607 			error = sooptcopyout(sopt, buf, len + 1);
2608 			break;
2609 		case TCP_LOGDUMP:
2610 		case TCP_LOGDUMPID:
2611 			INP_WUNLOCK(inp);
2612 			error = EINVAL;
2613 			break;
2614 #endif
2615 #ifdef KERN_TLS
2616 		case TCP_TXTLS_MODE:
2617 			error = ktls_get_tx_mode(so, &optval);
2618 			INP_WUNLOCK(inp);
2619 			if (error == 0)
2620 				error = sooptcopyout(sopt, &optval,
2621 				    sizeof(optval));
2622 			break;
2623 		case TCP_RXTLS_MODE:
2624 			error = ktls_get_rx_mode(so, &optval);
2625 			INP_WUNLOCK(inp);
2626 			if (error == 0)
2627 				error = sooptcopyout(sopt, &optval,
2628 				    sizeof(optval));
2629 			break;
2630 #endif
2631 		default:
2632 			INP_WUNLOCK(inp);
2633 			error = ENOPROTOOPT;
2634 			break;
2635 		}
2636 		break;
2637 	}
2638 	return (error);
2639 }
2640 #undef INP_WLOCK_RECHECK
2641 #undef INP_WLOCK_RECHECK_CLEANUP
2642 
2643 /*
2644  * Initiate (or continue) disconnect.
2645  * If embryonic state, just send reset (once).
2646  * If in ``let data drain'' option and linger null, just drop.
2647  * Otherwise (hard), mark socket disconnecting and drop
2648  * current input data; switch states based on user close, and
2649  * send segment to peer (with FIN).
2650  */
static void
tcp_disconnect(struct tcpcb *tp)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct socket *so = tptosocket(tp);

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	/*
	 * Neither tcp_close() nor tcp_drop() should return NULL, as the
	 * socket is still open.
	 */
	if (tp->t_state < TCPS_ESTABLISHED &&
	    !(tp->t_state > TCPS_LISTEN && (tp->t_flags & TF_FASTOPEN))) {
		/*
		 * Embryonic connection that is not a post-LISTEN TFO
		 * connection: no FIN handshake needed, just discard the
		 * control block.
		 */
		tp = tcp_close(tp);
		KASSERT(tp != NULL,
		    ("tcp_disconnect: tcp_close() returned NULL"));
	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
		/* Hard close (SO_LINGER with zero timeout): drop and RST. */
		tp = tcp_drop(tp, 0);
		KASSERT(tp != NULL,
		    ("tcp_disconnect: tcp_drop() returned NULL"));
	} else {
		/*
		 * Graceful close: mark the socket disconnecting, discard
		 * pending receive data, advance the state machine via
		 * tcp_usrclosed() and emit a segment (with FIN) if the
		 * connection has not been dropped in the meantime.
		 */
		soisdisconnecting(so);
		sbflush(&so->so_rcv);
		tcp_usrclosed(tp);
		if (!(inp->inp_flags & INP_DROPPED))
			/* Ignore stack's drop request, we already at it. */
			(void)tcp_output_nodrop(tp);
	}
}
2682 
2683 /*
2684  * User issued close, and wish to trail through shutdown states:
2685  * if never received SYN, just forget it.  If got a SYN from peer,
2686  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2687  * If already got a FIN from peer, then almost done; go to LAST_ACK
2688  * state.  In all other cases, have already sent FIN to peer (e.g.
2689  * after PRU_SHUTDOWN), and just have to play tedious game waiting
2690  * for peer to send FIN or not respond to keep-alives, etc.
2691  * We can let the user exit from the close as soon as the FIN is acked.
2692  */
static void
tcp_usrclosed(struct tcpcb *tp)
{

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(tptoinpcb(tp));

	switch (tp->t_state) {
	case TCPS_LISTEN:
#ifdef TCP_OFFLOAD
		tcp_offload_listen_stop(tp);
#endif
		tcp_state_change(tp, TCPS_CLOSED);
		/* FALLTHROUGH */
	case TCPS_CLOSED:
		/* Never connected (or a closed listener): discard the PCB. */
		tp = tcp_close(tp);
		/*
		 * tcp_close() should never return NULL here as the socket is
		 * still open.
		 */
		KASSERT(tp != NULL,
		    ("tcp_usrclosed: tcp_close() returned NULL"));
		break;

	case TCPS_SYN_SENT:
	case TCPS_SYN_RECEIVED:
		/* Handshake still in flight: remember to append a FIN later. */
		tp->t_flags |= TF_NEEDFIN;
		break;

	case TCPS_ESTABLISHED:
		tcp_state_change(tp, TCPS_FIN_WAIT_1);
		break;

	case TCPS_CLOSE_WAIT:
		/* Peer's FIN already seen; our FIN completes the close. */
		tcp_state_change(tp, TCPS_LAST_ACK);
		break;
	}
	/* Start the ACK progress clock if it is not already running. */
	if (tp->t_acktime == 0)
		tp->t_acktime = ticks;
	if (tp->t_state >= TCPS_FIN_WAIT_2) {
		/* Our FIN is acknowledged; release SACK state, notify socket. */
		tcp_free_sackholes(tp);
		soisdisconnected(tptosocket(tp));
		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
		if (tp->t_state == TCPS_FIN_WAIT_2) {
			int timeout;

			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : TP_MAXIDLE(tp);
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
	}
}
2745 
2746 #ifdef DDB
/*
 * Emit 'indent' spaces to align nested DDB output.
 */
static void
db_print_indent(int indent)
{

	while (indent-- > 0)
		db_printf(" ");
}
2755 
2756 static void
db_print_tstate(int t_state)2757 db_print_tstate(int t_state)
2758 {
2759 
2760 	switch (t_state) {
2761 	case TCPS_CLOSED:
2762 		db_printf("TCPS_CLOSED");
2763 		return;
2764 
2765 	case TCPS_LISTEN:
2766 		db_printf("TCPS_LISTEN");
2767 		return;
2768 
2769 	case TCPS_SYN_SENT:
2770 		db_printf("TCPS_SYN_SENT");
2771 		return;
2772 
2773 	case TCPS_SYN_RECEIVED:
2774 		db_printf("TCPS_SYN_RECEIVED");
2775 		return;
2776 
2777 	case TCPS_ESTABLISHED:
2778 		db_printf("TCPS_ESTABLISHED");
2779 		return;
2780 
2781 	case TCPS_CLOSE_WAIT:
2782 		db_printf("TCPS_CLOSE_WAIT");
2783 		return;
2784 
2785 	case TCPS_FIN_WAIT_1:
2786 		db_printf("TCPS_FIN_WAIT_1");
2787 		return;
2788 
2789 	case TCPS_CLOSING:
2790 		db_printf("TCPS_CLOSING");
2791 		return;
2792 
2793 	case TCPS_LAST_ACK:
2794 		db_printf("TCPS_LAST_ACK");
2795 		return;
2796 
2797 	case TCPS_FIN_WAIT_2:
2798 		db_printf("TCPS_FIN_WAIT_2");
2799 		return;
2800 
2801 	case TCPS_TIME_WAIT:
2802 		db_printf("TCPS_TIME_WAIT");
2803 		return;
2804 
2805 	default:
2806 		db_printf("unknown");
2807 		return;
2808 	}
2809 }
2810 
2811 static void
db_print_bblog_state(int state)2812 db_print_bblog_state(int state)
2813 {
2814 	switch (state) {
2815 	case TCP_LOG_STATE_RATIO_OFF:
2816 		db_printf("TCP_LOG_STATE_RATIO_OFF");
2817 		break;
2818 	case TCP_LOG_STATE_CLEAR:
2819 		db_printf("TCP_LOG_STATE_CLEAR");
2820 		break;
2821 	case TCP_LOG_STATE_OFF:
2822 		db_printf("TCP_LOG_STATE_OFF");
2823 		break;
2824 	case TCP_LOG_STATE_TAIL:
2825 		db_printf("TCP_LOG_STATE_TAIL");
2826 		break;
2827 	case TCP_LOG_STATE_HEAD:
2828 		db_printf("TCP_LOG_STATE_HEAD");
2829 		break;
2830 	case TCP_LOG_STATE_HEAD_AUTO:
2831 		db_printf("TCP_LOG_STATE_HEAD_AUTO");
2832 		break;
2833 	case TCP_LOG_STATE_CONTINUAL:
2834 		db_printf("TCP_LOG_STATE_CONTINUAL");
2835 		break;
2836 	case TCP_LOG_STATE_TAIL_AUTO:
2837 		db_printf("TCP_LOG_STATE_TAIL_AUTO");
2838 		break;
2839 	case TCP_LOG_VIA_BBPOINTS:
2840 		db_printf("TCP_LOG_STATE_BBPOINTS");
2841 		break;
2842 	default:
2843 		db_printf("UNKNOWN(%d)", state);
2844 		break;
2845 	}
2846 }
2847 
2848 static void
db_print_tcpcb(struct tcpcb * tp,const char * name,int indent,bool show_bblog,bool show_inpcb)2849 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent, bool show_bblog,
2850     bool show_inpcb)
2851 {
2852 
2853 	db_print_indent(indent);
2854 	db_printf("%s at %p\n", name, tp);
2855 
2856 	indent += 2;
2857 
2858 	if (show_inpcb)
2859 		db_print_inpcb(tptoinpcb(tp), "t_inpcb", indent);
2860 
2861 	db_print_indent(indent);
2862 	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
2863 	   TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
2864 
2865 	db_print_indent(indent);
2866 	db_printf("t_callout: %p   t_timers: %p\n",
2867 	    &tp->t_callout, &tp->t_timers);
2868 
2869 	db_print_indent(indent);
2870 	db_printf("t_state: %d (", tp->t_state);
2871 	db_print_tstate(tp->t_state);
2872 	db_printf(")\n");
2873 
2874 	db_print_indent(indent);
2875 	db_printf("t_flags: 0x%b\n", tp->t_flags, TF_BITS);
2876 
2877 	db_print_indent(indent);
2878 	db_printf("t_flags2: 0x%b\n", tp->t_flags2, TF2_BITS);
2879 
2880 	db_print_indent(indent);
2881 	db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: 0x%08x\n",
2882 	    tp->snd_una, tp->snd_max, tp->snd_nxt);
2883 
2884 	db_print_indent(indent);
2885 	db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
2886 	   tp->snd_up, tp->snd_wl1, tp->snd_wl2);
2887 
2888 	db_print_indent(indent);
2889 	db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
2890 	    tp->iss, tp->irs, tp->rcv_nxt);
2891 
2892 	db_print_indent(indent);
2893 	db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
2894 	    tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
2895 
2896 	db_print_indent(indent);
2897 	db_printf("snd_wnd: %u   snd_cwnd: %u\n",
2898 	   tp->snd_wnd, tp->snd_cwnd);
2899 
2900 	db_print_indent(indent);
2901 	db_printf("snd_ssthresh: %u   snd_recover: "
2902 	    "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
2903 
2904 	db_print_indent(indent);
2905 	db_printf("t_rcvtime: %u   t_startime: %u\n",
2906 	    tp->t_rcvtime, tp->t_starttime);
2907 
2908 	db_print_indent(indent);
2909 	db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
2910 	    tp->t_rtttime, tp->t_rtseq);
2911 
2912 	db_print_indent(indent);
2913 	db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
2914 	    tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
2915 
2916 	db_print_indent(indent);
2917 	db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u\n",
2918 	    tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
2919 
2920 	db_print_indent(indent);
2921 	db_printf("t_rttupdated: %u   max_sndwnd: %u   t_softerror: %d\n",
2922 	    tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
2923 
2924 	db_print_indent(indent);
2925 	db_printf("t_oobflags: 0x%b   t_iobc: 0x%02x\n", tp->t_oobflags,
2926 	    TCPOOB_BITS, tp->t_iobc);
2927 
2928 	db_print_indent(indent);
2929 	db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
2930 	    tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
2931 
2932 	db_print_indent(indent);
2933 	db_printf("ts_recent: %u   ts_recent_age: %u\n",
2934 	    tp->ts_recent, tp->ts_recent_age);
2935 
2936 	db_print_indent(indent);
2937 	db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
2938 	    "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
2939 
2940 	db_print_indent(indent);
2941 	db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
2942 	    "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
2943 	    tp->snd_recover_prev, tp->t_badrxtwin);
2944 
2945 	db_print_indent(indent);
2946 	db_printf("snd_numholes: %d  snd_holes first: %p\n",
2947 	    tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
2948 
2949 	db_print_indent(indent);
2950 	db_printf("snd_fack: 0x%08x   rcv_numsacks: %d\n",
2951 	    tp->snd_fack, tp->rcv_numsacks);
2952 
2953 	/* Skip sackblks, sackhint. */
2954 
2955 	db_print_indent(indent);
2956 	db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
2957 	    tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
2958 
2959 	db_print_indent(indent);
2960 	db_printf("t_fb.tfb_tcp_block_name: %s\n", tp->t_fb->tfb_tcp_block_name);
2961 
2962 	db_print_indent(indent);
2963 	db_printf("t_cc.name: %s\n", tp->t_cc->name);
2964 
2965 	db_print_indent(indent);
2966 	db_printf("_t_logstate: %d (", tp->_t_logstate);
2967 	db_print_bblog_state(tp->_t_logstate);
2968 	db_printf(")\n");
2969 
2970 	db_print_indent(indent);
2971 	db_printf("t_lognum: %d   t_loglimit: %d   t_logsn: %u\n",
2972 	    tp->t_lognum, tp->t_loglimit, tp->t_logsn);
2973 
2974 	if (show_bblog) {
2975 #ifdef TCP_BLACKBOX
2976 		db_print_bblog_entries(&tp->t_logs, indent);
2977 #else
2978 		db_print_indent(indent);
2979 		db_printf("BBLog not supported\n");
2980 #endif
2981 	}
2982 }
2983 
DB_SHOW_COMMAND(tcpcb,db_show_tcpcb)2984 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
2985 {
2986 	struct tcpcb *tp;
2987 	bool show_bblog, show_inpcb;
2988 
2989 	if (!have_addr) {
2990 		db_printf("usage: show tcpcb[/bi] <addr>\n");
2991 		return;
2992 	}
2993 	show_bblog = strchr(modif, 'b') != NULL;
2994 	show_inpcb = strchr(modif, 'i') != NULL;
2995 	tp = (struct tcpcb *)addr;
2996 	db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
2997 }
2998 
DB_SHOW_ALL_COMMAND(tcpcbs,db_show_all_tcpcbs)2999 DB_SHOW_ALL_COMMAND(tcpcbs, db_show_all_tcpcbs)
3000 {
3001 	VNET_ITERATOR_DECL(vnet_iter);
3002 	struct inpcb *inp;
3003 	struct tcpcb *tp;
3004 	bool only_locked, show_bblog, show_inpcb;
3005 
3006 	only_locked = strchr(modif, 'l') != NULL;
3007 	show_bblog = strchr(modif, 'b') != NULL;
3008 	show_inpcb = strchr(modif, 'i') != NULL;
3009 	VNET_FOREACH(vnet_iter) {
3010 		CURVNET_SET(vnet_iter);
3011 		CK_LIST_FOREACH(inp, &V_tcbinfo.ipi_listhead, inp_list) {
3012 			if (only_locked &&
3013 			    inp->inp_lock.rw_lock == RW_UNLOCKED)
3014 				continue;
3015 			tp = intotcpcb(inp);
3016 			db_print_tcpcb(tp, "tcpcb", 0, show_bblog, show_inpcb);
3017 			if (db_pager_quit)
3018 				break;
3019 		}
3020 		CURVNET_RESTORE();
3021 		if (db_pager_quit)
3022 			break;
3023 	}
3024 }
3025 #endif
3026