1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1988, 1993
5 * The Regents of the University of California.
6 * Copyright (c) 2006-2007 Robert N. M. Watson
7 * Copyright (c) 2010-2011 Juniper Networks, Inc.
8 * All rights reserved.
9 *
10 * Portions of this software were developed by Robert N. M. Watson under
11 * contract to Juniper Networks, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 #include "opt_ddb.h"
40 #include "opt_inet.h"
41 #include "opt_inet6.h"
42 #include "opt_ipsec.h"
43 #include "opt_kern_tls.h"
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/arb.h>
48 #include <sys/limits.h>
49 #include <sys/malloc.h>
50 #include <sys/refcount.h>
51 #include <sys/kernel.h>
52 #include <sys/ktls.h>
53 #include <sys/qmath.h>
54 #include <sys/sysctl.h>
55 #include <sys/mbuf.h>
56 #ifdef INET6
57 #include <sys/domain.h>
58 #endif /* INET6 */
59 #include <sys/socket.h>
60 #include <sys/socketvar.h>
61 #include <sys/protosw.h>
62 #include <sys/proc.h>
63 #include <sys/jail.h>
64 #include <sys/stats.h>
65
66 #ifdef DDB
67 #include <ddb/ddb.h>
68 #endif
69
70 #include <net/if.h>
71 #include <net/if_var.h>
72 #include <net/route.h>
73 #include <net/vnet.h>
74
75 #include <netinet/in.h>
76 #include <netinet/in_kdtrace.h>
77 #include <netinet/in_pcb.h>
78 #include <netinet/in_systm.h>
79 #include <netinet/in_var.h>
80 #include <netinet/ip.h>
81 #include <netinet/ip_var.h>
82 #ifdef INET6
83 #include <netinet/ip6.h>
84 #include <netinet6/in6_pcb.h>
85 #include <netinet6/ip6_var.h>
86 #include <netinet6/scope6_var.h>
87 #endif
88 #include <netinet/tcp.h>
89 #include <netinet/tcp_fsm.h>
90 #include <netinet/tcp_seq.h>
91 #include <netinet/tcp_timer.h>
92 #include <netinet/tcp_var.h>
93 #include <netinet/tcp_log_buf.h>
94 #include <netinet/tcpip.h>
95 #include <netinet/cc/cc.h>
96 #include <netinet/tcp_fastopen.h>
97 #include <netinet/tcp_hpts.h>
98 #ifdef TCPPCAP
99 #include <netinet/tcp_pcap.h>
100 #endif
101 #ifdef TCP_OFFLOAD
102 #include <netinet/tcp_offload.h>
103 #endif
104 #include <netipsec/ipsec_support.h>
105
106 #include <vm/vm.h>
107 #include <vm/vm_param.h>
108 #include <vm/pmap.h>
109 #include <vm/vm_extern.h>
110 #include <vm/vm_map.h>
111 #include <vm/vm_page.h>
112
113 /*
114 * TCP protocol interface to socket abstraction.
115 */
/*
 * Forward declarations of file-local helpers.  The connect helpers are
 * only declared when the matching address family is compiled in.
 */
#ifdef INET
static int	tcp_connect(struct tcpcb *, struct sockaddr_in *,
		    struct thread *td);
#endif /* INET */
#ifdef INET6
static int	tcp6_connect(struct tcpcb *, struct sockaddr_in6 *,
		    struct thread *td);
#endif /* INET6 */
static void	tcp_disconnect(struct tcpcb *);
static void	tcp_usrclosed(struct tcpcb *);
static void	tcp_fill_info(const struct tcpcb *, struct tcp_info *);

/* Checks whether the connection's stack supports the given PRUS_* flags. */
static int	tcp_pru_options_support(struct tcpcb *tp, int flags);
129
130 static void
tcp_bblog_pru(struct tcpcb * tp,uint32_t pru,int error)131 tcp_bblog_pru(struct tcpcb *tp, uint32_t pru, int error)
132 {
133 struct tcp_log_buffer *lgb;
134
135 KASSERT(tp != NULL, ("tcp_bblog_pru: tp == NULL"));
136 INP_WLOCK_ASSERT(tptoinpcb(tp));
137 if (tcp_bblogging_on(tp)) {
138 lgb = tcp_log_event(tp, NULL, NULL, NULL, TCP_LOG_PRU, error,
139 0, NULL, false, NULL, NULL, 0, NULL);
140 } else {
141 lgb = NULL;
142 }
143 if (lgb != NULL) {
144 if (error >= 0) {
145 lgb->tlb_errno = (uint32_t)error;
146 }
147 lgb->tlb_flex1 = pru;
148 }
149 }
150
151 /*
152 * TCP attaches to socket via pru_attach(), reserving space,
153 * and an internet control block.
154 */
155 static int
tcp_usr_attach(struct socket * so,int proto,struct thread * td)156 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
157 {
158 struct inpcb *inp;
159 struct tcpcb *tp = NULL;
160 int error;
161
162 inp = sotoinpcb(so);
163 KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
164
165 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
166 if (error)
167 goto out;
168
169 so->so_rcv.sb_flags |= SB_AUTOSIZE;
170 so->so_snd.sb_flags |= SB_AUTOSIZE;
171 error = in_pcballoc(so, &V_tcbinfo);
172 if (error)
173 goto out;
174 inp = sotoinpcb(so);
175 tp = tcp_newtcpcb(inp, NULL);
176 if (tp == NULL) {
177 error = ENOBUFS;
178 in_pcbfree(inp);
179 goto out;
180 }
181 tp->t_state = TCPS_CLOSED;
182 tcp_bblog_pru(tp, PRU_ATTACH, error);
183 INP_WUNLOCK(inp);
184 TCPSTATES_INC(TCPS_CLOSED);
185 out:
186 TCP_PROBE2(debug__user, tp, PRU_ATTACH);
187 return (error);
188 }
189
190 /*
191 * tcp_usr_detach is called when the socket layer loses its final reference
192 * to the socket, be it a file descriptor reference, a reference from TCP,
193 * etc. At this point, there is only one case in which we will keep around
194 * inpcb state: time wait.
195 */
196 static void
tcp_usr_detach(struct socket * so)197 tcp_usr_detach(struct socket *so)
198 {
199 struct inpcb *inp;
200 struct tcpcb *tp;
201
202 inp = sotoinpcb(so);
203 KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
204 INP_WLOCK(inp);
205 KASSERT(so->so_pcb == inp && inp->inp_socket == so,
206 ("%s: socket %p inp %p mismatch", __func__, so, inp));
207
208 tp = intotcpcb(inp);
209
210 KASSERT(inp->inp_flags & INP_DROPPED ||
211 tp->t_state < TCPS_SYN_SENT,
212 ("%s: inp %p not dropped or embryonic", __func__, inp));
213
214 tcp_discardcb(tp);
215 in_pcbfree(inp);
216 }
217
218 #ifdef INET
219 /*
220 * Give the socket an address.
221 */
222 static int
tcp_usr_bind(struct socket * so,struct sockaddr * nam,struct thread * td)223 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
224 {
225 int error = 0;
226 struct inpcb *inp;
227 struct tcpcb *tp;
228 struct sockaddr_in *sinp;
229
230 inp = sotoinpcb(so);
231 KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
232 INP_WLOCK(inp);
233 if (inp->inp_flags & INP_DROPPED) {
234 INP_WUNLOCK(inp);
235 return (EINVAL);
236 }
237 tp = intotcpcb(inp);
238
239 sinp = (struct sockaddr_in *)nam;
240 if (nam->sa_family != AF_INET) {
241 /*
242 * Preserve compatibility with old programs.
243 */
244 if (nam->sa_family != AF_UNSPEC ||
245 nam->sa_len < offsetof(struct sockaddr_in, sin_zero) ||
246 sinp->sin_addr.s_addr != INADDR_ANY) {
247 error = EAFNOSUPPORT;
248 goto out;
249 }
250 nam->sa_family = AF_INET;
251 }
252 if (nam->sa_len != sizeof(*sinp)) {
253 error = EINVAL;
254 goto out;
255 }
256 /*
257 * Must check for multicast addresses and disallow binding
258 * to them.
259 */
260 if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
261 error = EAFNOSUPPORT;
262 goto out;
263 }
264 INP_HASH_WLOCK(&V_tcbinfo);
265 error = in_pcbbind(inp, sinp, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
266 td->td_ucred);
267 INP_HASH_WUNLOCK(&V_tcbinfo);
268 out:
269 tcp_bblog_pru(tp, PRU_BIND, error);
270 TCP_PROBE2(debug__user, tp, PRU_BIND);
271 INP_WUNLOCK(inp);
272
273 return (error);
274 }
275 #endif /* INET */
276
277 #ifdef INET6
278 static int
tcp6_usr_bind(struct socket * so,struct sockaddr * nam,struct thread * td)279 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
280 {
281 int error = 0;
282 struct inpcb *inp;
283 struct tcpcb *tp;
284 struct sockaddr_in6 *sin6;
285 u_char vflagsav;
286
287 inp = sotoinpcb(so);
288 KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
289 INP_WLOCK(inp);
290 if (inp->inp_flags & INP_DROPPED) {
291 INP_WUNLOCK(inp);
292 return (EINVAL);
293 }
294 tp = intotcpcb(inp);
295
296 vflagsav = inp->inp_vflag;
297
298 sin6 = (struct sockaddr_in6 *)nam;
299 if (nam->sa_family != AF_INET6) {
300 error = EAFNOSUPPORT;
301 goto out;
302 }
303 if (nam->sa_len != sizeof(*sin6)) {
304 error = EINVAL;
305 goto out;
306 }
307 /*
308 * Must check for multicast addresses and disallow binding
309 * to them.
310 */
311 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
312 error = EAFNOSUPPORT;
313 goto out;
314 }
315
316 INP_HASH_WLOCK(&V_tcbinfo);
317 inp->inp_vflag &= ~INP_IPV4;
318 inp->inp_vflag |= INP_IPV6;
319 #ifdef INET
320 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
321 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
322 inp->inp_vflag |= INP_IPV4;
323 else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
324 struct sockaddr_in sin;
325
326 in6_sin6_2_sin(&sin, sin6);
327 if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
328 error = EAFNOSUPPORT;
329 INP_HASH_WUNLOCK(&V_tcbinfo);
330 goto out;
331 }
332 inp->inp_vflag |= INP_IPV4;
333 inp->inp_vflag &= ~INP_IPV6;
334 error = in_pcbbind(inp, &sin, 0, td->td_ucred);
335 INP_HASH_WUNLOCK(&V_tcbinfo);
336 goto out;
337 }
338 }
339 #endif
340 error = in6_pcbbind(inp, sin6, V_tcp_bind_all_fibs ? 0 : INPBIND_FIB,
341 td->td_ucred);
342 INP_HASH_WUNLOCK(&V_tcbinfo);
343 out:
344 if (error != 0)
345 inp->inp_vflag = vflagsav;
346 tcp_bblog_pru(tp, PRU_BIND, error);
347 TCP_PROBE2(debug__user, tp, PRU_BIND);
348 INP_WUNLOCK(inp);
349 return (error);
350 }
351 #endif /* INET6 */
352
353 #ifdef INET
354 /*
355 * Prepare to accept connections.
356 */
357 static int
tcp_usr_listen(struct socket * so,int backlog,struct thread * td)358 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
359 {
360 struct inpcb *inp;
361 struct tcpcb *tp;
362 int error = 0;
363 bool already_listening;
364
365 inp = sotoinpcb(so);
366 KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
367 INP_WLOCK(inp);
368 if (inp->inp_flags & INP_DROPPED) {
369 INP_WUNLOCK(inp);
370 return (EINVAL);
371 }
372 tp = intotcpcb(inp);
373
374 SOCK_LOCK(so);
375 already_listening = SOLISTENING(so);
376 error = solisten_proto_check(so);
377 if (error != 0) {
378 SOCK_UNLOCK(so);
379 goto out;
380 }
381 if (inp->inp_lport == 0) {
382 INP_HASH_WLOCK(&V_tcbinfo);
383 error = in_pcbbind(inp, NULL,
384 V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
385 INP_HASH_WUNLOCK(&V_tcbinfo);
386 }
387 if (error == 0) {
388 tcp_state_change(tp, TCPS_LISTEN);
389 solisten_proto(so, backlog);
390 #ifdef TCP_OFFLOAD
391 if ((so->so_options & SO_NO_OFFLOAD) == 0)
392 tcp_offload_listen_start(tp);
393 #endif
394 } else {
395 solisten_proto_abort(so);
396 }
397 SOCK_UNLOCK(so);
398 if (already_listening)
399 goto out;
400
401 if (error == 0)
402 in_pcblisten(inp);
403 if (tp->t_flags & TF_FASTOPEN)
404 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
405
406 out:
407 tcp_bblog_pru(tp, PRU_LISTEN, error);
408 TCP_PROBE2(debug__user, tp, PRU_LISTEN);
409 INP_WUNLOCK(inp);
410 return (error);
411 }
412 #endif /* INET */
413
414 #ifdef INET6
415 static int
tcp6_usr_listen(struct socket * so,int backlog,struct thread * td)416 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
417 {
418 struct inpcb *inp;
419 struct tcpcb *tp;
420 u_char vflagsav;
421 int error = 0;
422 bool already_listening;
423
424 inp = sotoinpcb(so);
425 KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
426 INP_WLOCK(inp);
427 if (inp->inp_flags & INP_DROPPED) {
428 INP_WUNLOCK(inp);
429 return (EINVAL);
430 }
431 tp = intotcpcb(inp);
432
433 vflagsav = inp->inp_vflag;
434
435 SOCK_LOCK(so);
436 already_listening = SOLISTENING(so);
437 error = solisten_proto_check(so);
438 if (error != 0) {
439 SOCK_UNLOCK(so);
440 goto out;
441 }
442 INP_HASH_WLOCK(&V_tcbinfo);
443 if (inp->inp_lport == 0) {
444 inp->inp_vflag &= ~INP_IPV4;
445 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
446 inp->inp_vflag |= INP_IPV4;
447 error = in6_pcbbind(inp, NULL,
448 V_tcp_bind_all_fibs ? 0 : INPBIND_FIB, td->td_ucred);
449 }
450 INP_HASH_WUNLOCK(&V_tcbinfo);
451 if (error == 0) {
452 tcp_state_change(tp, TCPS_LISTEN);
453 solisten_proto(so, backlog);
454 #ifdef TCP_OFFLOAD
455 if ((so->so_options & SO_NO_OFFLOAD) == 0)
456 tcp_offload_listen_start(tp);
457 #endif
458 } else {
459 solisten_proto_abort(so);
460 }
461 SOCK_UNLOCK(so);
462 if (already_listening)
463 goto out;
464
465 if (error == 0)
466 in_pcblisten(inp);
467 if (tp->t_flags & TF_FASTOPEN)
468 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
469
470 if (error != 0)
471 inp->inp_vflag = vflagsav;
472
473 out:
474 tcp_bblog_pru(tp, PRU_LISTEN, error);
475 TCP_PROBE2(debug__user, tp, PRU_LISTEN);
476 INP_WUNLOCK(inp);
477 return (error);
478 }
479 #endif /* INET6 */
480
481 #ifdef INET
/*
 * pru_connect() handler for AF_INET sockets: initiate a connection to a
 * peer.
 *
 * After validating the destination, tcp_connect() sets up the connection,
 * the keep timer is armed with the connection-establishment timeout and
 * the initial segment is sent with tcp_output().  With TCP_OFFLOAD, a
 * successful hand-off to an offload device replaces the last two steps.
 *
 * Returns ECONNREFUSED if the inpcb has been dropped, EAFNOSUPPORT for a
 * non-AF_INET or multicast destination, EINVAL for a bad address length,
 * EACCES for the broadcast address, EOPNOTSUPP for listening or
 * SO_REUSEPORT_LB sockets, or the error from prison_remote_ip4(),
 * tcp_connect() or tcp_output().
 */
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *dst = (struct sockaddr_in *)nam;
	struct epoch_tracker et;
	struct inpcb *inp;
	struct tcpcb *tp;
	int error = 0;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		return (ECONNREFUSED);
	}
	tp = intotcpcb(inp);

	if (nam->sa_family != AF_INET) {
		error = EAFNOSUPPORT;
		goto done;
	}
	if (nam->sa_len != sizeof(*dst)) {
		error = EINVAL;
		goto done;
	}

	/* TCP ``connections'' to multicast addresses are not allowed. */
	if (IN_MULTICAST(ntohl(dst->sin_addr.s_addr))) {
		error = EAFNOSUPPORT;
		goto done;
	}
	if (ntohl(dst->sin_addr.s_addr) == INADDR_BROADCAST) {
		error = EACCES;
		goto done;
	}

	error = prison_remote_ip4(td->td_ucred, &dst->sin_addr);
	if (error != 0)
		goto done;

	if (SOLISTENING(so) || (so->so_options & SO_REUSEPORT_LB) != 0) {
		error = EOPNOTSUPP;
		goto done;
	}

	NET_EPOCH_ENTER(et);
	error = tcp_connect(tp, dst, td);
	if (error != 0)
		goto leave_epoch;
#ifdef TCP_OFFLOAD
	/* A failed offload attempt falls back to the software path. */
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0) {
		error = tcp_offload_connect(so, nam);
		if (error == 0)
			goto leave_epoch;
	}
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tcp_output(tp);
	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
leave_epoch:
	NET_EPOCH_EXIT(et);
done:
	tcp_bblog_pru(tp, PRU_CONNECT, error);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
554 #endif /* INET */
555
556 #ifdef INET6
/*
 * pru_connect() handler for AF_INET6 sockets: initiate a connection to a
 * peer.
 *
 * A v4-mapped destination (when INET is compiled in) is converted and
 * connected through tcp_connect() as IPv4; any other destination goes
 * through tcp6_connect().  In both cases the keep timer is armed with the
 * connection-establishment timeout and the initial segment is sent with
 * tcp_output(), unless a TCP_OFFLOAD device took over the connection.
 *
 * Returns ECONNREFUSED if the inpcb has been dropped, EAFNOSUPPORT for a
 * non-AF_INET6 or multicast destination or a family the socket does not
 * allow, EINVAL for a bad address length or a v4-mapped destination on a
 * V6ONLY socket, EACCES for the (mapped) broadcast address, EOPNOTSUPP for
 * listening or SO_REUSEPORT_LB sockets, or the error from the prison
 * check, the connect helper or tcp_output().
 *
 * The family flags modified here are restored when the call fails and the
 * socket still has no local port.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct epoch_tracker et;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct sockaddr_in6 *sin6;
	u_int8_t incflagsav;
	u_char vflagsav;

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if (inp->inp_flags & INP_DROPPED) {
		INP_WUNLOCK(inp);
		return (ECONNREFUSED);
	}
	tp = intotcpcb(inp);

	/* Remember the flags so a failed implicit bind can be undone. */
	vflagsav = inp->inp_vflag;
	incflagsav = inp->inp_inc.inc_flags;

	sin6 = (struct sockaddr_in6 *)nam;
	if (nam->sa_family != AF_INET6) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (nam->sa_len != sizeof (*sin6)) {
		error = EINVAL;
		goto out;
	}
	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		error = EAFNOSUPPORT;
		goto out;
	}
	if (SOLISTENING(so) || so->so_options & SO_REUSEPORT_LB) {
		error = EOPNOTSUPP;
		goto out;
	}
#ifdef INET
	/*
	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
	 * therefore probably require the hash lock, which isn't held here.
	 * Is this a significant problem?
	 */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if ((inp->inp_vflag & INP_IPV4) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6);
		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto out;
		}
		if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) {
			error = EACCES;
			goto out;
		}
		if ((error = prison_remote_ip4(td->td_ucred,
		    &sin.sin_addr)) != 0)
			goto out;
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		NET_EPOCH_ENTER(et);
		if ((error = tcp_connect(tp, &sin, td)) != 0)
			goto out_in_epoch;
#ifdef TCP_OFFLOAD
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto out_in_epoch;
#endif
		/*
		 * Arm the connection-establishment timer exactly as the
		 * native IPv4 and IPv6 connect paths do, so a v4-mapped
		 * connect is bounded by the same timeout.
		 */
		tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
		error = tcp_output(tp);
		goto out_in_epoch;
	} else {
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}
	}
#endif
	if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
		goto out;
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_flags |= INC_ISIPV6;
	NET_EPOCH_ENTER(et);
	if ((error = tcp6_connect(tp, sin6, td)) != 0)
		goto out_in_epoch;
#ifdef TCP_OFFLOAD
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out_in_epoch;
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tcp_output(tp);
out_in_epoch:
	NET_EPOCH_EXIT(et);
out:
	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
	/*
	 * If the implicit bind in the connect call fails, restore
	 * the flags we modified.
	 */
	if (error != 0 && inp->inp_lport == 0) {
		inp->inp_vflag = vflagsav;
		inp->inp_inc.inc_flags = incflagsav;
	}

	tcp_bblog_pru(tp, PRU_CONNECT, error);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
685 #endif /* INET6 */
686
687 /*
688 * Initiate disconnect from peer.
689 * If connection never passed embryonic stage, just drop;
690 * else if don't need to let data drain, then can just drop anyways,
691 * else have to begin TCP shutdown process: mark socket disconnecting,
692 * drain unread data, state switch to reflect user close, and
693 * send segment (e.g. FIN) to peer. Socket will be really disconnected
694 * when peer sends FIN and acks ours.
695 *
696 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
697 */
698 static int
tcp_usr_disconnect(struct socket * so)699 tcp_usr_disconnect(struct socket *so)
700 {
701 struct inpcb *inp;
702 struct tcpcb *tp = NULL;
703 struct epoch_tracker et;
704
705 NET_EPOCH_ENTER(et);
706 inp = sotoinpcb(so);
707 KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
708 INP_WLOCK(inp);
709 tp = intotcpcb(inp);
710
711 if (tp->t_state == TCPS_TIME_WAIT)
712 goto out;
713 tcp_disconnect(tp);
714 out:
715 tcp_bblog_pru(tp, PRU_DISCONNECT, 0);
716 TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
717 INP_WUNLOCK(inp);
718 NET_EPOCH_EXIT(et);
719 return (0);
720 }
721
722 #ifdef INET
723 /*
724 * Accept a connection. Essentially all the work is done at higher levels;
725 * just return the address of the peer, storing through addr.
726 */
727 static int
tcp_usr_accept(struct socket * so,struct sockaddr * sa)728 tcp_usr_accept(struct socket *so, struct sockaddr *sa)
729 {
730 struct inpcb *inp;
731 struct tcpcb *tp;
732 int error = 0;
733
734 inp = sotoinpcb(so);
735 KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
736 INP_WLOCK(inp);
737 if (inp->inp_flags & INP_DROPPED) {
738 INP_WUNLOCK(inp);
739 return (ECONNABORTED);
740 }
741 tp = intotcpcb(inp);
742
743 if (so->so_state & SS_ISDISCONNECTED)
744 error = ECONNABORTED;
745 else
746 *(struct sockaddr_in *)sa = (struct sockaddr_in ){
747 .sin_family = AF_INET,
748 .sin_len = sizeof(struct sockaddr_in),
749 .sin_port = inp->inp_fport,
750 .sin_addr = inp->inp_faddr,
751 };
752 tcp_bblog_pru(tp, PRU_ACCEPT, error);
753 TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
754 INP_WUNLOCK(inp);
755
756 return (error);
757 }
758 #endif /* INET */
759
760 #ifdef INET6
761 static int
tcp6_usr_accept(struct socket * so,struct sockaddr * sa)762 tcp6_usr_accept(struct socket *so, struct sockaddr *sa)
763 {
764 struct inpcb *inp;
765 struct tcpcb *tp;
766 int error = 0;
767
768 inp = sotoinpcb(so);
769 KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
770 INP_WLOCK(inp);
771 if (inp->inp_flags & INP_DROPPED) {
772 INP_WUNLOCK(inp);
773 return (ECONNABORTED);
774 }
775 tp = intotcpcb(inp);
776
777 if (so->so_state & SS_ISDISCONNECTED) {
778 error = ECONNABORTED;
779 } else {
780 if (inp->inp_vflag & INP_IPV4) {
781 struct sockaddr_in sin = {
782 .sin_family = AF_INET,
783 .sin_len = sizeof(struct sockaddr_in),
784 .sin_port = inp->inp_fport,
785 .sin_addr = inp->inp_faddr,
786 };
787 in6_sin_2_v4mapsin6(&sin, (struct sockaddr_in6 *)sa);
788 } else {
789 *(struct sockaddr_in6 *)sa = (struct sockaddr_in6 ){
790 .sin6_family = AF_INET6,
791 .sin6_len = sizeof(struct sockaddr_in6),
792 .sin6_port = inp->inp_fport,
793 .sin6_addr = inp->in6p_faddr,
794 };
795 /* XXX: should catch errors */
796 (void)sa6_recoverscope((struct sockaddr_in6 *)sa);
797 }
798 }
799
800 tcp_bblog_pru(tp, PRU_ACCEPT, error);
801 TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
802 INP_WUNLOCK(inp);
803
804 return (error);
805 }
806 #endif /* INET6 */
807
808 /*
809 * Mark the connection as being incapable of further output.
810 */
811 static int
tcp_usr_shutdown(struct socket * so,enum shutdown_how how)812 tcp_usr_shutdown(struct socket *so, enum shutdown_how how)
813 {
814 struct epoch_tracker et;
815 struct inpcb *inp = sotoinpcb(so);
816 struct tcpcb *tp = intotcpcb(inp);
817 int error = 0;
818
819 SOCK_LOCK(so);
820 if (SOLISTENING(so)) {
821 if (how != SHUT_WR) {
822 so->so_error = ECONNABORTED;
823 solisten_wakeup(so); /* unlocks so */
824 } else
825 SOCK_UNLOCK(so);
826 return (ENOTCONN);
827 } else if ((so->so_state &
828 (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
829 SOCK_UNLOCK(so);
830 return (ENOTCONN);
831 }
832 SOCK_UNLOCK(so);
833
834 switch (how) {
835 case SHUT_RD:
836 sorflush(so);
837 break;
838 case SHUT_RDWR:
839 sorflush(so);
840 /* FALLTHROUGH */
841 case SHUT_WR:
842 /*
843 * XXXGL: mimicing old soshutdown() here. But shouldn't we
844 * return ECONNRESEST for SHUT_RD as well?
845 */
846 INP_WLOCK(inp);
847 if (inp->inp_flags & INP_DROPPED) {
848 INP_WUNLOCK(inp);
849 return (ECONNRESET);
850 }
851
852 socantsendmore(so);
853 NET_EPOCH_ENTER(et);
854 tcp_usrclosed(tp);
855 error = tcp_output_nodrop(tp);
856 tcp_bblog_pru(tp, PRU_SHUTDOWN, error);
857 TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
858 error = tcp_unlock_or_drop(tp, error);
859 NET_EPOCH_EXIT(et);
860 }
861 wakeup(&so->so_timeo);
862
863 return (error);
864 }
865
866 /*
867 * After a receive, possibly send window update to peer.
868 */
869 static int
tcp_usr_rcvd(struct socket * so,int flags)870 tcp_usr_rcvd(struct socket *so, int flags)
871 {
872 struct epoch_tracker et;
873 struct inpcb *inp;
874 struct tcpcb *tp;
875 int outrv = 0, error = 0;
876
877 inp = sotoinpcb(so);
878 KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
879 INP_WLOCK(inp);
880 if (inp->inp_flags & INP_DROPPED) {
881 INP_WUNLOCK(inp);
882 return (ECONNRESET);
883 }
884 tp = intotcpcb(inp);
885
886 NET_EPOCH_ENTER(et);
887 /*
888 * For passively-created TFO connections, don't attempt a window
889 * update while still in SYN_RECEIVED as this may trigger an early
890 * SYN|ACK. It is preferable to have the SYN|ACK be sent along with
891 * application response data, or failing that, when the DELACK timer
892 * expires.
893 */
894 if ((tp->t_flags & TF_FASTOPEN) && (tp->t_state == TCPS_SYN_RECEIVED))
895 goto out;
896 #ifdef TCP_OFFLOAD
897 if (tp->t_flags & TF_TOE)
898 tcp_offload_rcvd(tp);
899 else
900 #endif
901 outrv = tcp_output_nodrop(tp);
902 out:
903 tcp_bblog_pru(tp, PRU_RCVD, error);
904 TCP_PROBE2(debug__user, tp, PRU_RCVD);
905 (void) tcp_unlock_or_drop(tp, outrv);
906 NET_EPOCH_EXIT(et);
907 return (error);
908 }
909
910 /*
911 * Do a send by putting data in output queue and updating urgent
912 * marker if URG set. Possibly send more data. Unlike the other
913 * pru_*() routines, the mbuf chains are our responsibility. We
914 * must either enqueue them or free them. The other pru_* routines
915 * generally are caller-frees.
916 */
917 static int
tcp_usr_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)918 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
919 struct sockaddr *nam, struct mbuf *control, struct thread *td)
920 {
921 struct epoch_tracker et;
922 int error = 0;
923 struct inpcb *inp;
924 struct tcpcb *tp;
925 #ifdef INET
926 #ifdef INET6
927 struct sockaddr_in sin;
928 #endif
929 struct sockaddr_in *sinp;
930 #endif
931 #ifdef INET6
932 struct sockaddr_in6 *sin6;
933 int isipv6;
934 #endif
935 u_int8_t incflagsav;
936 u_char vflagsav;
937 bool restoreflags;
938
939 inp = sotoinpcb(so);
940 KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
941 INP_WLOCK(inp);
942 if (inp->inp_flags & INP_DROPPED) {
943 if (m != NULL && (flags & PRUS_NOTREADY) == 0)
944 m_freem(m);
945 INP_WUNLOCK(inp);
946 return (ECONNRESET);
947 }
948 tp = intotcpcb(inp);
949
950 vflagsav = inp->inp_vflag;
951 incflagsav = inp->inp_inc.inc_flags;
952 restoreflags = false;
953
954 NET_EPOCH_ENTER(et);
955 if (control != NULL) {
956 /* TCP doesn't do control messages (rights, creds, etc) */
957 if (control->m_len > 0) {
958 m_freem(control);
959 error = EINVAL;
960 goto out;
961 }
962 m_freem(control); /* empty control, just free it */
963 }
964
965 if ((flags & PRUS_OOB) != 0 &&
966 (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
967 goto out;
968
969 if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
970 if (tp->t_state == TCPS_LISTEN) {
971 error = EINVAL;
972 goto out;
973 }
974 switch (nam->sa_family) {
975 #ifdef INET
976 case AF_INET:
977 sinp = (struct sockaddr_in *)nam;
978 if (sinp->sin_len != sizeof(struct sockaddr_in)) {
979 error = EINVAL;
980 goto out;
981 }
982 if ((inp->inp_vflag & INP_IPV6) != 0) {
983 error = EAFNOSUPPORT;
984 goto out;
985 }
986 if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
987 error = EAFNOSUPPORT;
988 goto out;
989 }
990 if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
991 error = EACCES;
992 goto out;
993 }
994 if ((error = prison_remote_ip4(td->td_ucred,
995 &sinp->sin_addr)))
996 goto out;
997 #ifdef INET6
998 isipv6 = 0;
999 #endif
1000 break;
1001 #endif /* INET */
1002 #ifdef INET6
1003 case AF_INET6:
1004 sin6 = (struct sockaddr_in6 *)nam;
1005 if (sin6->sin6_len != sizeof(*sin6)) {
1006 error = EINVAL;
1007 goto out;
1008 }
1009 if ((inp->inp_vflag & INP_IPV6PROTO) == 0) {
1010 error = EAFNOSUPPORT;
1011 goto out;
1012 }
1013 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
1014 error = EAFNOSUPPORT;
1015 goto out;
1016 }
1017 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1018 #ifdef INET
1019 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
1020 error = EINVAL;
1021 goto out;
1022 }
1023 if ((inp->inp_vflag & INP_IPV4) == 0) {
1024 error = EAFNOSUPPORT;
1025 goto out;
1026 }
1027 restoreflags = true;
1028 inp->inp_vflag &= ~INP_IPV6;
1029 sinp = &sin;
1030 in6_sin6_2_sin(sinp, sin6);
1031 if (IN_MULTICAST(
1032 ntohl(sinp->sin_addr.s_addr))) {
1033 error = EAFNOSUPPORT;
1034 goto out;
1035 }
1036 if ((error = prison_remote_ip4(td->td_ucred,
1037 &sinp->sin_addr)))
1038 goto out;
1039 isipv6 = 0;
1040 #else /* !INET */
1041 error = EAFNOSUPPORT;
1042 goto out;
1043 #endif /* INET */
1044 } else {
1045 if ((inp->inp_vflag & INP_IPV6) == 0) {
1046 error = EAFNOSUPPORT;
1047 goto out;
1048 }
1049 restoreflags = true;
1050 inp->inp_vflag &= ~INP_IPV4;
1051 inp->inp_inc.inc_flags |= INC_ISIPV6;
1052 if ((error = prison_remote_ip6(td->td_ucred,
1053 &sin6->sin6_addr)))
1054 goto out;
1055 isipv6 = 1;
1056 }
1057 break;
1058 #endif /* INET6 */
1059 default:
1060 error = EAFNOSUPPORT;
1061 goto out;
1062 }
1063 }
1064 if (!(flags & PRUS_OOB)) {
1065 if (tp->t_acktime == 0)
1066 tp->t_acktime = ticks;
1067 sbappendstream(&so->so_snd, m, flags);
1068 m = NULL;
1069 if (nam && tp->t_state < TCPS_SYN_SENT) {
1070 KASSERT(tp->t_state == TCPS_CLOSED,
1071 ("%s: tp %p is listening", __func__, tp));
1072
1073 /*
1074 * Do implied connect if not yet connected,
1075 * initialize window to default value, and
1076 * initialize maxseg using peer's cached MSS.
1077 */
1078 #ifdef INET6
1079 if (isipv6)
1080 error = tcp6_connect(tp, sin6, td);
1081 #endif /* INET6 */
1082 #if defined(INET6) && defined(INET)
1083 else
1084 #endif
1085 #ifdef INET
1086 error = tcp_connect(tp, sinp, td);
1087 #endif
1088 /*
1089 * The bind operation in tcp_connect succeeded. We
1090 * no longer want to restore the flags if later
1091 * operations fail.
1092 */
1093 if (error == 0 || inp->inp_lport != 0)
1094 restoreflags = false;
1095
1096 if (error) {
1097 /* m is freed if PRUS_NOTREADY is unset. */
1098 sbflush(&so->so_snd);
1099 goto out;
1100 }
1101 if (tp->t_flags & TF_FASTOPEN)
1102 tcp_fastopen_connect(tp);
1103 else {
1104 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1105 tcp_mss(tp, -1);
1106 }
1107 }
1108 if (flags & PRUS_EOF) {
1109 /*
1110 * Close the send side of the connection after
1111 * the data is sent.
1112 */
1113 socantsendmore(so);
1114 tcp_usrclosed(tp);
1115 }
1116 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
1117 ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
1118 (tp->t_fbyte_out == 0) &&
1119 (so->so_snd.sb_ccc > 0)) {
1120 tp->t_fbyte_out = ticks;
1121 if (tp->t_fbyte_out == 0)
1122 tp->t_fbyte_out = 1;
1123 if (tp->t_fbyte_out && tp->t_fbyte_in)
1124 tp->t_flags2 |= TF2_FBYTES_COMPLETE;
1125 }
1126 if (!(inp->inp_flags & INP_DROPPED) &&
1127 !(flags & PRUS_NOTREADY)) {
1128 if (flags & PRUS_MORETOCOME)
1129 tp->t_flags |= TF_MORETOCOME;
1130 error = tcp_output_nodrop(tp);
1131 if (flags & PRUS_MORETOCOME)
1132 tp->t_flags &= ~TF_MORETOCOME;
1133 }
1134 } else {
1135 /*
1136 * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
1137 */
1138 SOCK_SENDBUF_LOCK(so);
1139 if (sbspace(&so->so_snd) < -512) {
1140 SOCK_SENDBUF_UNLOCK(so);
1141 error = ENOBUFS;
1142 goto out;
1143 }
1144 /*
1145 * According to RFC961 (Assigned Protocols),
1146 * the urgent pointer points to the last octet
1147 * of urgent data. We continue, however,
1148 * to consider it to indicate the first octet
1149 * of data past the urgent section.
1150 * Otherwise, snd_up should be one lower.
1151 */
1152 if (tp->t_acktime == 0)
1153 tp->t_acktime = ticks;
1154 sbappendstream_locked(&so->so_snd, m, flags);
1155 SOCK_SENDBUF_UNLOCK(so);
1156 m = NULL;
1157 if (nam && tp->t_state < TCPS_SYN_SENT) {
1158 /*
1159 * Do implied connect if not yet connected,
1160 * initialize window to default value, and
1161 * initialize maxseg using peer's cached MSS.
1162 */
1163
1164 /*
1165 * Not going to contemplate SYN|URG
1166 */
1167 if (tp->t_flags & TF_FASTOPEN)
1168 tp->t_flags &= ~TF_FASTOPEN;
1169 #ifdef INET6
1170 if (isipv6)
1171 error = tcp6_connect(tp, sin6, td);
1172 #endif /* INET6 */
1173 #if defined(INET6) && defined(INET)
1174 else
1175 #endif
1176 #ifdef INET
1177 error = tcp_connect(tp, sinp, td);
1178 #endif
1179 /*
1180 * The bind operation in tcp_connect succeeded. We
1181 * no longer want to restore the flags if later
1182 * operations fail.
1183 */
1184 if (error == 0 || inp->inp_lport != 0)
1185 restoreflags = false;
1186
1187 if (error != 0) {
1188 /* m is freed if PRUS_NOTREADY is unset. */
1189 sbflush(&so->so_snd);
1190 goto out;
1191 }
1192 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1193 tcp_mss(tp, -1);
1194 }
1195 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
1196 if ((flags & PRUS_NOTREADY) == 0) {
1197 tp->t_flags |= TF_FORCEDATA;
1198 error = tcp_output_nodrop(tp);
1199 tp->t_flags &= ~TF_FORCEDATA;
1200 }
1201 }
1202 TCP_LOG_EVENT(tp, NULL,
1203 &inp->inp_socket->so_rcv,
1204 &inp->inp_socket->so_snd,
1205 TCP_LOG_USERSEND, error,
1206 0, NULL, false);
1207
1208 out:
1209 /*
1210 * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is
1211 * responsible for freeing memory.
1212 */
1213 if (m != NULL && (flags & PRUS_NOTREADY) == 0)
1214 m_freem(m);
1215
1216 /*
1217 * If the request was unsuccessful and we changed flags,
1218 * restore the original flags.
1219 */
1220 if (error != 0 && restoreflags) {
1221 inp->inp_vflag = vflagsav;
1222 inp->inp_inc.inc_flags = incflagsav;
1223 }
1224 tcp_bblog_pru(tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1225 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND), error);
1226 TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1227 ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1228 error = tcp_unlock_or_drop(tp, error);
1229 NET_EPOCH_EXIT(et);
1230 return (error);
1231 }
1232
1233 static int
tcp_usr_ready(struct socket * so,struct mbuf * m,int count)1234 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
1235 {
1236 struct epoch_tracker et;
1237 struct inpcb *inp;
1238 struct tcpcb *tp;
1239 int error;
1240
1241 inp = sotoinpcb(so);
1242 INP_WLOCK(inp);
1243 if (inp->inp_flags & INP_DROPPED) {
1244 INP_WUNLOCK(inp);
1245 mb_free_notready(m, count);
1246 return (ECONNRESET);
1247 }
1248 tp = intotcpcb(inp);
1249
1250 SOCK_SENDBUF_LOCK(so);
1251 error = sbready(&so->so_snd, m, count);
1252 SOCK_SENDBUF_UNLOCK(so);
1253 if (error) {
1254 INP_WUNLOCK(inp);
1255 return (error);
1256 }
1257 NET_EPOCH_ENTER(et);
1258 error = tcp_output_unlock(tp);
1259 NET_EPOCH_EXIT(et);
1260
1261 return (error);
1262 }
1263
1264 /*
1265 * Abort the TCP. Drop the connection abruptly.
1266 */
1267 static void
tcp_usr_abort(struct socket * so)1268 tcp_usr_abort(struct socket *so)
1269 {
1270 struct inpcb *inp;
1271 struct tcpcb *tp;
1272 struct epoch_tracker et;
1273
1274 inp = sotoinpcb(so);
1275 KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
1276
1277 NET_EPOCH_ENTER(et);
1278 INP_WLOCK(inp);
1279 KASSERT(inp->inp_socket != NULL,
1280 ("tcp_usr_abort: inp_socket == NULL"));
1281
1282 /*
1283 * If we still have full TCP state, and we're not dropped, drop.
1284 */
1285 if (!(inp->inp_flags & INP_DROPPED)) {
1286 tp = intotcpcb(inp);
1287 tp = tcp_drop(tp, ECONNABORTED);
1288 if (tp == NULL)
1289 goto dropped;
1290 tcp_bblog_pru(tp, PRU_ABORT, 0);
1291 TCP_PROBE2(debug__user, tp, PRU_ABORT);
1292 }
1293 if (!(inp->inp_flags & INP_DROPPED)) {
1294 soref(so);
1295 inp->inp_flags |= INP_SOCKREF;
1296 }
1297 INP_WUNLOCK(inp);
1298 dropped:
1299 NET_EPOCH_EXIT(et);
1300 }
1301
1302 /*
1303 * TCP socket is closed. Start friendly disconnect.
1304 */
1305 static void
tcp_usr_close(struct socket * so)1306 tcp_usr_close(struct socket *so)
1307 {
1308 struct inpcb *inp;
1309 struct tcpcb *tp;
1310 struct epoch_tracker et;
1311
1312 inp = sotoinpcb(so);
1313 KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
1314
1315 NET_EPOCH_ENTER(et);
1316 INP_WLOCK(inp);
1317 KASSERT(inp->inp_socket != NULL,
1318 ("tcp_usr_close: inp_socket == NULL"));
1319
1320 /*
1321 * If we are still connected and we're not dropped, initiate
1322 * a disconnect.
1323 */
1324 if (!(inp->inp_flags & INP_DROPPED)) {
1325 tp = intotcpcb(inp);
1326 if (tp->t_state != TCPS_TIME_WAIT) {
1327 tp->t_flags |= TF_CLOSED;
1328 tcp_disconnect(tp);
1329 tcp_bblog_pru(tp, PRU_CLOSE, 0);
1330 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
1331 }
1332 }
1333 if (!(inp->inp_flags & INP_DROPPED)) {
1334 soref(so);
1335 inp->inp_flags |= INP_SOCKREF;
1336 }
1337 INP_WUNLOCK(inp);
1338 NET_EPOCH_EXIT(et);
1339 }
1340
1341 static int
tcp_pru_options_support(struct tcpcb * tp,int flags)1342 tcp_pru_options_support(struct tcpcb *tp, int flags)
1343 {
1344 /*
1345 * If the specific TCP stack has a pru_options
1346 * specified then it does not always support
1347 * all the PRU_XX options and we must ask it.
1348 * If the function is not specified then all
1349 * of the PRU_XX options are supported.
1350 */
1351 int ret = 0;
1352
1353 if (tp->t_fb->tfb_pru_options) {
1354 ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
1355 }
1356 return (ret);
1357 }
1358
1359 /*
1360 * Receive out-of-band data.
1361 */
1362 static int
tcp_usr_rcvoob(struct socket * so,struct mbuf * m,int flags)1363 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1364 {
1365 int error = 0;
1366 struct inpcb *inp;
1367 struct tcpcb *tp;
1368
1369 inp = sotoinpcb(so);
1370 KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
1371 INP_WLOCK(inp);
1372 if (inp->inp_flags & INP_DROPPED) {
1373 INP_WUNLOCK(inp);
1374 return (ECONNRESET);
1375 }
1376 tp = intotcpcb(inp);
1377
1378 error = tcp_pru_options_support(tp, PRUS_OOB);
1379 if (error) {
1380 goto out;
1381 }
1382 if ((so->so_oobmark == 0 &&
1383 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1384 so->so_options & SO_OOBINLINE ||
1385 tp->t_oobflags & TCPOOB_HADDATA) {
1386 error = EINVAL;
1387 goto out;
1388 }
1389 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1390 error = EWOULDBLOCK;
1391 goto out;
1392 }
1393 m->m_len = 1;
1394 *mtod(m, caddr_t) = tp->t_iobc;
1395 if ((flags & MSG_PEEK) == 0)
1396 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1397
1398 out:
1399 tcp_bblog_pru(tp, PRU_RCVOOB, error);
1400 TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
1401 INP_WUNLOCK(inp);
1402 return (error);
1403 }
1404
#ifdef INET
/*
 * Protocol switch entry for TCP sockets built with INET.
 */
struct protosw tcp_protosw = {
	/* Protocol identity and flags. */
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CAPATTACH | PR_WANTRCVD | PR_IMPLOPCL |
				    PR_CONNREQUIRED,

	/* Handlers implemented by the TCP code. */
	.pr_abort =		tcp_usr_abort,
	.pr_accept =		tcp_usr_accept,
	.pr_attach =		tcp_usr_attach,
	.pr_bind =		tcp_usr_bind,
	.pr_close =		tcp_usr_close,
	.pr_connect =		tcp_usr_connect,
	.pr_ctloutput =		tcp_ctloutput,
	.pr_detach =		tcp_usr_detach,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_listen =		tcp_usr_listen,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,
	.pr_ready =		tcp_usr_ready,
	.pr_send =		tcp_usr_send,
	.pr_shutdown =		tcp_usr_shutdown,

	/* Handlers taken from the in_* routines. */
	.pr_control =		in_control,
	.pr_peeraddr =		in_getpeeraddr,
	.pr_sockaddr =		in_getsockaddr,
	.pr_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET */
1432
#ifdef INET6
/*
 * Protocol switch entry for TCP sockets built with INET6.  The accept,
 * bind, connect and listen handlers are the tcp6_* variants; the remaining
 * tcp_usr_* handlers are the same functions tcp_protosw uses.
 */
struct protosw tcp6_protosw = {
	/* Protocol identity and flags. */
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CAPATTACH | PR_WANTRCVD | PR_IMPLOPCL |
				    PR_CONNREQUIRED,

	/* IPv6-specific handlers. */
	.pr_accept =		tcp6_usr_accept,
	.pr_bind =		tcp6_usr_bind,
	.pr_connect =		tcp6_usr_connect,
	.pr_control =		in6_control,
	.pr_listen =		tcp6_usr_listen,
	.pr_peeraddr =		in6_mapped_peeraddr,
	.pr_sockaddr =		in6_mapped_sockaddr,

	/* Handlers that are not IPv6-specific. */
	.pr_abort =		tcp_usr_abort,
	.pr_attach =		tcp_usr_attach,
	.pr_close =		tcp_usr_close,
	.pr_ctloutput =		tcp_ctloutput,
	.pr_detach =		tcp_usr_detach,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,
	.pr_ready =		tcp_usr_ready,
	.pr_send =		tcp_usr_send,
	.pr_shutdown =		tcp_usr_shutdown,
	.pr_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET6 */
1460
#ifdef INET
/*
 * Common subroutine to open a TCP connection to the remote host given by a
 * struct sockaddr_in.  in_pcbconnect() is called, under the pcb hash write
 * lock, to choose the local host address, assign a local port number and
 * install the inpcb into the hash.  Connection parameters are then
 * initialized and the connection enters SYN-SENT state.
 *
 * The caller must be inside the network epoch and hold the inpcb write
 * lock (both are asserted).  Returns EISCONN when the socket already has
 * any of the connecting, connected, disconnecting or disconnected state
 * bits set, the in_pcbconnect() error if that call fails, and 0 otherwise.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr_in *sin, struct thread *td)
{
	const int used_states = SS_ISCONNECTING | SS_ISCONNECTED |
	    SS_ISDISCONNECTING | SS_ISDISCONNECTED;
	struct socket *so = tptosocket(tp);
	struct inpcb *inp = tptoinpcb(tp);
	int error;

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	if (__predict_false((so->so_state & used_states) != 0))
		return (EISCONN);

	INP_HASH_WLOCK(&V_tcbinfo);
	error = in_pcbconnect(inp, sin, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	for (; tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max;
	    tp->request_r_scale++)
		continue;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);

	/* Pick the initial sequence number and, if requested, a TS offset. */
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if ((tp->t_flags & TF_REQ_TSTMP) != 0)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET */
1509
#ifdef INET6
/*
 * IPv6 counterpart of tcp_connect(): open a TCP connection to the remote
 * host given by a struct sockaddr_in6.  in6_pcbconnect() is called under
 * the pcb hash write lock; connection parameters are then initialized and
 * the connection enters SYN-SENT state.
 *
 * The caller must be inside the network epoch and hold the inpcb write
 * lock (both are asserted).  Returns EISCONN when the socket already has
 * any of the connecting, connected, disconnecting or disconnected state
 * bits set, the in6_pcbconnect() error if that call fails, and 0 otherwise.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr_in6 *sin6, struct thread *td)
{
	struct inpcb *inp = tptoinpcb(tp);
	struct socket *so = tptosocket(tp);
	int error;

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);

	/*
	 * Apply the same state check as tcp_connect(): a socket that is
	 * disconnecting or has been disconnected must be refused as well,
	 * not only one that is connecting or connected.
	 */
	if (__predict_false((so->so_state &
	    (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING |
	    SS_ISDISCONNECTED)) != 0))
		return (EISCONN);

	INP_HASH_WLOCK(&V_tcbinfo);
	error = in6_pcbconnect(inp, sin6, td->td_ucred, true);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* Compute window scaling to request. */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if (tp->t_flags & TF_REQ_TSTMP)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1547
1548 /*
1549 * Export TCP internal state information via a struct tcp_info, based on the
1550 * Linux 2.6 API. Not ABI compatible as our constants are mapped differently
1551 * (TCP state machine, etc). We export all information using FreeBSD-native
1552 * constants -- for example, the numeric values for tcpi_state will differ
1553 * from Linux.
1554 */
1555 void
tcp_fill_info(const struct tcpcb * tp,struct tcp_info * ti)1556 tcp_fill_info(const struct tcpcb *tp, struct tcp_info *ti)
1557 {
1558
1559 INP_LOCK_ASSERT(tptoinpcb(tp));
1560 bzero(ti, sizeof(*ti));
1561
1562 ti->tcpi_state = tp->t_state;
1563 if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1564 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1565 if (tp->t_flags & TF_SACK_PERMIT)
1566 ti->tcpi_options |= TCPI_OPT_SACK;
1567 if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1568 ti->tcpi_options |= TCPI_OPT_WSCALE;
1569 ti->tcpi_snd_wscale = tp->snd_scale;
1570 ti->tcpi_rcv_wscale = tp->rcv_scale;
1571 }
1572 switch (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
1573 case TF2_ECN_PERMIT:
1574 ti->tcpi_options |= TCPI_OPT_ECN;
1575 break;
1576 case TF2_ACE_PERMIT:
1577 /* FALLTHROUGH */
1578 case TF2_ECN_PERMIT | TF2_ACE_PERMIT:
1579 ti->tcpi_options |= TCPI_OPT_ACE;
1580 break;
1581 default:
1582 break;
1583 }
1584 if (tp->t_flags & TF_FASTOPEN)
1585 ti->tcpi_options |= TCPI_OPT_TFO;
1586
1587 ti->tcpi_rto = tp->t_rxtcur * tick;
1588 ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
1589 ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1590 ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1591
1592 ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1593 ti->tcpi_snd_cwnd = tp->snd_cwnd;
1594
1595 /*
1596 * FreeBSD-specific extension fields for tcp_info.
1597 */
1598 ti->tcpi_rcv_space = tp->rcv_wnd;
1599 ti->tcpi_rcv_nxt = tp->rcv_nxt;
1600 ti->tcpi_snd_wnd = tp->snd_wnd;
1601 ti->tcpi_snd_bwnd = 0; /* Unused, kept for compat. */
1602 ti->tcpi_snd_nxt = tp->snd_nxt;
1603 ti->tcpi_snd_mss = tp->t_maxseg;
1604 ti->tcpi_rcv_mss = tp->t_maxseg;
1605 ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1606 ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1607 ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1608 ti->tcpi_snd_una = tp->snd_una;
1609 ti->tcpi_snd_max = tp->snd_max;
1610 ti->tcpi_rcv_numsacks = tp->rcv_numsacks;
1611 ti->tcpi_rcv_adv = tp->rcv_adv;
1612 ti->tcpi_dupacks = tp->t_dupacks;
1613 ti->tcpi_rttmin = tp->t_rttlow;
1614 #ifdef TCP_OFFLOAD
1615 if (tp->t_flags & TF_TOE) {
1616 ti->tcpi_options |= TCPI_OPT_TOE;
1617 tcp_offload_tcp_info(tp, ti);
1618 }
1619 #endif
1620 /*
1621 * AccECN related counters.
1622 */
1623 if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
1624 (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
1625 /*
1626 * Internal counter starts at 5 for AccECN
1627 * but 0 for RFC3168 ECN.
1628 */
1629 ti->tcpi_delivered_ce = tp->t_scep - 5;
1630 else
1631 ti->tcpi_delivered_ce = tp->t_scep;
1632 ti->tcpi_received_ce = tp->t_rcep;
1633 }
1634
/*
 * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 * socket option arguments.  When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
 *
 * INP_WLOCK_RECHECK_CLEANUP() write-locks the inpcb.  If the inpcb has been
 * dropped in the meantime it unlocks it again, executes the caller-supplied
 * 'cleanup' statement and makes the ENCLOSING FUNCTION return ECONNRESET.
 * Otherwise the lock stays held and the caller's local variable 'tp' is
 * reloaded from the inpcb, so a variable named 'tp' must be in scope
 * wherever the macro is used.
 *
 * INP_WLOCK_RECHECK() is the same operation with an empty cleanup step.
 */
#define	INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
	INP_WLOCK(inp);							\
	if (inp->inp_flags & INP_DROPPED) {				\
		INP_WUNLOCK(inp);					\
		cleanup;						\
		return (ECONNRESET);					\
	}								\
	tp = intotcpcb(inp);						\
} while(0)
#define	INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
1651
/*
 * Handle a SOPT_SET socket option request on a TCP socket.
 *
 * Entered with the inpcb write-locked and not dropped (both asserted).
 * Every return path in this function has released the lock; when the
 * request is forwarded to the stack's tfb_tcp_ctloutput method the lock is
 * passed in held and the callee must unlock it.
 *
 * Three cases are distinguished:
 *  - Option level other than IPPROTO_TCP: the option is first given to
 *    ip6_ctloutput() or ip_ctloutput().  Only IPV6_TCLASS, IPV6_USE_MIN_MTU,
 *    IP_TOS and IP_TTL are afterwards also forwarded to the TCP stack; for
 *    anything else the IP-level result is returned.
 *  - TCP_FUNCTION_BLK: switch the connection to another TCP stack.  This is
 *    handled entirely here and never forwarded.
 *  - Any other TCP-level option: forwarded to the TCP stack.
 */
int
tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
{
	struct socket *so = inp->inp_socket;
	struct tcpcb *tp = intotcpcb(inp);
	int error = 0;

	MPASS(sopt->sopt_dir == SOPT_SET);
	INP_WLOCK_ASSERT(inp);
	KASSERT((inp->inp_flags & INP_DROPPED) == 0,
	    ("inp_flags == %x", inp->inp_flags));
	KASSERT(so != NULL, ("inp_socket == NULL"));

	if (sopt->sopt_level != IPPROTO_TCP) {
		/* The IP-level handlers are called without the inpcb lock. */
		INP_WUNLOCK(inp);
#ifdef INET6
		if (inp->inp_vflag & INP_IPV6PROTO)
			error = ip6_ctloutput(so, sopt);
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET
			error = ip_ctloutput(so, sopt);
#endif
		/*
		 * When an IP-level socket option affects TCP, pass control
		 * down to stack tfb_tcp_ctloutput, otherwise return what
		 * IP level returned.
		 *
		 * Note that for the forwarded options the IP-level 'error'
		 * is not returned; the stack's result is.
		 */
		switch (sopt->sopt_level) {
#ifdef INET6
		case IPPROTO_IPV6:
			if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
				return (error);
			switch (sopt->sopt_name) {
			case IPV6_TCLASS:
				/* Notify tcp stacks that care (e.g. RACK). */
				break;
			case IPV6_USE_MIN_MTU:
				/* Update t_maxseg accordingly. */
				break;
			default:
				return (error);
			}
			break;
#endif
#ifdef INET
		case IPPROTO_IP:
			switch (sopt->sopt_name) {
			case IP_TOS:
				/*
				 * Clear the ECN bits from the stored TOS.
				 * NOTE(review): this store happens after the
				 * INP_WUNLOCK() above and before the re-lock
				 * below.
				 */
				inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
				break;
			case IP_TTL:
				/* Notify tcp stacks that care (e.g. RACK). */
				break;
			default:
				return (error);
			}
			break;
#endif
		default:
			return (error);
		}
		/*
		 * Re-lock for the hand-off to the stack at the bottom of the
		 * function; returns ECONNRESET from here if the inpcb was
		 * dropped while unlocked.
		 */
		INP_WLOCK_RECHECK(inp);
	} else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
		/*
		 * Protect the TCP option TCP_FUNCTION_BLK so
		 * that a sub-function can *never* overwrite this.
		 */
		struct tcp_function_set fsn;
		struct tcp_function_block *blk;
		void *ptr = NULL;

		/* Copy in the request unlocked, then revalidate the inpcb. */
		INP_WUNLOCK(inp);
		error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
		if (error)
			return (error);

		INP_WLOCK_RECHECK(inp);

		/*
		 * A successful lookup returns with a reference on blk; each
		 * failure path below gives that reference back before
		 * returning.
		 */
		blk = find_and_ref_tcp_functions(&fsn);
		if (blk == NULL) {
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		if (tp->t_fb == blk) {
			/* You already have this */
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (0);
		}
		if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (ENOENT);
		}
		/* Ask the new stack whether it can take over this tcpcb. */
		error = (*blk->tfb_tcp_handoff_ok)(tp);
		if (error) {
			refcount_release(&blk->tfb_refcnt);
			INP_WUNLOCK(inp);
			return (error);
		}
		/*
		 * Ensure the new stack takes ownership with a
		 * clean slate on peak rate threshold.
		 */
		if (tp->t_fb->tfb_tcp_timer_stop_all != NULL)
			tp->t_fb->tfb_tcp_timer_stop_all(tp);
		if (blk->tfb_tcp_fb_init) {
			error = (*blk->tfb_tcp_fb_init)(tp, &ptr);
			if (error) {
				/*
				 * Release the ref count the lookup
				 * acquired.
				 */
				refcount_release(&blk->tfb_refcnt);
				/*
				 * Now there is a chance that the
				 * init() function mucked with some
				 * things before it failed, such as
				 * hpts or inp_flags2 or timer granularity.
				 * It should not have, but let's give the old
				 * stack a chance to reset to a known good state.
				 */
				if (tp->t_fb->tfb_switch_failed) {
					(*tp->t_fb->tfb_switch_failed)(tp);
				}
				goto err_out;
			}
		}
		if (tp->t_fb->tfb_tcp_fb_fini) {
			struct epoch_tracker et;
			/*
			 * Tell the stack to cleanup with 0 i.e.
			 * the tcb is not going away.
			 */
			NET_EPOCH_ENTER(et);
			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
			NET_EPOCH_EXIT(et);
		}
		/*
		 * Release the old refcnt, the
		 * lookup acquired a ref on the
		 * new one already.
		 */
		refcount_release(&tp->t_fb->tfb_refcnt);
		/*
		 * Set in the new stack.
		 */
		tp->t_fb = blk;
		tp->t_fb_ptr = ptr;
#ifdef TCP_OFFLOAD
		if (tp->t_flags & TF_TOE) {
			tcp_offload_ctloutput(tp, sopt->sopt_dir,
			     sopt->sopt_name);
		}
#endif
		/*
		 * Reached both after a successful switch (error is 0) and
		 * from the tfb_tcp_fb_init failure above.
		 */
err_out:
		INP_WUNLOCK(inp);
		return (error);

	}

	/* Pass in the INP locked, callee must unlock it. */
	return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
}
1819
1820 static int
tcp_ctloutput_get(struct inpcb * inp,struct sockopt * sopt)1821 tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
1822 {
1823 struct socket *so = inp->inp_socket;
1824 struct tcpcb *tp = intotcpcb(inp);
1825 int error = 0;
1826
1827 MPASS(sopt->sopt_dir == SOPT_GET);
1828 INP_WLOCK_ASSERT(inp);
1829 KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1830 ("inp_flags == %x", inp->inp_flags));
1831 KASSERT(so != NULL, ("inp_socket == NULL"));
1832
1833 if (sopt->sopt_level != IPPROTO_TCP) {
1834 INP_WUNLOCK(inp);
1835 #ifdef INET6
1836 if (inp->inp_vflag & INP_IPV6PROTO)
1837 error = ip6_ctloutput(so, sopt);
1838 #endif /* INET6 */
1839 #if defined(INET6) && defined(INET)
1840 else
1841 #endif
1842 #ifdef INET
1843 error = ip_ctloutput(so, sopt);
1844 #endif
1845 return (error);
1846 }
1847 if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
1848 (sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
1849 struct tcp_function_set fsn;
1850
1851 if (sopt->sopt_name == TCP_FUNCTION_ALIAS) {
1852 memset(&fsn, 0, sizeof(fsn));
1853 find_tcp_function_alias(tp->t_fb, &fsn);
1854 } else {
1855 strncpy(fsn.function_set_name,
1856 tp->t_fb->tfb_tcp_block_name,
1857 TCP_FUNCTION_NAME_LEN_MAX);
1858 fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
1859 }
1860 fsn.pcbcnt = tp->t_fb->tfb_refcnt;
1861 INP_WUNLOCK(inp);
1862 error = sooptcopyout(sopt, &fsn, sizeof fsn);
1863 return (error);
1864 }
1865
1866 /* Pass in the INP locked, callee must unlock it. */
1867 return (tp->t_fb->tfb_tcp_ctloutput(tp, sopt));
1868 }
1869
1870 int
tcp_ctloutput(struct socket * so,struct sockopt * sopt)1871 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1872 {
1873 struct inpcb *inp;
1874
1875 inp = sotoinpcb(so);
1876 KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1877
1878 INP_WLOCK(inp);
1879 if (inp->inp_flags & INP_DROPPED) {
1880 INP_WUNLOCK(inp);
1881 return (ECONNRESET);
1882 }
1883 if (sopt->sopt_dir == SOPT_SET)
1884 return (tcp_ctloutput_set(inp, sopt));
1885 else if (sopt->sopt_dir == SOPT_GET)
1886 return (tcp_ctloutput_get(inp, sopt));
1887 else
1888 panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
1889 }
1890
/*
 * If either of these assertions becomes untrue, we need to change the size
 * of the buf variable in tcp_default_ctloutput(), which is declared with
 * TCP_LOG_ID_LEN bytes.
 */
#ifdef CTASSERT
CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
#endif

/* Only declared here; the definition is not in this part of the file. */
extern struct cc_algo newreno_cc_algo;
1901
1902 static int
tcp_set_cc_mod(struct inpcb * inp,struct sockopt * sopt)1903 tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
1904 {
1905 struct cc_algo *algo;
1906 void *ptr = NULL;
1907 struct tcpcb *tp;
1908 struct cc_var cc_mem;
1909 char buf[TCP_CA_NAME_MAX];
1910 size_t mem_sz;
1911 int error;
1912
1913 INP_WUNLOCK(inp);
1914 error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
1915 if (error)
1916 return(error);
1917 buf[sopt->sopt_valsize] = '\0';
1918 CC_LIST_RLOCK();
1919 STAILQ_FOREACH(algo, &cc_list, entries) {
1920 if (strncmp(buf, algo->name,
1921 TCP_CA_NAME_MAX) == 0) {
1922 if (algo->flags & CC_MODULE_BEING_REMOVED) {
1923 /* We can't "see" modules being unloaded */
1924 continue;
1925 }
1926 break;
1927 }
1928 }
1929 if (algo == NULL) {
1930 CC_LIST_RUNLOCK();
1931 return(ESRCH);
1932 }
1933 /*
1934 * With a reference the algorithm cannot be removed
1935 * so we hold a reference through the change process.
1936 */
1937 cc_refer(algo);
1938 CC_LIST_RUNLOCK();
1939 if (algo->cb_init != NULL) {
1940 /* We can now pre-get the memory for the CC */
1941 mem_sz = (*algo->cc_data_sz)();
1942 if (mem_sz == 0) {
1943 goto no_mem_needed;
1944 }
1945 ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
1946 } else {
1947 no_mem_needed:
1948 mem_sz = 0;
1949 ptr = NULL;
1950 }
1951 /*
1952 * Make sure its all clean and zero and also get
1953 * back the inplock.
1954 */
1955 memset(&cc_mem, 0, sizeof(cc_mem));
1956 INP_WLOCK(inp);
1957 if (inp->inp_flags & INP_DROPPED) {
1958 INP_WUNLOCK(inp);
1959 if (ptr)
1960 free(ptr, M_CC_MEM);
1961 /* Release our temp reference */
1962 CC_LIST_RLOCK();
1963 cc_release(algo);
1964 CC_LIST_RUNLOCK();
1965 return (ECONNRESET);
1966 }
1967 tp = intotcpcb(inp);
1968 if (ptr != NULL)
1969 memset(ptr, 0, mem_sz);
1970 cc_mem.tp = tp;
1971 /*
1972 * We once again hold a write lock over the tcb so it's
1973 * safe to do these things without ordering concerns.
1974 * Note here we init into stack memory.
1975 */
1976 if (algo->cb_init != NULL)
1977 error = algo->cb_init(&cc_mem, ptr);
1978 else
1979 error = 0;
1980 /*
1981 * The CC algorithms, when given their memory
1982 * should not fail we could in theory have a
1983 * KASSERT here.
1984 */
1985 if (error == 0) {
1986 /*
1987 * Touchdown, lets go ahead and move the
1988 * connection to the new CC module by
1989 * copying in the cc_mem after we call
1990 * the old ones cleanup (if any).
1991 */
1992 if (CC_ALGO(tp)->cb_destroy != NULL)
1993 CC_ALGO(tp)->cb_destroy(&tp->t_ccv);
1994 /* Detach the old CC from the tcpcb */
1995 cc_detach(tp);
1996 /* Copy in our temp memory that was inited */
1997 memcpy(&tp->t_ccv, &cc_mem, sizeof(struct cc_var));
1998 /* Now attach the new, which takes a reference */
1999 cc_attach(tp, algo);
2000 /* Ok now are we where we have gotten past any conn_init? */
2001 if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
2002 /* Yep run the connection init for the new CC */
2003 CC_ALGO(tp)->conn_init(&tp->t_ccv);
2004 }
2005 } else if (ptr)
2006 free(ptr, M_CC_MEM);
2007 INP_WUNLOCK(inp);
2008 /* Now lets release our temp reference */
2009 CC_LIST_RLOCK();
2010 cc_release(algo);
2011 CC_LIST_RUNLOCK();
2012 return (error);
2013 }
2014
2015 int
tcp_default_ctloutput(struct tcpcb * tp,struct sockopt * sopt)2016 tcp_default_ctloutput(struct tcpcb *tp, struct sockopt *sopt)
2017 {
2018 struct inpcb *inp = tptoinpcb(tp);
2019 int error, opt, optval;
2020 u_int ui;
2021 struct tcp_info ti;
2022 #ifdef KERN_TLS
2023 struct tls_enable tls;
2024 struct socket *so = inp->inp_socket;
2025 #endif
2026 char *pbuf, buf[TCP_LOG_ID_LEN];
2027 #ifdef STATS
2028 struct statsblob *sbp;
2029 #endif
2030 size_t len;
2031
2032 INP_WLOCK_ASSERT(inp);
2033 KASSERT((inp->inp_flags & INP_DROPPED) == 0,
2034 ("inp_flags == %x", inp->inp_flags));
2035 KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
2036
2037 switch (sopt->sopt_level) {
2038 #ifdef INET6
2039 case IPPROTO_IPV6:
2040 MPASS(inp->inp_vflag & INP_IPV6PROTO);
2041 switch (sopt->sopt_name) {
2042 case IPV6_USE_MIN_MTU:
2043 tcp6_use_min_mtu(tp);
2044 /* FALLTHROUGH */
2045 }
2046 INP_WUNLOCK(inp);
2047 return (0);
2048 #endif
2049 #ifdef INET
2050 case IPPROTO_IP:
2051 INP_WUNLOCK(inp);
2052 return (0);
2053 #endif
2054 }
2055
2056 /*
2057 * For TCP_CCALGOOPT forward the control to CC module, for both
2058 * SOPT_SET and SOPT_GET.
2059 */
2060 switch (sopt->sopt_name) {
2061 case TCP_CCALGOOPT:
2062 INP_WUNLOCK(inp);
2063 if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
2064 return (EINVAL);
2065 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
2066 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
2067 sopt->sopt_valsize);
2068 if (error) {
2069 free(pbuf, M_TEMP);
2070 return (error);
2071 }
2072 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
2073 if (CC_ALGO(tp)->ctl_output != NULL)
2074 error = CC_ALGO(tp)->ctl_output(&tp->t_ccv, sopt, pbuf);
2075 else
2076 error = ENOENT;
2077 INP_WUNLOCK(inp);
2078 if (error == 0 && sopt->sopt_dir == SOPT_GET)
2079 error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
2080 free(pbuf, M_TEMP);
2081 return (error);
2082 }
2083
2084 switch (sopt->sopt_dir) {
2085 case SOPT_SET:
2086 switch (sopt->sopt_name) {
2087 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2088 case TCP_MD5SIG:
2089 INP_WUNLOCK(inp);
2090 if (!TCPMD5_ENABLED())
2091 return (ENOPROTOOPT);
2092 error = TCPMD5_PCBCTL(inp, sopt);
2093 if (error)
2094 return (error);
2095 INP_WLOCK_RECHECK(inp);
2096 goto unlock_and_done;
2097 #endif /* IPSEC */
2098
2099 case TCP_NODELAY:
2100 case TCP_NOOPT:
2101 INP_WUNLOCK(inp);
2102 error = sooptcopyin(sopt, &optval, sizeof optval,
2103 sizeof optval);
2104 if (error)
2105 return (error);
2106
2107 INP_WLOCK_RECHECK(inp);
2108 switch (sopt->sopt_name) {
2109 case TCP_NODELAY:
2110 opt = TF_NODELAY;
2111 break;
2112 case TCP_NOOPT:
2113 opt = TF_NOOPT;
2114 break;
2115 default:
2116 opt = 0; /* dead code to fool gcc */
2117 break;
2118 }
2119
2120 if (optval)
2121 tp->t_flags |= opt;
2122 else
2123 tp->t_flags &= ~opt;
2124 unlock_and_done:
2125 #ifdef TCP_OFFLOAD
2126 if (tp->t_flags & TF_TOE) {
2127 tcp_offload_ctloutput(tp, sopt->sopt_dir,
2128 sopt->sopt_name);
2129 }
2130 #endif
2131 INP_WUNLOCK(inp);
2132 break;
2133
2134 case TCP_NOPUSH:
2135 INP_WUNLOCK(inp);
2136 error = sooptcopyin(sopt, &optval, sizeof optval,
2137 sizeof optval);
2138 if (error)
2139 return (error);
2140
2141 INP_WLOCK_RECHECK(inp);
2142 if (optval)
2143 tp->t_flags |= TF_NOPUSH;
2144 else if (tp->t_flags & TF_NOPUSH) {
2145 tp->t_flags &= ~TF_NOPUSH;
2146 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
2147 struct epoch_tracker et;
2148
2149 NET_EPOCH_ENTER(et);
2150 error = tcp_output_nodrop(tp);
2151 NET_EPOCH_EXIT(et);
2152 }
2153 }
2154 goto unlock_and_done;
2155
2156 case TCP_REMOTE_UDP_ENCAPS_PORT:
2157 INP_WUNLOCK(inp);
2158 error = sooptcopyin(sopt, &optval, sizeof optval,
2159 sizeof optval);
2160 if (error)
2161 return (error);
2162 if ((optval < TCP_TUNNELING_PORT_MIN) ||
2163 (optval > TCP_TUNNELING_PORT_MAX)) {
2164 /* Its got to be in range */
2165 return (EINVAL);
2166 }
2167 if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
2168 /* You have to have enabled a UDP tunneling port first */
2169 return (EINVAL);
2170 }
2171 INP_WLOCK_RECHECK(inp);
2172 if (tp->t_state != TCPS_CLOSED) {
2173 /* You can't change after you are connected */
2174 error = EINVAL;
2175 } else {
2176 /* Ok we are all good set the port */
2177 tp->t_port = htons(optval);
2178 }
2179 goto unlock_and_done;
2180
2181 case TCP_MAXSEG:
2182 INP_WUNLOCK(inp);
2183 error = sooptcopyin(sopt, &optval, sizeof optval,
2184 sizeof optval);
2185 if (error)
2186 return (error);
2187
2188 INP_WLOCK_RECHECK(inp);
2189 if (optval > 0 && optval <= tp->t_maxseg &&
2190 optval + 40 >= V_tcp_minmss) {
2191 tp->t_maxseg = optval;
2192 if (tp->t_maxseg < V_tcp_mssdflt) {
2193 /*
2194 * The MSS is so small we should not process incoming
2195 * SACK's since we are subject to attack in such a
2196 * case.
2197 */
2198 tp->t_flags2 |= TF2_PROC_SACK_PROHIBIT;
2199 } else {
2200 tp->t_flags2 &= ~TF2_PROC_SACK_PROHIBIT;
2201 }
2202 } else
2203 error = EINVAL;
2204 goto unlock_and_done;
2205
2206 case TCP_INFO:
2207 INP_WUNLOCK(inp);
2208 error = EINVAL;
2209 break;
2210
2211 case TCP_STATS:
2212 INP_WUNLOCK(inp);
2213 #ifdef STATS
2214 error = sooptcopyin(sopt, &optval, sizeof optval,
2215 sizeof optval);
2216 if (error)
2217 return (error);
2218
2219 if (optval > 0)
2220 sbp = stats_blob_alloc(
2221 V_tcp_perconn_stats_dflt_tpl, 0);
2222 else
2223 sbp = NULL;
2224
2225 INP_WLOCK_RECHECK(inp);
2226 if ((tp->t_stats != NULL && sbp == NULL) ||
2227 (tp->t_stats == NULL && sbp != NULL)) {
2228 struct statsblob *t = tp->t_stats;
2229 tp->t_stats = sbp;
2230 sbp = t;
2231 }
2232 INP_WUNLOCK(inp);
2233
2234 stats_blob_destroy(sbp);
2235 #else
2236 return (EOPNOTSUPP);
2237 #endif /* !STATS */
2238 break;
2239
2240 case TCP_CONGESTION:
2241 error = tcp_set_cc_mod(inp, sopt);
2242 break;
2243
2244 case TCP_REUSPORT_LB_NUMA:
2245 INP_WUNLOCK(inp);
2246 error = sooptcopyin(sopt, &optval, sizeof(optval),
2247 sizeof(optval));
2248 INP_WLOCK_RECHECK(inp);
2249 if (!error)
2250 error = in_pcblbgroup_numa(inp, optval);
2251 INP_WUNLOCK(inp);
2252 break;
2253
2254 #ifdef KERN_TLS
2255 case TCP_TXTLS_ENABLE:
2256 INP_WUNLOCK(inp);
2257 error = ktls_copyin_tls_enable(sopt, &tls);
2258 if (error != 0)
2259 break;
2260 error = ktls_enable_tx(so, &tls);
2261 ktls_cleanup_tls_enable(&tls);
2262 break;
2263 case TCP_TXTLS_MODE:
2264 INP_WUNLOCK(inp);
2265 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2266 if (error != 0)
2267 return (error);
2268
2269 INP_WLOCK_RECHECK(inp);
2270 error = ktls_set_tx_mode(so, ui);
2271 INP_WUNLOCK(inp);
2272 break;
2273 case TCP_RXTLS_ENABLE:
2274 INP_WUNLOCK(inp);
2275 error = ktls_copyin_tls_enable(sopt, &tls);
2276 if (error != 0)
2277 break;
2278 error = ktls_enable_rx(so, &tls);
2279 ktls_cleanup_tls_enable(&tls);
2280 break;
2281 #endif
2282 case TCP_MAXUNACKTIME:
2283 case TCP_KEEPIDLE:
2284 case TCP_KEEPINTVL:
2285 case TCP_KEEPINIT:
2286 INP_WUNLOCK(inp);
2287 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2288 if (error)
2289 return (error);
2290
2291 if (ui > (UINT_MAX / hz)) {
2292 error = EINVAL;
2293 break;
2294 }
2295 ui *= hz;
2296
2297 INP_WLOCK_RECHECK(inp);
2298 switch (sopt->sopt_name) {
2299 case TCP_MAXUNACKTIME:
2300 tp->t_maxunacktime = ui;
2301 break;
2302
2303 case TCP_KEEPIDLE:
2304 tp->t_keepidle = ui;
2305 /*
2306 * XXX: better check current remaining
2307 * timeout and "merge" it with new value.
2308 */
2309 if ((tp->t_state > TCPS_LISTEN) &&
2310 (tp->t_state <= TCPS_CLOSING))
2311 tcp_timer_activate(tp, TT_KEEP,
2312 TP_KEEPIDLE(tp));
2313 break;
2314 case TCP_KEEPINTVL:
2315 tp->t_keepintvl = ui;
2316 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2317 (TP_MAXIDLE(tp) > 0))
2318 tcp_timer_activate(tp, TT_2MSL,
2319 TP_MAXIDLE(tp));
2320 break;
2321 case TCP_KEEPINIT:
2322 tp->t_keepinit = ui;
2323 if (tp->t_state == TCPS_SYN_RECEIVED ||
2324 tp->t_state == TCPS_SYN_SENT)
2325 tcp_timer_activate(tp, TT_KEEP,
2326 TP_KEEPINIT(tp));
2327 break;
2328 }
2329 goto unlock_and_done;
2330
2331 case TCP_KEEPCNT:
2332 INP_WUNLOCK(inp);
2333 error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2334 if (error)
2335 return (error);
2336
2337 INP_WLOCK_RECHECK(inp);
2338 tp->t_keepcnt = ui;
2339 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2340 (TP_MAXIDLE(tp) > 0))
2341 tcp_timer_activate(tp, TT_2MSL,
2342 TP_MAXIDLE(tp));
2343 goto unlock_and_done;
2344
2345 #ifdef TCPPCAP
2346 case TCP_PCAP_OUT:
2347 case TCP_PCAP_IN:
2348 INP_WUNLOCK(inp);
2349 error = sooptcopyin(sopt, &optval, sizeof optval,
2350 sizeof optval);
2351 if (error)
2352 return (error);
2353
2354 INP_WLOCK_RECHECK(inp);
2355 if (optval >= 0)
2356 tcp_pcap_set_sock_max(
2357 (sopt->sopt_name == TCP_PCAP_OUT) ?
2358 &(tp->t_outpkts) : &(tp->t_inpkts),
2359 optval);
2360 else
2361 error = EINVAL;
2362 goto unlock_and_done;
2363 #endif
2364
2365 case TCP_FASTOPEN: {
2366 struct tcp_fastopen tfo_optval;
2367
2368 INP_WUNLOCK(inp);
2369 if (!V_tcp_fastopen_client_enable &&
2370 !V_tcp_fastopen_server_enable)
2371 return (EPERM);
2372
2373 error = sooptcopyin(sopt, &tfo_optval,
2374 sizeof(tfo_optval), sizeof(int));
2375 if (error)
2376 return (error);
2377
2378 INP_WLOCK_RECHECK(inp);
2379 if ((tp->t_state != TCPS_CLOSED) &&
2380 (tp->t_state != TCPS_LISTEN)) {
2381 error = EINVAL;
2382 goto unlock_and_done;
2383 }
2384 if (tfo_optval.enable) {
2385 if (tp->t_state == TCPS_LISTEN) {
2386 if (!V_tcp_fastopen_server_enable) {
2387 error = EPERM;
2388 goto unlock_and_done;
2389 }
2390
2391 if (tp->t_tfo_pending == NULL)
2392 tp->t_tfo_pending =
2393 tcp_fastopen_alloc_counter();
2394 } else {
2395 /*
2396 * If a pre-shared key was provided,
2397 * stash it in the client cookie
2398 * field of the tcpcb for use during
2399 * connect.
2400 */
2401 if (sopt->sopt_valsize ==
2402 sizeof(tfo_optval)) {
2403 memcpy(tp->t_tfo_cookie.client,
2404 tfo_optval.psk,
2405 TCP_FASTOPEN_PSK_LEN);
2406 tp->t_tfo_client_cookie_len =
2407 TCP_FASTOPEN_PSK_LEN;
2408 }
2409 }
2410 tp->t_flags |= TF_FASTOPEN;
2411 } else
2412 tp->t_flags &= ~TF_FASTOPEN;
2413 goto unlock_and_done;
2414 }
2415
2416 #ifdef TCP_BLACKBOX
2417 case TCP_LOG:
2418 INP_WUNLOCK(inp);
2419 error = sooptcopyin(sopt, &optval, sizeof optval,
2420 sizeof optval);
2421 if (error)
2422 return (error);
2423
2424 INP_WLOCK_RECHECK(inp);
2425 error = tcp_log_state_change(tp, optval);
2426 goto unlock_and_done;
2427
2428 case TCP_LOGBUF:
2429 INP_WUNLOCK(inp);
2430 error = EINVAL;
2431 break;
2432
2433 case TCP_LOGID:
2434 INP_WUNLOCK(inp);
2435 error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
2436 if (error)
2437 break;
2438 buf[sopt->sopt_valsize] = '\0';
2439 INP_WLOCK_RECHECK(inp);
2440 error = tcp_log_set_id(tp, buf);
2441 /* tcp_log_set_id() unlocks the INP. */
2442 break;
2443
2444 case TCP_LOGDUMP:
2445 case TCP_LOGDUMPID:
2446 INP_WUNLOCK(inp);
2447 error =
2448 sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
2449 if (error)
2450 break;
2451 buf[sopt->sopt_valsize] = '\0';
2452 INP_WLOCK_RECHECK(inp);
2453 if (sopt->sopt_name == TCP_LOGDUMP) {
2454 error = tcp_log_dump_tp_logbuf(tp, buf,
2455 M_WAITOK, true);
2456 INP_WUNLOCK(inp);
2457 } else {
2458 tcp_log_dump_tp_bucket_logbufs(tp, buf);
2459 /*
2460 * tcp_log_dump_tp_bucket_logbufs() drops the
2461 * INP lock.
2462 */
2463 }
2464 break;
2465 #endif
2466
2467 default:
2468 INP_WUNLOCK(inp);
2469 error = ENOPROTOOPT;
2470 break;
2471 }
2472 break;
2473
2474 case SOPT_GET:
2475 tp = intotcpcb(inp);
2476 switch (sopt->sopt_name) {
2477 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2478 case TCP_MD5SIG:
2479 INP_WUNLOCK(inp);
2480 if (!TCPMD5_ENABLED())
2481 return (ENOPROTOOPT);
2482 error = TCPMD5_PCBCTL(inp, sopt);
2483 break;
2484 #endif
2485
2486 case TCP_NODELAY:
2487 optval = tp->t_flags & TF_NODELAY;
2488 INP_WUNLOCK(inp);
2489 error = sooptcopyout(sopt, &optval, sizeof optval);
2490 break;
2491 case TCP_MAXSEG:
2492 optval = tp->t_maxseg;
2493 INP_WUNLOCK(inp);
2494 error = sooptcopyout(sopt, &optval, sizeof optval);
2495 break;
2496 case TCP_REMOTE_UDP_ENCAPS_PORT:
2497 optval = ntohs(tp->t_port);
2498 INP_WUNLOCK(inp);
2499 error = sooptcopyout(sopt, &optval, sizeof optval);
2500 break;
2501 case TCP_NOOPT:
2502 optval = tp->t_flags & TF_NOOPT;
2503 INP_WUNLOCK(inp);
2504 error = sooptcopyout(sopt, &optval, sizeof optval);
2505 break;
2506 case TCP_NOPUSH:
2507 optval = tp->t_flags & TF_NOPUSH;
2508 INP_WUNLOCK(inp);
2509 error = sooptcopyout(sopt, &optval, sizeof optval);
2510 break;
2511 case TCP_INFO:
2512 tcp_fill_info(tp, &ti);
2513 INP_WUNLOCK(inp);
2514 error = sooptcopyout(sopt, &ti, sizeof ti);
2515 break;
2516 case TCP_STATS:
2517 {
2518 #ifdef STATS
2519 int nheld;
2520 TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
2521
2522 error = 0;
2523 socklen_t outsbsz = sopt->sopt_valsize;
2524 if (tp->t_stats == NULL)
2525 error = ENOENT;
2526 else if (outsbsz >= tp->t_stats->cursz)
2527 outsbsz = tp->t_stats->cursz;
2528 else if (outsbsz >= sizeof(struct statsblob))
2529 outsbsz = sizeof(struct statsblob);
2530 else
2531 error = EINVAL;
2532 INP_WUNLOCK(inp);
2533 if (error)
2534 break;
2535
2536 sbp = sopt->sopt_val;
2537 nheld = atop(round_page(((vm_offset_t)sbp) +
2538 (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
2539 vm_page_t ma[nheld];
2540 if (vm_fault_quick_hold_pages(
2541 &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
2542 outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
2543 nheld) < 0) {
2544 error = EFAULT;
2545 break;
2546 }
2547
2548 if ((error = copyin_nofault(&(sbp->flags), &sbflags,
2549 SIZEOF_MEMBER(struct statsblob, flags))))
2550 goto unhold;
2551
2552 INP_WLOCK_RECHECK(inp);
2553 error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
2554 sbflags | SB_CLONE_USRDSTNOFAULT);
2555 INP_WUNLOCK(inp);
2556 sopt->sopt_valsize = outsbsz;
2557 unhold:
2558 vm_page_unhold_pages(ma, nheld);
2559 #else
2560 INP_WUNLOCK(inp);
2561 error = EOPNOTSUPP;
2562 #endif /* !STATS */
2563 break;
2564 }
2565 case TCP_CONGESTION:
2566 len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
2567 INP_WUNLOCK(inp);
2568 error = sooptcopyout(sopt, buf, len + 1);
2569 break;
2570 case TCP_MAXUNACKTIME:
2571 case TCP_KEEPIDLE:
2572 case TCP_KEEPINTVL:
2573 case TCP_KEEPINIT:
2574 case TCP_KEEPCNT:
2575 switch (sopt->sopt_name) {
2576 case TCP_MAXUNACKTIME:
2577 ui = TP_MAXUNACKTIME(tp) / hz;
2578 break;
2579 case TCP_KEEPIDLE:
2580 ui = TP_KEEPIDLE(tp) / hz;
2581 break;
2582 case TCP_KEEPINTVL:
2583 ui = TP_KEEPINTVL(tp) / hz;
2584 break;
2585 case TCP_KEEPINIT:
2586 ui = TP_KEEPINIT(tp) / hz;
2587 break;
2588 case TCP_KEEPCNT:
2589 ui = TP_KEEPCNT(tp);
2590 break;
2591 }
2592 INP_WUNLOCK(inp);
2593 error = sooptcopyout(sopt, &ui, sizeof(ui));
2594 break;
2595 #ifdef TCPPCAP
2596 case TCP_PCAP_OUT:
2597 case TCP_PCAP_IN:
2598 optval = tcp_pcap_get_sock_max(
2599 (sopt->sopt_name == TCP_PCAP_OUT) ?
2600 &(tp->t_outpkts) : &(tp->t_inpkts));
2601 INP_WUNLOCK(inp);
2602 error = sooptcopyout(sopt, &optval, sizeof optval);
2603 break;
2604 #endif
2605 case TCP_FASTOPEN:
2606 optval = tp->t_flags & TF_FASTOPEN;
2607 INP_WUNLOCK(inp);
2608 error = sooptcopyout(sopt, &optval, sizeof optval);
2609 break;
2610 #ifdef TCP_BLACKBOX
2611 case TCP_LOG:
2612 optval = tcp_get_bblog_state(tp);
2613 INP_WUNLOCK(inp);
2614 error = sooptcopyout(sopt, &optval, sizeof(optval));
2615 break;
2616 case TCP_LOGBUF:
2617 /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
2618 error = tcp_log_getlogbuf(sopt, tp);
2619 break;
2620 case TCP_LOGID:
2621 len = tcp_log_get_id(tp, buf);
2622 INP_WUNLOCK(inp);
2623 error = sooptcopyout(sopt, buf, len + 1);
2624 break;
2625 case TCP_LOGDUMP:
2626 case TCP_LOGDUMPID:
2627 INP_WUNLOCK(inp);
2628 error = EINVAL;
2629 break;
2630 #endif
2631 #ifdef KERN_TLS
2632 case TCP_TXTLS_MODE:
2633 error = ktls_get_tx_mode(so, &optval);
2634 INP_WUNLOCK(inp);
2635 if (error == 0)
2636 error = sooptcopyout(sopt, &optval,
2637 sizeof(optval));
2638 break;
2639 case TCP_RXTLS_MODE:
2640 error = ktls_get_rx_mode(so, &optval);
2641 INP_WUNLOCK(inp);
2642 if (error == 0)
2643 error = sooptcopyout(sopt, &optval,
2644 sizeof(optval));
2645 break;
2646 #endif
2647 default:
2648 INP_WUNLOCK(inp);
2649 error = ENOPROTOOPT;
2650 break;
2651 }
2652 break;
2653 }
2654 return (error);
2655 }
2656 #undef INP_WLOCK_RECHECK
2657 #undef INP_WLOCK_RECHECK_CLEANUP
2658
2659 /*
2660 * Initiate (or continue) disconnect.
2661 * If embryonic state, just send reset (once).
2662 * If in ``let data drain'' option and linger null, just drop.
2663 * Otherwise (hard), mark socket disconnecting and drop
2664 * current input data; switch states based on user close, and
2665 * send segment to peer (with FIN).
2666 */
2667 static void
tcp_disconnect(struct tcpcb * tp)2668 tcp_disconnect(struct tcpcb *tp)
2669 {
2670 struct inpcb *inp = tptoinpcb(tp);
2671 struct socket *so = tptosocket(tp);
2672
2673 NET_EPOCH_ASSERT();
2674 INP_WLOCK_ASSERT(inp);
2675
2676 /*
2677 * Neither tcp_close() nor tcp_drop() should return NULL, as the
2678 * socket is still open.
2679 */
2680 if (tp->t_state < TCPS_ESTABLISHED &&
2681 !(tp->t_state > TCPS_LISTEN && (tp->t_flags & TF_FASTOPEN))) {
2682 tp = tcp_close(tp);
2683 KASSERT(tp != NULL,
2684 ("tcp_disconnect: tcp_close() returned NULL"));
2685 } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
2686 tp = tcp_drop(tp, 0);
2687 KASSERT(tp != NULL,
2688 ("tcp_disconnect: tcp_drop() returned NULL"));
2689 } else {
2690 soisdisconnecting(so);
2691 sbflush(&so->so_rcv);
2692 tcp_usrclosed(tp);
2693 if (!(inp->inp_flags & INP_DROPPED))
2694 /* Ignore stack's drop request, we already at it. */
2695 (void)tcp_output_nodrop(tp);
2696 }
2697 }
2698
2699 /*
2700 * User issued close, and wish to trail through shutdown states:
2701 * if never received SYN, just forget it. If got a SYN from peer,
2702 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2703 * If already got a FIN from peer, then almost done; go to LAST_ACK
2704 * state. In all other cases, have already sent FIN to peer (e.g.
2705 * after PRU_SHUTDOWN), and just have to play tedious game waiting
2706 * for peer to send FIN or not respond to keep-alives, etc.
2707 * We can let the user exit from the close as soon as the FIN is acked.
2708 */
2709 static void
tcp_usrclosed(struct tcpcb * tp)2710 tcp_usrclosed(struct tcpcb *tp)
2711 {
2712
2713 NET_EPOCH_ASSERT();
2714 INP_WLOCK_ASSERT(tptoinpcb(tp));
2715
2716 switch (tp->t_state) {
2717 case TCPS_LISTEN:
2718 #ifdef TCP_OFFLOAD
2719 tcp_offload_listen_stop(tp);
2720 #endif
2721 tcp_state_change(tp, TCPS_CLOSED);
2722 /* FALLTHROUGH */
2723 case TCPS_CLOSED:
2724 tp = tcp_close(tp);
2725 /*
2726 * tcp_close() should never return NULL here as the socket is
2727 * still open.
2728 */
2729 KASSERT(tp != NULL,
2730 ("tcp_usrclosed: tcp_close() returned NULL"));
2731 break;
2732
2733 case TCPS_SYN_SENT:
2734 case TCPS_SYN_RECEIVED:
2735 tp->t_flags |= TF_NEEDFIN;
2736 break;
2737
2738 case TCPS_ESTABLISHED:
2739 tcp_state_change(tp, TCPS_FIN_WAIT_1);
2740 break;
2741
2742 case TCPS_CLOSE_WAIT:
2743 tcp_state_change(tp, TCPS_LAST_ACK);
2744 break;
2745 }
2746 if (tp->t_acktime == 0)
2747 tp->t_acktime = ticks;
2748 if (tp->t_state >= TCPS_FIN_WAIT_2) {
2749 tcp_free_sackholes(tp);
2750 soisdisconnected(tptosocket(tp));
2751 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
2752 if (tp->t_state == TCPS_FIN_WAIT_2) {
2753 int timeout;
2754
2755 timeout = (tcp_fast_finwait2_recycle) ?
2756 tcp_finwait2_timeout : TP_MAXIDLE(tp);
2757 tcp_timer_activate(tp, TT_2MSL, timeout);
2758 }
2759 }
2760 }
2761
2762 #ifdef DDB
/*
 * Emit `indent' copies of the indentation string on the debugger console.
 * A zero or negative count prints nothing.
 */
static void
db_print_indent(int indent)
{

	while (indent-- > 0)
		db_printf(" ");
}
2771
2772 static void
db_print_tstate(int t_state)2773 db_print_tstate(int t_state)
2774 {
2775
2776 switch (t_state) {
2777 case TCPS_CLOSED:
2778 db_printf("TCPS_CLOSED");
2779 return;
2780
2781 case TCPS_LISTEN:
2782 db_printf("TCPS_LISTEN");
2783 return;
2784
2785 case TCPS_SYN_SENT:
2786 db_printf("TCPS_SYN_SENT");
2787 return;
2788
2789 case TCPS_SYN_RECEIVED:
2790 db_printf("TCPS_SYN_RECEIVED");
2791 return;
2792
2793 case TCPS_ESTABLISHED:
2794 db_printf("TCPS_ESTABLISHED");
2795 return;
2796
2797 case TCPS_CLOSE_WAIT:
2798 db_printf("TCPS_CLOSE_WAIT");
2799 return;
2800
2801 case TCPS_FIN_WAIT_1:
2802 db_printf("TCPS_FIN_WAIT_1");
2803 return;
2804
2805 case TCPS_CLOSING:
2806 db_printf("TCPS_CLOSING");
2807 return;
2808
2809 case TCPS_LAST_ACK:
2810 db_printf("TCPS_LAST_ACK");
2811 return;
2812
2813 case TCPS_FIN_WAIT_2:
2814 db_printf("TCPS_FIN_WAIT_2");
2815 return;
2816
2817 case TCPS_TIME_WAIT:
2818 db_printf("TCPS_TIME_WAIT");
2819 return;
2820
2821 default:
2822 db_printf("unknown");
2823 return;
2824 }
2825 }
2826
2827 static void
db_print_tflags(u_int t_flags)2828 db_print_tflags(u_int t_flags)
2829 {
2830 int comma;
2831
2832 comma = 0;
2833 if (t_flags & TF_ACKNOW) {
2834 db_printf("%sTF_ACKNOW", comma ? ", " : "");
2835 comma = 1;
2836 }
2837 if (t_flags & TF_DELACK) {
2838 db_printf("%sTF_DELACK", comma ? ", " : "");
2839 comma = 1;
2840 }
2841 if (t_flags & TF_NODELAY) {
2842 db_printf("%sTF_NODELAY", comma ? ", " : "");
2843 comma = 1;
2844 }
2845 if (t_flags & TF_NOOPT) {
2846 db_printf("%sTF_NOOPT", comma ? ", " : "");
2847 comma = 1;
2848 }
2849 if (t_flags & TF_SENTFIN) {
2850 db_printf("%sTF_SENTFIN", comma ? ", " : "");
2851 comma = 1;
2852 }
2853 if (t_flags & TF_REQ_SCALE) {
2854 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
2855 comma = 1;
2856 }
2857 if (t_flags & TF_RCVD_SCALE) {
2858 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
2859 comma = 1;
2860 }
2861 if (t_flags & TF_REQ_TSTMP) {
2862 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
2863 comma = 1;
2864 }
2865 if (t_flags & TF_RCVD_TSTMP) {
2866 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
2867 comma = 1;
2868 }
2869 if (t_flags & TF_SACK_PERMIT) {
2870 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
2871 comma = 1;
2872 }
2873 if (t_flags & TF_NEEDSYN) {
2874 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
2875 comma = 1;
2876 }
2877 if (t_flags & TF_NEEDFIN) {
2878 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
2879 comma = 1;
2880 }
2881 if (t_flags & TF_NOPUSH) {
2882 db_printf("%sTF_NOPUSH", comma ? ", " : "");
2883 comma = 1;
2884 }
2885 if (t_flags & TF_PREVVALID) {
2886 db_printf("%sTF_PREVVALID", comma ? ", " : "");
2887 comma = 1;
2888 }
2889 if (t_flags & TF_WAKESOR) {
2890 db_printf("%sTF_WAKESOR", comma ? ", " : "");
2891 comma = 1;
2892 }
2893 if (t_flags & TF_GPUTINPROG) {
2894 db_printf("%sTF_GPUTINPROG", comma ? ", " : "");
2895 comma = 1;
2896 }
2897 if (t_flags & TF_MORETOCOME) {
2898 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
2899 comma = 1;
2900 }
2901 if (t_flags & TF_SONOTCONN) {
2902 db_printf("%sTF_SONOTCONN", comma ? ", " : "");
2903 comma = 1;
2904 }
2905 if (t_flags & TF_LASTIDLE) {
2906 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
2907 comma = 1;
2908 }
2909 if (t_flags & TF_RXWIN0SENT) {
2910 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
2911 comma = 1;
2912 }
2913 if (t_flags & TF_FASTRECOVERY) {
2914 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
2915 comma = 1;
2916 }
2917 if (t_flags & TF_WASFRECOVERY) {
2918 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
2919 comma = 1;
2920 }
2921 if (t_flags & TF_SIGNATURE) {
2922 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
2923 comma = 1;
2924 }
2925 if (t_flags & TF_FORCEDATA) {
2926 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
2927 comma = 1;
2928 }
2929 if (t_flags & TF_TSO) {
2930 db_printf("%sTF_TSO", comma ? ", " : "");
2931 comma = 1;
2932 }
2933 if (t_flags & TF_TOE) {
2934 db_printf("%sTF_TOE", comma ? ", " : "");
2935 comma = 1;
2936 }
2937 if (t_flags & TF_CLOSED) {
2938 db_printf("%sTF_CLOSED", comma ? ", " : "");
2939 comma = 1;
2940 }
2941 if (t_flags & TF_SENTSYN) {
2942 db_printf("%sTF_SENTSYN", comma ? ", " : "");
2943 comma = 1;
2944 }
2945 if (t_flags & TF_LRD) {
2946 db_printf("%sTF_LRD", comma ? ", " : "");
2947 comma = 1;
2948 }
2949 if (t_flags & TF_CONGRECOVERY) {
2950 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
2951 comma = 1;
2952 }
2953 if (t_flags & TF_WASCRECOVERY) {
2954 db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
2955 comma = 1;
2956 }
2957 if (t_flags & TF_FASTOPEN) {
2958 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
2959 comma = 1;
2960 }
2961 }
2962
2963 static void
db_print_tflags2(u_int t_flags2)2964 db_print_tflags2(u_int t_flags2)
2965 {
2966 int comma;
2967
2968 comma = 0;
2969 if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
2970 db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
2971 comma = 1;
2972 }
2973 if (t_flags2 & TF2_PLPMTU_PMTUD) {
2974 db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
2975 comma = 1;
2976 }
2977 if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
2978 db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
2979 comma = 1;
2980 }
2981 if (t_flags2 & TF2_LOG_AUTO) {
2982 db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
2983 comma = 1;
2984 }
2985 if (t_flags2 & TF2_DROP_AF_DATA) {
2986 db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
2987 comma = 1;
2988 }
2989 if (t_flags2 & TF2_ECN_PERMIT) {
2990 db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
2991 comma = 1;
2992 }
2993 if (t_flags2 & TF2_ECN_SND_CWR) {
2994 db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
2995 comma = 1;
2996 }
2997 if (t_flags2 & TF2_ECN_SND_ECE) {
2998 db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
2999 comma = 1;
3000 }
3001 if (t_flags2 & TF2_ACE_PERMIT) {
3002 db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
3003 comma = 1;
3004 }
3005 if (t_flags2 & TF2_HPTS_CPU_SET) {
3006 db_printf("%sTF2_HPTS_CPU_SET", comma ? ", " : "");
3007 comma = 1;
3008 }
3009 if (t_flags2 & TF2_FBYTES_COMPLETE) {
3010 db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
3011 comma = 1;
3012 }
3013 if (t_flags2 & TF2_ECN_USE_ECT1) {
3014 db_printf("%sTF2_ECN_USE_ECT1", comma ? ", " : "");
3015 comma = 1;
3016 }
3017 if (t_flags2 & TF2_TCP_ACCOUNTING) {
3018 db_printf("%sTF2_TCP_ACCOUNTING", comma ? ", " : "");
3019 comma = 1;
3020 }
3021 if (t_flags2 & TF2_HPTS_CALLS) {
3022 db_printf("%sTF2_HPTS_CALLS", comma ? ", " : "");
3023 comma = 1;
3024 }
3025 if (t_flags2 & TF2_MBUF_L_ACKS) {
3026 db_printf("%sTF2_MBUF_L_ACKS", comma ? ", " : "");
3027 comma = 1;
3028 }
3029 if (t_flags2 & TF2_MBUF_ACKCMP) {
3030 db_printf("%sTF2_MBUF_ACKCMP", comma ? ", " : "");
3031 comma = 1;
3032 }
3033 if (t_flags2 & TF2_SUPPORTS_MBUFQ) {
3034 db_printf("%sTF2_SUPPORTS_MBUFQ", comma ? ", " : "");
3035 comma = 1;
3036 }
3037 if (t_flags2 & TF2_MBUF_QUEUE_READY) {
3038 db_printf("%sTF2_MBUF_QUEUE_READY", comma ? ", " : "");
3039 comma = 1;
3040 }
3041 if (t_flags2 & TF2_DONT_SACK_QUEUE) {
3042 db_printf("%sTF2_DONT_SACK_QUEUE", comma ? ", " : "");
3043 comma = 1;
3044 }
3045 if (t_flags2 & TF2_CANNOT_DO_ECN) {
3046 db_printf("%sTF2_CANNOT_DO_ECN", comma ? ", " : "");
3047 comma = 1;
3048 }
3049 if (t_flags2 & TF2_PROC_SACK_PROHIBIT) {
3050 db_printf("%sTF2_PROC_SACK_PROHIBIT", comma ? ", " : "");
3051 comma = 1;
3052 }
3053 if (t_flags2 & TF2_IPSEC_TSO) {
3054 db_printf("%sTF2_IPSEC_TSO", comma ? ", " : "");
3055 comma = 1;
3056 }
3057 if (t_flags2 & TF2_NO_ISS_CHECK) {
3058 db_printf("%sTF2_NO_ISS_CHECK", comma ? ", " : "");
3059 comma = 1;
3060 }
3061 }
3062
3063 static void
db_print_toobflags(char t_oobflags)3064 db_print_toobflags(char t_oobflags)
3065 {
3066 int comma;
3067
3068 comma = 0;
3069 if (t_oobflags & TCPOOB_HAVEDATA) {
3070 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
3071 comma = 1;
3072 }
3073 if (t_oobflags & TCPOOB_HADDATA) {
3074 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
3075 comma = 1;
3076 }
3077 }
3078
3079 static void
db_print_tcpcb(struct tcpcb * tp,const char * name,int indent)3080 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
3081 {
3082
3083 db_print_indent(indent);
3084 db_printf("%s at %p\n", name, tp);
3085
3086 indent += 2;
3087
3088 db_print_indent(indent);
3089 db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n",
3090 TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
3091
3092 db_print_indent(indent);
3093 db_printf("t_callout: %p t_timers: %p\n",
3094 &tp->t_callout, &tp->t_timers);
3095
3096 db_print_indent(indent);
3097 db_printf("t_state: %d (", tp->t_state);
3098 db_print_tstate(tp->t_state);
3099 db_printf(")\n");
3100
3101 db_print_indent(indent);
3102 db_printf("t_flags: 0x%x (", tp->t_flags);
3103 db_print_tflags(tp->t_flags);
3104 db_printf(")\n");
3105
3106 db_print_indent(indent);
3107 db_printf("t_flags2: 0x%x (", tp->t_flags2);
3108 db_print_tflags2(tp->t_flags2);
3109 db_printf(")\n");
3110
3111 db_print_indent(indent);
3112 db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: 0x%08x\n",
3113 tp->snd_una, tp->snd_max, tp->snd_nxt);
3114
3115 db_print_indent(indent);
3116 db_printf("snd_up: 0x%08x snd_wl1: 0x%08x snd_wl2: 0x%08x\n",
3117 tp->snd_up, tp->snd_wl1, tp->snd_wl2);
3118
3119 db_print_indent(indent);
3120 db_printf("iss: 0x%08x irs: 0x%08x rcv_nxt: 0x%08x\n",
3121 tp->iss, tp->irs, tp->rcv_nxt);
3122
3123 db_print_indent(indent);
3124 db_printf("rcv_adv: 0x%08x rcv_wnd: %u rcv_up: 0x%08x\n",
3125 tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
3126
3127 db_print_indent(indent);
3128 db_printf("snd_wnd: %u snd_cwnd: %u\n",
3129 tp->snd_wnd, tp->snd_cwnd);
3130
3131 db_print_indent(indent);
3132 db_printf("snd_ssthresh: %u snd_recover: "
3133 "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
3134
3135 db_print_indent(indent);
3136 db_printf("t_rcvtime: %u t_startime: %u\n",
3137 tp->t_rcvtime, tp->t_starttime);
3138
3139 db_print_indent(indent);
3140 db_printf("t_rttime: %u t_rtsq: 0x%08x\n",
3141 tp->t_rtttime, tp->t_rtseq);
3142
3143 db_print_indent(indent);
3144 db_printf("t_rxtcur: %d t_maxseg: %u t_srtt: %d\n",
3145 tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
3146
3147 db_print_indent(indent);
3148 db_printf("t_rttvar: %d t_rxtshift: %d t_rttmin: %u\n",
3149 tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
3150
3151 db_print_indent(indent);
3152 db_printf("t_rttupdated: %u max_sndwnd: %u t_softerror: %d\n",
3153 tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
3154
3155 db_print_indent(indent);
3156 db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
3157 db_print_toobflags(tp->t_oobflags);
3158 db_printf(") t_iobc: 0x%02x\n", tp->t_iobc);
3159
3160 db_print_indent(indent);
3161 db_printf("snd_scale: %u rcv_scale: %u request_r_scale: %u\n",
3162 tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
3163
3164 db_print_indent(indent);
3165 db_printf("ts_recent: %u ts_recent_age: %u\n",
3166 tp->ts_recent, tp->ts_recent_age);
3167
3168 db_print_indent(indent);
3169 db_printf("ts_offset: %u last_ack_sent: 0x%08x snd_cwnd_prev: "
3170 "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
3171
3172 db_print_indent(indent);
3173 db_printf("snd_ssthresh_prev: %u snd_recover_prev: 0x%08x "
3174 "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
3175 tp->snd_recover_prev, tp->t_badrxtwin);
3176
3177 db_print_indent(indent);
3178 db_printf("snd_numholes: %d snd_holes first: %p\n",
3179 tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
3180
3181 db_print_indent(indent);
3182 db_printf("snd_fack: 0x%08x rcv_numsacks: %d\n",
3183 tp->snd_fack, tp->rcv_numsacks);
3184
3185 /* Skip sackblks, sackhint. */
3186
3187 db_print_indent(indent);
3188 db_printf("t_rttlow: %d rfbuf_ts: %u rfbuf_cnt: %d\n",
3189 tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
3190 }
3191
DB_SHOW_COMMAND(tcpcb,db_show_tcpcb)3192 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
3193 {
3194 struct tcpcb *tp;
3195
3196 if (!have_addr) {
3197 db_printf("usage: show tcpcb <addr>\n");
3198 return;
3199 }
3200 tp = (struct tcpcb *)addr;
3201
3202 db_print_tcpcb(tp, "tcpcb", 0);
3203 }
3204 #endif
3205