Lines matching full-text search: "non-urgent"
1 // SPDX-License-Identifier: GPL-2.0-or-later
12 * Corey Minyard <wf-rch!minyard@relay.EU.net>
13 * Florian La Roche, <flla@stud.uni-sb.de>
25 * sk->inuse=1 and was trying to connect
51 * Alan Cox : sk->keepopen now seems to work
53 * Alan Cox : Fixed assorted sk->rqueue->next errors
63 * Alan Cox : FIN with no memory -> CRASH
76 * window non shrink trick.
113 * Alan Cox : Changed the semantics of sk->socket to
164 * Matt Day : poll()->select() match BSD precisely on error
166 * Marc Tamsky : Various sk->prot->retransmits and
167 * sk->retransmits misupdating fixed.
172 * Alan Cox : Look up device on a retransmit - routes may
214 * waiting for final ack in three-way handshake.
333 * Technical note: it is used by multiple contexts non atomically.
349 val--; in tcp_enter_memory_pressure()
364 jiffies_to_msecs(jiffies - val)); in tcp_leave_memory_pressure()
395 while (--retrans) { in retrans_to_secs()
407 u32 rate = READ_ONCE(tp->rate_delivered); in tcp_compute_delivery_rate()
408 u32 intv = READ_ONCE(tp->rate_interval_us); in tcp_compute_delivery_rate()
412 rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; in tcp_compute_delivery_rate()
423 if (tp->md5sig_info) { in tcp_md5_destruct_sock()
426 kfree(rcu_replace_pointer(tp->md5sig_info, NULL, 1)); in tcp_md5_destruct_sock()
434 /* Address-family independent initialization for a tcp_sock.
445 tp->out_of_order_queue = RB_ROOT; in tcp_init_sock()
446 sk->tcp_rtx_queue = RB_ROOT; in tcp_init_sock()
448 INIT_LIST_HEAD(&tp->tsq_node); in tcp_init_sock()
449 INIT_LIST_HEAD(&tp->tsorted_sent_queue); in tcp_init_sock()
451 icsk->icsk_rto = TCP_TIMEOUT_INIT; in tcp_init_sock()
453 rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms); in tcp_init_sock()
454 icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms); in tcp_init_sock()
456 rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); in tcp_init_sock()
457 icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); in tcp_init_sock()
458 icsk->icsk_delack_max = TCP_DELACK_MAX; in tcp_init_sock()
459 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); in tcp_init_sock()
460 minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); in tcp_init_sock()
463 * initial SYN frame in their delayed-ACK and congestion control in tcp_init_sock()
465 * efficiently to them. -DaveM in tcp_init_sock()
470 tp->app_limited = ~0U; in tcp_init_sock()
471 tp->rate_app_limited = 1; in tcp_init_sock()
473 /* See draft-stevens-tcpca-spec-01 for discussion of the in tcp_init_sock()
476 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; in tcp_init_sock()
477 tp->snd_cwnd_clamp = ~0; in tcp_init_sock()
478 tp->mss_cache = TCP_MSS_DEFAULT; in tcp_init_sock()
480 tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering); in tcp_init_sock()
483 tp->tsoffset = 0; in tcp_init_sock()
484 tp->rack.reo_wnd_steps = 1; in tcp_init_sock()
486 sk->sk_write_space = sk_stream_write_space; in tcp_init_sock()
489 icsk->icsk_sync_mss = tcp_sync_mss; in tcp_init_sock()
491 WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1])); in tcp_init_sock()
492 WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1])); in tcp_init_sock()
495 set_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); in tcp_init_sock()
497 xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1); in tcp_init_sock()
504 u32 tsflags = sockc->tsflags; in tcp_tx_timestamp()
510 sock_tx_timestamp(sk, sockc, &shinfo->tx_flags); in tcp_tx_timestamp()
512 tcb->txstamp_ack |= TSTAMP_ACK_SK; in tcp_tx_timestamp()
514 shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1; in tcp_tx_timestamp()
539 struct sock *sk = sock->sk; in tcp_poll()
568 * Check-me. in tcp_poll()
573 * in state CLOSE_WAIT. One solution is evident --- to set EPOLLHUP in tcp_poll()
579 * why EPOLLHUP is incompatible with EPOLLOUT. --ANK in tcp_poll()
582 * blocking on fresh not-connected or disconnected socket. --ANK in tcp_poll()
584 shutdown = READ_ONCE(sk->sk_shutdown); in tcp_poll()
592 (state != TCP_SYN_RECV || rcu_access_pointer(tp->fastopen_rsk))) { in tcp_poll()
594 u16 urg_data = READ_ONCE(tp->urg_data); in tcp_poll()
597 READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq) && in tcp_poll()
609 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); in tcp_poll()
635 if (READ_ONCE(sk->sk_err) || in tcp_poll()
636 !skb_queue_empty_lockless(&sk->sk_error_queue)) in tcp_poll()
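
The EPOLLHUP/EPOLLOUT rules discussed in the comments above are what a user-space poller observes; a minimal illustrative sketch (not from this file, error handling omitted):

#include <poll.h>
#include <stdio.h>

/* POLLHUP and POLLERR are reported even though they are never requested,
 * which is why the comments above stress that EPOLLHUP cannot be masked. */
static void wait_on_tcp(int fd)
{
	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT | POLLPRI };

	if (poll(&pfd, 1, 5000) > 0) {
		if (pfd.revents & POLLPRI)
			printf("urgent data pending\n");
		if (pfd.revents & (POLLHUP | POLLERR))
			printf("peer closed or socket error\n");
	}
}
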
651 if (sk->sk_state == TCP_LISTEN) in tcp_ioctl()
652 return -EINVAL; in tcp_ioctl()
659 answ = READ_ONCE(tp->urg_data) && in tcp_ioctl()
660 READ_ONCE(tp->urg_seq) == READ_ONCE(tp->copied_seq); in tcp_ioctl()
663 if (sk->sk_state == TCP_LISTEN) in tcp_ioctl()
664 return -EINVAL; in tcp_ioctl()
666 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) in tcp_ioctl()
669 answ = READ_ONCE(tp->write_seq) - tp->snd_una; in tcp_ioctl()
672 if (sk->sk_state == TCP_LISTEN) in tcp_ioctl()
673 return -EINVAL; in tcp_ioctl()
675 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) in tcp_ioctl()
678 answ = READ_ONCE(tp->write_seq) - in tcp_ioctl()
679 READ_ONCE(tp->snd_nxt); in tcp_ioctl()
682 return -ENOIOCTLCMD; in tcp_ioctl()
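
The answers computed above reach user space through the standard TCP ioctls; a minimal sketch for a connected socket (illustrative only, error handling omitted):

#include <sys/ioctl.h>
#include <linux/sockios.h>	/* SIOCINQ, SIOCOUTQ, SIOCOUTQNSD */
#include <stdio.h>

static void dump_tcp_queues(int fd)
{
	int inq = 0, outq = 0, notsent = 0, atmark = 0;

	ioctl(fd, SIOCINQ, &inq);	  /* unread bytes in the receive queue */
	ioctl(fd, SIOCOUTQ, &outq);	  /* bytes not yet acked: write_seq - snd_una */
	ioctl(fd, SIOCOUTQNSD, &notsent); /* bytes not yet sent: write_seq - snd_nxt */
	ioctl(fd, SIOCATMARK, &atmark);	  /* non-zero when at the urgent mark */
	printf("inq=%d outq=%d notsent=%d atmark=%d\n", inq, outq, notsent, atmark);
}
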
692 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; in tcp_mark_push()
693 tp->pushed_seq = tp->write_seq; in tcp_mark_push()
698 return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); in forced_push()
706 tcb->seq = tcb->end_seq = tp->write_seq; in tcp_skb_entail()
707 tcb->tcp_flags = TCPHDR_ACK; in tcp_skb_entail()
711 sk_wmem_queued_add(sk, skb->truesize); in tcp_skb_entail()
712 sk_mem_charge(sk, skb->truesize); in tcp_skb_entail()
713 if (tp->nonagle & TCP_NAGLE_PUSH) in tcp_skb_entail()
714 tp->nonagle &= ~TCP_NAGLE_PUSH; in tcp_skb_entail()
722 tp->snd_up = tp->write_seq; in tcp_mark_urg()
738 return skb->len < size_goal && in tcp_should_autocork()
739 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) && in tcp_should_autocork()
741 refcount_read(&sk->sk_wmem_alloc) > skb->truesize && in tcp_should_autocork()
762 if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) { in tcp_push()
764 set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); in tcp_push()
770 if (refcount_read(&sk->sk_wmem_alloc) > skb->truesize) in tcp_push()
783 struct tcp_splice_state *tss = rd_desc->arg.data; in tcp_splice_data_recv()
786 ret = skb_splice_bits(skb, skb->sk, offset, tss->pipe, in tcp_splice_data_recv()
787 min(rd_desc->count, len), tss->flags); in tcp_splice_data_recv()
789 rd_desc->count -= ret; in tcp_splice_data_recv()
798 .count = tss->len, in __tcp_splice_read()
805 * tcp_splice_read - splice data from TCP socket to a pipe
820 struct sock *sk = sock->sk; in tcp_splice_read()
835 return -ESPIPE; in tcp_splice_read()
841 timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK); in tcp_splice_read()
851 if (sk->sk_err) { in tcp_splice_read()
855 if (sk->sk_shutdown & RCV_SHUTDOWN) in tcp_splice_read()
857 if (sk->sk_state == TCP_CLOSE) { in tcp_splice_read()
862 ret = -ENOTCONN; in tcp_splice_read()
866 ret = -EAGAIN; in tcp_splice_read()
873 if (!skb_queue_empty(&sk->sk_receive_queue)) in tcp_splice_read()
884 tss.len -= ret; in tcp_splice_read()
892 if (sk->sk_err || sk->sk_state == TCP_CLOSE || in tcp_splice_read()
893 (sk->sk_shutdown & RCV_SHUTDOWN) || in tcp_splice_read()
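
This receive path is what splice(2) exercises; a minimal user-space sketch moving received bytes from a connected TCP socket into a pipe without a copy through user memory (illustrative only):

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>

/* Move up to 64 KiB of received data into pipefd[1]; returns bytes moved,
 * 0 at EOF, or -1 on error (e.g. EAGAIN on a non-blocking socket). */
static ssize_t drain_to_pipe(int sock_fd, int pipefd[2])
{
	return splice(sock_fd, NULL, pipefd[1], NULL, 65536, SPLICE_F_MOVE);
}
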
916 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); in tcp_stream_alloc_skb()
919 sk_forced_mem_schedule(sk, skb->truesize); in tcp_stream_alloc_skb()
921 mem_scheduled = sk_wmem_schedule(sk, skb->truesize); in tcp_stream_alloc_skb()
925 skb->ip_summed = CHECKSUM_PARTIAL; in tcp_stream_alloc_skb()
926 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); in tcp_stream_alloc_skb()
931 sk->sk_prot->enter_memory_pressure(sk); in tcp_stream_alloc_skb()
947 new_size_goal = tcp_bound_to_half_wnd(tp, sk->sk_gso_max_size); in tcp_xmit_size_goal()
950 size_goal = tp->gso_segs * mss_now; in tcp_xmit_size_goal()
953 tp->gso_segs = min_t(u16, new_size_goal / mss_now, in tcp_xmit_size_goal()
954 sk->sk_gso_max_segs); in tcp_xmit_size_goal()
955 size_goal = tp->gso_segs * mss_now; in tcp_xmit_size_goal()
981 if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { in tcp_remove_empty_skb()
993 u32 extra = skb->truesize - in tcp_downgrade_zcopy_pure()
997 return -ENOMEM; in tcp_downgrade_zcopy_pure()
1000 skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; in tcp_downgrade_zcopy_pure()
1014 * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0] in tcp_wmem_schedule()
1017 left = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[0]) - sk->sk_wmem_queued; in tcp_wmem_schedule()
1020 return min(copy, sk->sk_forward_alloc); in tcp_wmem_schedule()
1025 if (tp->fastopen_req) { in tcp_free_fastopen_req()
1026 kfree(tp->fastopen_req); in tcp_free_fastopen_req()
1027 tp->fastopen_req = NULL; in tcp_free_fastopen_req()
1036 struct sockaddr *uaddr = msg->msg_name; in tcp_sendmsg_fastopen()
1039 if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & in tcp_sendmsg_fastopen()
1041 (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && in tcp_sendmsg_fastopen()
1042 uaddr->sa_family == AF_UNSPEC)) in tcp_sendmsg_fastopen()
1043 return -EOPNOTSUPP; in tcp_sendmsg_fastopen()
1044 if (tp->fastopen_req) in tcp_sendmsg_fastopen()
1045 return -EALREADY; /* Another Fast Open is in progress */ in tcp_sendmsg_fastopen()
1047 tp->fastopen_req = kzalloc(sizeof(struct tcp_fastopen_request), in tcp_sendmsg_fastopen()
1048 sk->sk_allocation); in tcp_sendmsg_fastopen()
1049 if (unlikely(!tp->fastopen_req)) in tcp_sendmsg_fastopen()
1050 return -ENOBUFS; in tcp_sendmsg_fastopen()
1051 tp->fastopen_req->data = msg; in tcp_sendmsg_fastopen()
1052 tp->fastopen_req->size = size; in tcp_sendmsg_fastopen()
1053 tp->fastopen_req->uarg = uarg; in tcp_sendmsg_fastopen()
1060 inet->inet_dport = 0; in tcp_sendmsg_fastopen()
1061 sk->sk_route_caps = 0; in tcp_sendmsg_fastopen()
1064 flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; in tcp_sendmsg_fastopen()
1065 err = __inet_stream_connect(sk->sk_socket, uaddr, in tcp_sendmsg_fastopen()
1066 msg->msg_namelen, flags, 1); in tcp_sendmsg_fastopen()
1070 if (tp->fastopen_req) { in tcp_sendmsg_fastopen()
1071 *copied = tp->fastopen_req->copied; in tcp_sendmsg_fastopen()
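
A client reaches this function by passing MSG_FASTOPEN on a not-yet-connected socket, so the payload can ride on the SYN; a hedged sketch (the TCP_FASTOPEN_CONNECT option shown further below is the connect()-based alternative, and net.ipv4.tcp_fastopen must permit client use):

#include <sys/socket.h>
#include <netinet/in.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000		/* fallback for older headers */
#endif

static ssize_t tfo_send(int fd, const struct sockaddr_in *dst,
			const void *buf, size_t len)
{
	/* On an unconnected TCP socket this performs the connect() and
	 * queues the payload for the SYN in a single call. */
	return sendto(fd, buf, len, MSG_FASTOPEN,
		      (const struct sockaddr *)dst, sizeof(*dst));
}
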
1092 flags = msg->msg_flags; in tcp_sendmsg_locked()
1094 sockc = (struct sockcm_cookie){ .tsflags = READ_ONCE(sk->sk_tsflags) }; in tcp_sendmsg_locked()
1095 if (msg->msg_controllen) { in tcp_sendmsg_locked()
1103 if (msg->msg_ubuf) { in tcp_sendmsg_locked()
1104 uarg = msg->msg_ubuf; in tcp_sendmsg_locked()
1105 if (sk->sk_route_caps & NETIF_F_SG) in tcp_sendmsg_locked()
1112 err = -ENOBUFS; in tcp_sendmsg_locked()
1115 if (sk->sk_route_caps & NETIF_F_SG) in tcp_sendmsg_locked()
1118 uarg_to_msgzc(uarg)->zerocopy = 0; in tcp_sendmsg_locked()
1129 } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) { in tcp_sendmsg_locked()
1130 if (sk->sk_route_caps & NETIF_F_SG) in tcp_sendmsg_locked()
1136 err = -EINVAL; in tcp_sendmsg_locked()
1142 !tp->repair) { in tcp_sendmsg_locked()
1144 if (err == -EINPROGRESS && copied_syn > 0) in tcp_sendmsg_locked()
1152 tcp_rate_check_app_limited(sk); /* is sending application-limited? */ in tcp_sendmsg_locked()
1158 if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && in tcp_sendmsg_locked()
1165 if (unlikely(tp->repair)) { in tcp_sendmsg_locked()
1166 if (tp->repair_queue == TCP_RECV_QUEUE) { in tcp_sendmsg_locked()
1171 err = -EINVAL; in tcp_sendmsg_locked()
1172 if (tp->repair_queue == TCP_NO_QUEUE) in tcp_sendmsg_locked()
1192 err = -EPIPE; in tcp_sendmsg_locked()
1193 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) in tcp_sendmsg_locked()
1201 copy = size_goal - skb->len; in tcp_sendmsg_locked()
1218 skb = tcp_stream_alloc_skb(sk, sk->sk_allocation, in tcp_sendmsg_locked()
1226 skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); in tcp_sendmsg_locked()
1235 if (tp->repair) in tcp_sendmsg_locked()
1236 TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; in tcp_sendmsg_locked()
1245 int i = skb_shinfo(skb)->nr_frags; in tcp_sendmsg_locked()
1251 if (!skb_can_coalesce(skb, i, pfrag->page, in tcp_sendmsg_locked()
1252 pfrag->offset)) { in tcp_sendmsg_locked()
1260 copy = min_t(int, copy, pfrag->size - pfrag->offset); in tcp_sendmsg_locked()
1272 err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, in tcp_sendmsg_locked()
1273 pfrag->page, in tcp_sendmsg_locked()
1274 pfrag->offset, in tcp_sendmsg_locked()
1281 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); in tcp_sendmsg_locked()
1283 skb_fill_page_desc(skb, i, pfrag->page, in tcp_sendmsg_locked()
1284 pfrag->offset, copy); in tcp_sendmsg_locked()
1285 page_ref_inc(pfrag->page); in tcp_sendmsg_locked()
1287 pfrag->offset += copy; in tcp_sendmsg_locked()
1292 if (!skb->len) in tcp_sendmsg_locked()
1293 skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; in tcp_sendmsg_locked()
1303 if (err == -EMSGSIZE || err == -EEXIST) { in tcp_sendmsg_locked()
1318 err = skb_splice_from_iter(skb, &msg->msg_iter, copy); in tcp_sendmsg_locked()
1320 if (err == -EMSGSIZE) { in tcp_sendmsg_locked()
1329 skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG; in tcp_sendmsg_locked()
1336 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; in tcp_sendmsg_locked()
1338 WRITE_ONCE(tp->write_seq, tp->write_seq + copy); in tcp_sendmsg_locked()
1339 TCP_SKB_CB(skb)->end_seq += copy; in tcp_sendmsg_locked()
1345 TCP_SKB_CB(skb)->eor = 1; in tcp_sendmsg_locked()
1349 if (skb->len < size_goal || (flags & MSG_OOB) || unlikely(tp->repair)) in tcp_sendmsg_locked()
1360 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); in tcp_sendmsg_locked()
1376 tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); in tcp_sendmsg_locked()
1379 /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ in tcp_sendmsg_locked()
1380 if (uarg && !msg->msg_ubuf) in tcp_sendmsg_locked()
1392 /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */ in tcp_sendmsg_locked()
1393 if (uarg && !msg->msg_ubuf) in tcp_sendmsg_locked()
1397 if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) { in tcp_sendmsg_locked()
1398 sk->sk_write_space(sk); in tcp_sendmsg_locked()
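
The uarg/msg_ubuf handling in this function is the kernel side of MSG_ZEROCOPY; a minimal sketch of the user-space half (completion notifications must later be reaped from the error queue with recvmsg(fd, ..., MSG_ERRQUEUE), not shown):

#include <sys/socket.h>

#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60			/* fallbacks for older headers */
#endif
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif

static ssize_t send_zerocopy(int fd, const void *buf, size_t len)
{
	int one = 1;

	/* Opt in once per socket; buf must stay untouched until the
	 * zerocopy completion for this send arrives on the error queue. */
	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
	return send(fd, buf, len, MSG_ZEROCOPY);
}
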
1422 struct sock *sk = sock->sk; in tcp_splice_eof()
1431 tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); in tcp_splice_eof()
1437 * Handle reading urgent data. BSD has very simple semantics for
1446 if (sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data || in tcp_recv_urg()
1447 tp->urg_data == TCP_URG_READ) in tcp_recv_urg()
1448 return -EINVAL; /* Yes this is right ! */ in tcp_recv_urg()
1450 if (sk->sk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DONE)) in tcp_recv_urg()
1451 return -ENOTCONN; in tcp_recv_urg()
1453 if (tp->urg_data & TCP_URG_VALID) { in tcp_recv_urg()
1455 char c = tp->urg_data; in tcp_recv_urg()
1458 WRITE_ONCE(tp->urg_data, TCP_URG_READ); in tcp_recv_urg()
1460 /* Read urgent data. */ in tcp_recv_urg()
1461 msg->msg_flags |= MSG_OOB; in tcp_recv_urg()
1468 msg->msg_flags |= MSG_TRUNC; in tcp_recv_urg()
1470 return err ? -EFAULT : len; in tcp_recv_urg()
1473 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN)) in tcp_recv_urg()
1480 * Mike <pall@rz.uni-karlsruhe.de> in tcp_recv_urg()
1482 return -EAGAIN; in tcp_recv_urg()
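
From user space the single byte of urgent data is read with MSG_OOB (or inline when SO_OOBINLINE is set, in which case the code above returns -EINVAL); a minimal sketch:

#include <sys/socket.h>
#include <sys/ioctl.h>
#include <stdio.h>

static void read_oob_byte(int fd)
{
	char c;
	int at_mark = 0;

	ioctl(fd, SIOCATMARK, &at_mark);	/* non-zero once the urgent mark is reached */
	if (recv(fd, &c, 1, MSG_OOB) == 1)	/* may fail with EAGAIN/EINVAL as above */
		printf("urgent byte 0x%02x (at mark: %d)\n", (unsigned char)c, at_mark);
}
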
1490 skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { in tcp_peek_sndq()
1491 err = skb_copy_datagram_msg(skb, 0, msg, skb->len); in tcp_peek_sndq()
1494 copied += skb->len; in tcp_peek_sndq()
1497 skb_queue_walk(&sk->sk_write_queue, skb) { in tcp_peek_sndq()
1498 err = skb_copy_datagram_msg(skb, 0, msg, skb->len); in tcp_peek_sndq()
1502 copied += skb->len; in tcp_peek_sndq()
1522 if (/* Once-per-two-segments ACK was not sent by tcp_input.c */ in __tcp_cleanup_rbuf()
1523 tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || in __tcp_cleanup_rbuf()
1531 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || in __tcp_cleanup_rbuf()
1532 ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && in __tcp_cleanup_rbuf()
1534 !atomic_read(&sk->sk_rmem_alloc))) in __tcp_cleanup_rbuf()
1538 /* We send an ACK if we can now advertise a non-zero window in __tcp_cleanup_rbuf()
1544 if (copied > 0 && !time_to_ack && !(sk->sk_shutdown & RCV_SHUTDOWN)) { in __tcp_cleanup_rbuf()
1548 if (2*rcv_window_now <= tp->window_clamp) { in __tcp_cleanup_rbuf()
1566 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); in tcp_cleanup_rbuf()
1569 WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), in tcp_cleanup_rbuf()
1571 tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); in tcp_cleanup_rbuf()
1577 __skb_unlink(skb, &sk->sk_receive_queue); in tcp_eat_recv_skb()
1578 if (likely(skb->destructor == sock_rfree)) { in tcp_eat_recv_skb()
1580 skb->destructor = NULL; in tcp_eat_recv_skb()
1581 skb->sk = NULL; in tcp_eat_recv_skb()
1592 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { in tcp_recv_skb()
1593 offset = seq - TCP_SKB_CB(skb)->seq; in tcp_recv_skb()
1594 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { in tcp_recv_skb()
1596 offset--; in tcp_recv_skb()
1598 if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { in tcp_recv_skb()
1617 * - It is assumed that the socket was locked by the caller.
1618 * - The routine does not block.
1619 * - At present, there is no support for reading OOB data
1633 if (sk->sk_state == TCP_LISTEN) in __tcp_read_sock()
1634 return -ENOTCONN; in __tcp_read_sock()
1636 if (offset < skb->len) { in __tcp_read_sock()
1640 len = skb->len - offset; in __tcp_read_sock()
1641 /* Stop reading if we hit a patch of urgent data */ in __tcp_read_sock()
1642 if (unlikely(tp->urg_data)) { in __tcp_read_sock()
1643 u32 urg_offset = tp->urg_seq - seq; in __tcp_read_sock()
1666 skb = tcp_recv_skb(sk, seq - 1, &offset); in __tcp_read_sock()
1672 if (offset + 1 != skb->len) in __tcp_read_sock()
1675 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { in __tcp_read_sock()
1681 if (!desc->count) in __tcp_read_sock()
1705 &tcp_sk(sk)->copied_seq); in tcp_read_sock()
1721 if (sk->sk_state == TCP_LISTEN) in tcp_read_skb()
1722 return -ENOTCONN; in tcp_read_skb()
1724 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { in tcp_read_skb()
1728 __skb_unlink(skb, &sk->sk_receive_queue); in tcp_read_skb()
1730 tcp_flags = TCP_SKB_CB(skb)->tcp_flags; in tcp_read_skb()
1749 u32 seq = tp->copied_seq; in tcp_read_done()
1754 if (sk->sk_state == TCP_LISTEN) in tcp_read_done()
1761 used = min_t(size_t, skb->len - offset, left); in tcp_read_done()
1763 left -= used; in tcp_read_done()
1765 if (skb->len > offset + used) in tcp_read_done()
1768 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { in tcp_read_done()
1775 WRITE_ONCE(tp->copied_seq, seq); in tcp_read_done()
1781 tcp_cleanup_rbuf(sk, len - left); in tcp_read_done()
1787 return tcp_inq(sock->sk); in tcp_peek_len()
1797 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) in tcp_set_rcvlowat()
1798 cap = sk->sk_rcvbuf >> 1; in tcp_set_rcvlowat()
1800 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; in tcp_set_rcvlowat()
1802 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); in tcp_set_rcvlowat()
1807 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) in tcp_set_rcvlowat()
1811 if (space > sk->sk_rcvbuf) { in tcp_set_rcvlowat()
1812 WRITE_ONCE(sk->sk_rcvbuf, space); in tcp_set_rcvlowat()
1814 if (tp->window_clamp && tp->window_clamp < val) in tcp_set_rcvlowat()
1815 WRITE_ONCE(tp->window_clamp, val); in tcp_set_rcvlowat()
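
This is the kernel side of SO_RCVLOWAT on TCP; from user space it simply raises the threshold before recv()/poll() report the socket readable (the value below is illustrative):

#include <sys/socket.h>

/* Do not wake blocking readers or report EPOLLIN until at least 64 KiB is
 * queued (EOF and errors still wake immediately). */
static int set_low_watermark(int fd)
{
	int lowat = 64 * 1024;

	return setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, &lowat, sizeof(lowat));
}
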
1824 if (skb->tstamp) in tcp_update_recv_tstamps()
1825 tss->ts[0] = ktime_to_timespec64(skb->tstamp); in tcp_update_recv_tstamps()
1827 tss->ts[0] = (struct timespec64) {0}; in tcp_update_recv_tstamps()
1829 if (skb_hwtstamps(skb)->hwtstamp) in tcp_update_recv_tstamps()
1830 tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); in tcp_update_recv_tstamps()
1832 tss->ts[2] = (struct timespec64) {0}; in tcp_update_recv_tstamps()
1842 if (vma->vm_flags & (VM_WRITE | VM_EXEC)) in tcp_mmap()
1843 return -EPERM; in tcp_mmap()
1849 vma->vm_ops = &tcp_vm_ops; in tcp_mmap()
1859 if (unlikely(offset_skb >= skb->len)) in skb_advance_to_frag()
1862 offset_skb -= skb_headlen(skb); in skb_advance_to_frag()
1866 frag = skb_shinfo(skb)->frags; in skb_advance_to_frag()
1872 offset_skb -= skb_frag_size(frag); in skb_advance_to_frag()
1888 if (PageCompound(page) || page->mapping) in can_map_frag()
1918 zc->recv_skip_hint = skb->len - offset; in tcp_zerocopy_set_hint_for_skb()
1929 if (frag == &info->frags[info->nr_frags - 1]) in tcp_zerocopy_set_hint_for_skb()
1933 partial_frag_remainder = skb_frag_size(frag) - frag_offset; in tcp_zerocopy_set_hint_for_skb()
1934 zc->recv_skip_hint -= partial_frag_remainder; in tcp_zerocopy_set_hint_for_skb()
1942 mappable_offset = find_next_mappable_frag(frag, zc->recv_skip_hint); in tcp_zerocopy_set_hint_for_skb()
1943 zc->recv_skip_hint = mappable_offset + partial_frag_remainder; in tcp_zerocopy_set_hint_for_skb()
1953 unsigned long copy_address = (unsigned long)zc->copybuf_address; in receive_fallback_to_copy()
1957 zc->length = 0; in receive_fallback_to_copy()
1958 zc->recv_skip_hint = 0; in receive_fallback_to_copy()
1960 if (copy_address != zc->copybuf_address) in receive_fallback_to_copy()
1961 return -EINVAL; in receive_fallback_to_copy()
1969 tss, &zc->msg_flags); in receive_fallback_to_copy()
1973 zc->copybuf_len = err; in receive_fallback_to_copy()
1974 if (likely(zc->copybuf_len)) { in receive_fallback_to_copy()
1978 skb = tcp_recv_skb(sk, tcp_sk(sk)->copied_seq, &offset); in receive_fallback_to_copy()
1989 unsigned long copy_address = (unsigned long)zc->copybuf_address; in tcp_copy_straggler_data()
1993 if (copy_address != zc->copybuf_address) in tcp_copy_straggler_data()
1994 return -EINVAL; in tcp_copy_straggler_data()
2003 zc->recv_skip_hint -= copylen; in tcp_copy_straggler_data()
2016 u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); in tcp_zc_handle_leftover()
2022 offset = *seq - TCP_SKB_CB(skb)->seq; in tcp_zc_handle_leftover()
2025 if (TCP_SKB_CB(skb)->has_rxtstamp) { in tcp_zc_handle_leftover()
2027 zc->msg_flags |= TCP_CMSG_TS; in tcp_zc_handle_leftover()
2031 zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, in tcp_zc_handle_leftover()
2033 return zc->copybuf_len < 0 ? 0 : copylen; in tcp_zc_handle_leftover()
2047 if (err == -EBUSY && in tcp_zerocopy_vm_insert_batch_error()
2048 zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT) { in tcp_zerocopy_vm_insert_batch_error()
2051 maybe_zap_len = total_bytes_to_map - /* All bytes to map */ in tcp_zerocopy_vm_insert_batch_error()
2066 bytes_mapped = PAGE_SIZE * (leftover_pages - pages_remaining); in tcp_zerocopy_vm_insert_batch_error()
2078 *length -= bytes_not_mapped; in tcp_zerocopy_vm_insert_batch_error()
2079 zc->recv_skip_hint += bytes_not_mapped; in tcp_zerocopy_vm_insert_batch_error()
2099 pages_mapped = pages_to_map - (unsigned int)pages_remaining; in tcp_zerocopy_vm_insert_batch()
2124 msg_control_addr = (unsigned long)zc->msg_control; in tcp_zc_finalize_rx_tstamp()
2127 (__kernel_size_t)zc->msg_controllen; in tcp_zc_finalize_rx_tstamp()
2131 zc->msg_flags = 0; in tcp_zc_finalize_rx_tstamp()
2132 if (zc->msg_control == msg_control_addr && in tcp_zc_finalize_rx_tstamp()
2133 zc->msg_controllen == cmsg_dummy.msg_controllen) { in tcp_zc_finalize_rx_tstamp()
2135 zc->msg_control = (__u64) in tcp_zc_finalize_rx_tstamp()
2137 zc->msg_controllen = in tcp_zc_finalize_rx_tstamp()
2139 zc->msg_flags = (__u32)cmsg_dummy.msg_flags; in tcp_zc_finalize_rx_tstamp()
2150 if (vma->vm_ops != &tcp_vm_ops) { in find_tcp_vma()
2160 if (!vma || vma->vm_ops != &tcp_vm_ops) { in find_tcp_vma()
2174 unsigned long address = (unsigned long)zc->address; in tcp_zerocopy_receive()
2176 s32 copybuf_len = zc->copybuf_len; in tcp_zerocopy_receive()
2182 u32 seq = tp->copied_seq; in tcp_zerocopy_receive()
2188 zc->copybuf_len = 0; in tcp_zerocopy_receive()
2189 zc->msg_flags = 0; in tcp_zerocopy_receive()
2191 if (address & (PAGE_SIZE - 1) || address != zc->address) in tcp_zerocopy_receive()
2192 return -EINVAL; in tcp_zerocopy_receive()
2194 if (sk->sk_state == TCP_LISTEN) in tcp_zerocopy_receive()
2195 return -ENOTCONN; in tcp_zerocopy_receive()
2203 zc->length = 0; in tcp_zerocopy_receive()
2204 zc->recv_skip_hint = inq; in tcp_zerocopy_receive()
2206 return -EIO; in tcp_zerocopy_receive()
2210 vma = find_tcp_vma(current->mm, address, &mmap_locked); in tcp_zerocopy_receive()
2212 return -EINVAL; in tcp_zerocopy_receive()
2214 vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); in tcp_zerocopy_receive()
2216 total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1); in tcp_zerocopy_receive()
2218 if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT)) in tcp_zerocopy_receive()
2221 zc->length = total_bytes_to_map; in tcp_zerocopy_receive()
2222 zc->recv_skip_hint = 0; in tcp_zerocopy_receive()
2224 zc->length = avail_len; in tcp_zerocopy_receive()
2225 zc->recv_skip_hint = avail_len; in tcp_zerocopy_receive()
2228 while (length + PAGE_SIZE <= zc->length) { in tcp_zerocopy_receive()
2232 if (zc->recv_skip_hint < PAGE_SIZE) { in tcp_zerocopy_receive()
2236 if (zc->recv_skip_hint > 0) in tcp_zerocopy_receive()
2238 skb = skb->next; in tcp_zerocopy_receive()
2239 offset = seq - TCP_SKB_CB(skb)->seq; in tcp_zerocopy_receive()
2247 if (TCP_SKB_CB(skb)->has_rxtstamp) { in tcp_zerocopy_receive()
2249 zc->msg_flags |= TCP_CMSG_TS; in tcp_zerocopy_receive()
2251 zc->recv_skip_hint = skb->len - offset; in tcp_zerocopy_receive()
2258 zc->recv_skip_hint); in tcp_zerocopy_receive()
2260 zc->recv_skip_hint = mappable_offset; in tcp_zerocopy_receive()
2270 zc->recv_skip_hint -= PAGE_SIZE; in tcp_zerocopy_receive()
2273 zc->recv_skip_hint < PAGE_SIZE) { in tcp_zerocopy_receive()
2294 mmap_read_unlock(current->mm); in tcp_zerocopy_receive()
2302 WRITE_ONCE(tp->copied_seq, seq); in tcp_zerocopy_receive()
2309 if (length == zc->length) in tcp_zerocopy_receive()
2310 zc->recv_skip_hint = 0; in tcp_zerocopy_receive()
2312 if (!zc->recv_skip_hint && sock_flag(sk, SOCK_DONE)) in tcp_zerocopy_receive()
2313 ret = -EIO; in tcp_zerocopy_receive()
2315 zc->length = length; in tcp_zerocopy_receive()
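
User space drives this path by mmap()ing the TCP socket and then calling getsockopt(TCP_ZEROCOPY_RECEIVE); a hedged sketch using only the struct fields referenced above (see the tcp_mmap selftest for the complete protocol, error handling omitted):

#include <sys/mman.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>		/* TCP_ZEROCOPY_RECEIVE, struct tcp_zerocopy_receive */
#include <stdint.h>
#include <string.h>

static ssize_t zc_receive(int fd, size_t chunk)
{
	/* chunk should be a multiple of the page size; the mapping must be
	 * read-only, since tcp_mmap() above rejects VM_WRITE/VM_EXEC. */
	void *addr = mmap(NULL, chunk, PROT_READ, MAP_SHARED, fd, 0);
	struct tcp_zerocopy_receive zc;
	socklen_t zc_len = sizeof(zc);

	memset(&zc, 0, sizeof(zc));
	zc.address = (uint64_t)(unsigned long)addr;
	zc.length = chunk;
	if (getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, &zc, &zc_len))
		return -1;
	/* zc.length bytes of payload are now mapped at addr; any trailing
	 * zc.recv_skip_hint bytes must still be read with a normal recv(). */
	return zc.length;
}
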
2325 u32 tsflags = READ_ONCE(sk->sk_tsflags); in tcp_recv_timestamp()
2328 if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { in tcp_recv_timestamp()
2333 .tv_sec = tss->ts[0].tv_sec, in tcp_recv_timestamp()
2334 .tv_nsec = tss->ts[0].tv_nsec, in tcp_recv_timestamp()
2340 .tv_sec = tss->ts[0].tv_sec, in tcp_recv_timestamp()
2341 .tv_nsec = tss->ts[0].tv_nsec, in tcp_recv_timestamp()
2349 .tv_sec = tss->ts[0].tv_sec, in tcp_recv_timestamp()
2350 .tv_usec = tss->ts[0].tv_nsec / 1000, in tcp_recv_timestamp()
2356 .tv_sec = tss->ts[0].tv_sec, in tcp_recv_timestamp()
2357 .tv_usec = tss->ts[0].tv_nsec / 1000, in tcp_recv_timestamp()
2370 tss->ts[0] = (struct timespec64) {0}; in tcp_recv_timestamp()
2373 if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { in tcp_recv_timestamp()
2379 tss->ts[2] = (struct timespec64) {0}; in tcp_recv_timestamp()
2383 tss->ts[1] = (struct timespec64) {0}; in tcp_recv_timestamp()
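
ts[0] and ts[2] above surface in user space as the software and hardware slots of the SCM_TIMESTAMPING control message; a hedged sketch of enabling receive timestamps:

#include <sys/socket.h>
#include <linux/net_tstamp.h>	/* SOF_TIMESTAMPING_* */

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37	/* fallback for older headers */
#endif

static int enable_rx_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE;

	/* Each later recvmsg() then carries a cmsg at (SOL_SOCKET, SCM_TIMESTAMPING)
	 * holding struct scm_timestamping { struct timespec ts[3]; }:
	 * ts[0] = software receive time, ts[2] = hardware receive time. */
	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
}
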
2394 u32 copied_seq = READ_ONCE(tp->copied_seq); in tcp_inq_hint()
2395 u32 rcv_nxt = READ_ONCE(tp->rcv_nxt); in tcp_inq_hint()
2398 inq = rcv_nxt - copied_seq; in tcp_inq_hint()
2399 if (unlikely(inq < 0 || copied_seq != READ_ONCE(tp->copied_seq))) { in tcp_inq_hint()
2401 inq = tp->rcv_nxt - tp->copied_seq; in tcp_inq_hint()
2404 /* After receiving a FIN, tell the user-space to continue reading in tcp_inq_hint()
2405 * by returning a non-zero inq. in tcp_inq_hint()
2425 for (i = 0; i < p->idx; i++) in tcp_xa_pool_commit_locked()
2426 __xa_cmpxchg(&sk->sk_user_frags, p->tokens[i], XA_ZERO_ENTRY, in tcp_xa_pool_commit_locked()
2427 (__force void *)p->netmems[i], GFP_KERNEL); in tcp_xa_pool_commit_locked()
2428 /* Rollback what has been pre-allocated and is no longer needed. */ in tcp_xa_pool_commit_locked()
2429 for (; i < p->max; i++) in tcp_xa_pool_commit_locked()
2430 __xa_erase(&sk->sk_user_frags, p->tokens[i]); in tcp_xa_pool_commit_locked()
2432 p->max = 0; in tcp_xa_pool_commit_locked()
2433 p->idx = 0; in tcp_xa_pool_commit_locked()
2438 if (!p->max) in tcp_xa_pool_commit()
2441 xa_lock_bh(&sk->sk_user_frags); in tcp_xa_pool_commit()
2445 xa_unlock_bh(&sk->sk_user_frags); in tcp_xa_pool_commit()
2453 if (p->idx < p->max) in tcp_xa_pool_refill()
2456 xa_lock_bh(&sk->sk_user_frags); in tcp_xa_pool_refill()
2461 err = __xa_alloc(&sk->sk_user_frags, &p->tokens[k], in tcp_xa_pool_refill()
2467 xa_unlock_bh(&sk->sk_user_frags); in tcp_xa_pool_refill()
2469 p->max = k; in tcp_xa_pool_refill()
2470 p->idx = 0; in tcp_xa_pool_refill()
2474 /* On error, returns the -errno. On success, returns number of bytes sent to the
2494 err = -ENODEV; in tcp_recvmsg_dmabuf()
2499 copy = start - offset; in tcp_recvmsg_dmabuf()
2503 n = copy_to_iter(skb->data + offset, copy, in tcp_recvmsg_dmabuf()
2504 &msg->msg_iter); in tcp_recvmsg_dmabuf()
2506 err = -EFAULT; in tcp_recvmsg_dmabuf()
2511 remaining_len -= copy; in tcp_recvmsg_dmabuf()
2534 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { in tcp_recvmsg_dmabuf()
2535 skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; in tcp_recvmsg_dmabuf()
2548 net_err_ratelimited("Found non-dmabuf skb with net_iov"); in tcp_recvmsg_dmabuf()
2549 err = -ENODEV; in tcp_recvmsg_dmabuf()
2555 err = -ENODEV; in tcp_recvmsg_dmabuf()
2560 copy = end - offset; in tcp_recvmsg_dmabuf()
2566 skb_frag_off(frag) + offset - in tcp_recvmsg_dmabuf()
2571 skb_shinfo(skb)->nr_frags - i); in tcp_recvmsg_dmabuf()
2580 remaining_len -= copy; in tcp_recvmsg_dmabuf()
2589 atomic_long_inc(&niov->pp_ref_count); in tcp_recvmsg_dmabuf()
2607 skb = skb_shinfo(skb)->frag_list ?: skb->next; in tcp_recvmsg_dmabuf()
2609 offset = offset - start; in tcp_recvmsg_dmabuf()
2613 err = -EFAULT; in tcp_recvmsg_dmabuf()
2629 * tricks with *seq access order and skb->users are not required.
2638 int last_copied_dmabuf = -1; /* uninitialized */ in tcp_recvmsg_locked()
2650 err = -ENOTCONN; in tcp_recvmsg_locked()
2651 if (sk->sk_state == TCP_LISTEN) in tcp_recvmsg_locked()
2654 if (tp->recvmsg_inq) { in tcp_recvmsg_locked()
2656 msg->msg_get_inq = 1; in tcp_recvmsg_locked()
2660 /* Urgent data needs to be handled specially. */ in tcp_recvmsg_locked()
2664 if (unlikely(tp->repair)) { in tcp_recvmsg_locked()
2665 err = -EPERM; in tcp_recvmsg_locked()
2669 if (tp->repair_queue == TCP_SEND_QUEUE) in tcp_recvmsg_locked()
2672 err = -EINVAL; in tcp_recvmsg_locked()
2673 if (tp->repair_queue == TCP_NO_QUEUE) in tcp_recvmsg_locked()
2676 /* 'common' recv queue MSG_PEEK-ing */ in tcp_recvmsg_locked()
2679 seq = &tp->copied_seq; in tcp_recvmsg_locked()
2682 peek_seq = tp->copied_seq + peek_offset; in tcp_recvmsg_locked()
2691 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ in tcp_recvmsg_locked()
2692 if (unlikely(tp->urg_data) && tp->urg_seq == *seq) { in tcp_recvmsg_locked()
2696 copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; in tcp_recvmsg_locked()
2703 last = skb_peek_tail(&sk->sk_receive_queue); in tcp_recvmsg_locked()
2704 skb_queue_walk(&sk->sk_receive_queue, skb) { in tcp_recvmsg_locked()
2709 if (WARN(before(*seq, TCP_SKB_CB(skb)->seq), in tcp_recvmsg_locked()
2711 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, in tcp_recvmsg_locked()
2715 offset = *seq - TCP_SKB_CB(skb)->seq; in tcp_recvmsg_locked()
2716 if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { in tcp_recvmsg_locked()
2718 offset--; in tcp_recvmsg_locked()
2720 if (offset < skb->len) in tcp_recvmsg_locked()
2722 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) in tcp_recvmsg_locked()
2726 *seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags); in tcp_recvmsg_locked()
2731 if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) in tcp_recvmsg_locked()
2736 sk->sk_err || in tcp_recvmsg_locked()
2737 sk->sk_state == TCP_CLOSE || in tcp_recvmsg_locked()
2738 (sk->sk_shutdown & RCV_SHUTDOWN) || in tcp_recvmsg_locked()
2745 if (sk->sk_err) { in tcp_recvmsg_locked()
2750 if (sk->sk_shutdown & RCV_SHUTDOWN) in tcp_recvmsg_locked()
2753 if (sk->sk_state == TCP_CLOSE) { in tcp_recvmsg_locked()
2757 copied = -ENOTCONN; in tcp_recvmsg_locked()
2762 copied = -EAGAIN; in tcp_recvmsg_locked()
2785 (peek_seq - peek_offset - copied - urg_hole != tp->copied_seq)) { in tcp_recvmsg_locked()
2787 current->comm, in tcp_recvmsg_locked()
2789 peek_seq = tp->copied_seq + peek_offset; in tcp_recvmsg_locked()
2795 used = skb->len - offset; in tcp_recvmsg_locked()
2799 /* Do we have urgent data here? */ in tcp_recvmsg_locked()
2800 if (unlikely(tp->urg_data)) { in tcp_recvmsg_locked()
2801 u32 urg_offset = tp->urg_seq - *seq; in tcp_recvmsg_locked()
2808 used--; in tcp_recvmsg_locked()
2818 if (last_copied_dmabuf != -1 && in tcp_recvmsg_locked()
2828 copied = -EFAULT; in tcp_recvmsg_locked()
2837 copied = -EFAULT; in tcp_recvmsg_locked()
2858 len -= used; in tcp_recvmsg_locked()
2866 if (unlikely(tp->urg_data) && after(tp->copied_seq, tp->urg_seq)) { in tcp_recvmsg_locked()
2867 WRITE_ONCE(tp->urg_data, 0); in tcp_recvmsg_locked()
2871 if (TCP_SKB_CB(skb)->has_rxtstamp) { in tcp_recvmsg_locked()
2876 if (used + offset < skb->len) in tcp_recvmsg_locked()
2879 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) in tcp_recvmsg_locked()
2894 * on connected socket. I was just happy when found this 8) --ANK in tcp_recvmsg_locked()
2923 skb_queue_empty_lockless(&sk->sk_receive_queue) && in tcp_recvmsg()
2924 sk->sk_state == TCP_ESTABLISHED) in tcp_recvmsg()
2931 if ((cmsg_flags || msg->msg_get_inq) && ret >= 0) { in tcp_recvmsg()
2934 if (msg->msg_get_inq) { in tcp_recvmsg()
2935 msg->msg_inq = tcp_inq_hint(sk); in tcp_recvmsg()
2938 sizeof(msg->msg_inq), &msg->msg_inq); in tcp_recvmsg()
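
The msg_inq value above is delivered as a control message once TCP_INQ is enabled on the socket; a hedged sketch of reading it (the kernel emits the cmsg type as TCP_CM_INQ, an alias of TCP_INQ in the uapi header):

#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>

#ifndef TCP_INQ
#define TCP_INQ 36			/* fallback for older headers */
#endif

static ssize_t recv_with_inq(int fd, void *buf, size_t len, int *inq)
{
	char cbuf[CMSG_SPACE(sizeof(int))];
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cm;
	int one = 1;
	ssize_t n;

	setsockopt(fd, IPPROTO_TCP, TCP_INQ, &one, sizeof(one));
	n = recvmsg(fd, &msg, 0);
	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
		if (cm->cmsg_level == IPPROTO_TCP && cm->cmsg_type == TCP_INQ)
			memcpy(inq, CMSG_DATA(cm), sizeof(*inq));	/* bytes still queued */
	return n;
}
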
2947 int oldstate = sk->sk_state; in tcp_set_state()
2977 * above-mentioned anonymous enum in the vmlinux DWARF and hence BTF in tcp_set_state()
2999 sk->sk_prot->unhash(sk); in tcp_set_state()
3000 if (inet_csk(sk)->icsk_bind_hash && in tcp_set_state()
3001 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) in tcp_set_state()
3042 int next = (int)new_state[sk->sk_state]; in tcp_close_state()
3065 if ((1 << sk->sk_state) & in tcp_shutdown()
3103 if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && in tcp_out_of_memory()
3119 net_info_ratelimited("out of memory -- consider tuning tcp_mem\n"); in tcp_check_oom()
3129 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); in __tcp_close()
3131 if (sk->sk_state == TCP_LISTEN) { in __tcp_close()
3141 * descriptor close, not protocol-sourced closes, because the in __tcp_close()
3144 while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { in __tcp_close()
3145 u32 end_seq = TCP_SKB_CB(skb)->end_seq; in __tcp_close()
3147 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) in __tcp_close()
3148 end_seq--; in __tcp_close()
3149 if (after(end_seq, tcp_sk(sk)->copied_seq)) in __tcp_close()
3154 /* If socket has been already reset (e.g. in tcp_reset()) - kill it. */ in __tcp_close()
3155 if (sk->sk_state == TCP_CLOSE) in __tcp_close()
3162 * advertise a zero window, then kill -9 the FTP client, wheee... in __tcp_close()
3165 if (unlikely(tcp_sk(sk)->repair)) { in __tcp_close()
3166 sk->sk_prot->disconnect(sk, 0); in __tcp_close()
3171 tcp_send_active_reset(sk, sk->sk_allocation, in __tcp_close()
3173 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { in __tcp_close()
3175 sk->sk_prot->disconnect(sk, 0); in __tcp_close()
3182 /* RED-PEN. Formally speaking, we have broken TCP state in __tcp_close()
3185 * TCP_ESTABLISHED -> TCP_FIN_WAIT1 in __tcp_close()
3186 * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) in __tcp_close()
3187 * TCP_CLOSE_WAIT -> TCP_LAST_ACK in __tcp_close()
3193 * if Linux state is FIN-WAIT-1, but FIN is still not sent. in __tcp_close()
3196 * we enter time-wait state, when it is not required really in __tcp_close()
3201 * --ANK in __tcp_close()
3202 * XXX (TFO) - To start off we don't support SYN+ACK+FIN in __tcp_close()
3204 * probably need API support or TCP_CORK SYN-ACK until in __tcp_close()
3213 state = sk->sk_state; in __tcp_close()
3225 if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) in __tcp_close()
3232 * our end. If they send after that then tough - BUT: long enough in __tcp_close()
3233 * that we won't make the old 4*rto = almost no time - whoops in __tcp_close()
3239 * linger2 option. --ANK in __tcp_close()
3242 if (sk->sk_state == TCP_FIN_WAIT2) { in __tcp_close()
3244 if (READ_ONCE(tp->linger2) < 0) { in __tcp_close()
3255 tmo - TCP_TIMEWAIT_LEN); in __tcp_close()
3262 if (sk->sk_state != TCP_CLOSE) { in __tcp_close()
3275 if (sk->sk_state == TCP_CLOSE) { in __tcp_close()
3278 req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, in __tcp_close()
3280 /* We could get here with a non-NULL req if the socket is in __tcp_close()
3300 if (!sk->sk_net_refcnt) in tcp_close()
3317 struct rb_node *p = rb_first(&sk->tcp_rtx_queue); in tcp_rtx_queue_purge()
3319 tcp_sk(sk)->highest_sack = NULL; in tcp_rtx_queue_purge()
3325 * list_del(&skb->tcp_tsorted_anchor) in tcp_rtx_queue_purge()
3337 while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { in tcp_write_queue_purge()
3342 INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); in tcp_write_queue_purge()
3344 tcp_sk(sk)->packets_out = 0; in tcp_write_queue_purge()
3345 inet_csk(sk)->icsk_backoff = 0; in tcp_write_queue_purge()
3353 int old_state = sk->sk_state; in tcp_disconnect()
3363 } else if (unlikely(tp->repair)) { in tcp_disconnect()
3364 WRITE_ONCE(sk->sk_err, ECONNABORTED); in tcp_disconnect()
3367 WRITE_ONCE(sk->sk_err, ECONNRESET); in tcp_disconnect()
3368 } else if (tp->snd_nxt != tp->write_seq && in tcp_disconnect()
3375 WRITE_ONCE(sk->sk_err, ECONNRESET); in tcp_disconnect()
3377 WRITE_ONCE(sk->sk_err, ECONNRESET); in tcp_disconnect()
3380 __skb_queue_purge(&sk->sk_receive_queue); in tcp_disconnect()
3381 WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); in tcp_disconnect()
3382 WRITE_ONCE(tp->urg_data, 0); in tcp_disconnect()
3383 sk_set_peek_off(sk, -1); in tcp_disconnect()
3386 skb_rbtree_purge(&tp->out_of_order_queue); in tcp_disconnect()
3388 inet->inet_dport = 0; in tcp_disconnect()
3392 WRITE_ONCE(sk->sk_shutdown, 0); in tcp_disconnect()
3394 tp->srtt_us = 0; in tcp_disconnect()
3395 tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); in tcp_disconnect()
3396 tp->rcv_rtt_last_tsecr = 0; in tcp_disconnect()
3398 seq = tp->write_seq + tp->max_window + 2; in tcp_disconnect()
3401 WRITE_ONCE(tp->write_seq, seq); in tcp_disconnect()
3403 icsk->icsk_backoff = 0; in tcp_disconnect()
3404 WRITE_ONCE(icsk->icsk_probes_out, 0); in tcp_disconnect()
3405 icsk->icsk_probes_tstamp = 0; in tcp_disconnect()
3406 icsk->icsk_rto = TCP_TIMEOUT_INIT; in tcp_disconnect()
3407 WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN); in tcp_disconnect()
3408 WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX); in tcp_disconnect()
3409 tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; in tcp_disconnect()
3411 tp->snd_cwnd_cnt = 0; in tcp_disconnect()
3412 tp->is_cwnd_limited = 0; in tcp_disconnect()
3413 tp->max_packets_out = 0; in tcp_disconnect()
3414 tp->window_clamp = 0; in tcp_disconnect()
3415 tp->delivered = 0; in tcp_disconnect()
3416 tp->delivered_ce = 0; in tcp_disconnect()
3417 tp->accecn_fail_mode = 0; in tcp_disconnect()
3418 tp->saw_accecn_opt = TCP_ACCECN_OPT_NOT_SEEN; in tcp_disconnect()
3420 tp->prev_ecnfield = 0; in tcp_disconnect()
3421 tp->accecn_opt_tstamp = 0; in tcp_disconnect()
3422 if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release) in tcp_disconnect()
3423 icsk->icsk_ca_ops->release(sk); in tcp_disconnect()
3424 memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); in tcp_disconnect()
3425 icsk->icsk_ca_initialized = 0; in tcp_disconnect()
3427 tp->is_sack_reneg = 0; in tcp_disconnect()
3429 tp->total_retrans = 0; in tcp_disconnect()
3434 icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; in tcp_disconnect()
3435 memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); in tcp_disconnect()
3437 dst_release(unrcu_pointer(xchg(&sk->sk_rx_dst, NULL))); in tcp_disconnect()
3439 tp->compressed_ack = 0; in tcp_disconnect()
3440 tp->segs_in = 0; in tcp_disconnect()
3441 tp->segs_out = 0; in tcp_disconnect()
3442 tp->bytes_sent = 0; in tcp_disconnect()
3443 tp->bytes_acked = 0; in tcp_disconnect()
3444 tp->bytes_received = 0; in tcp_disconnect()
3445 tp->bytes_retrans = 0; in tcp_disconnect()
3446 tp->data_segs_in = 0; in tcp_disconnect()
3447 tp->data_segs_out = 0; in tcp_disconnect()
3448 tp->duplicate_sack[0].start_seq = 0; in tcp_disconnect()
3449 tp->duplicate_sack[0].end_seq = 0; in tcp_disconnect()
3450 tp->dsack_dups = 0; in tcp_disconnect()
3451 tp->reord_seen = 0; in tcp_disconnect()
3452 tp->retrans_out = 0; in tcp_disconnect()
3453 tp->sacked_out = 0; in tcp_disconnect()
3454 tp->tlp_high_seq = 0; in tcp_disconnect()
3455 tp->last_oow_ack_time = 0; in tcp_disconnect()
3456 tp->plb_rehash = 0; in tcp_disconnect()
3458 tp->app_limited = ~0U; in tcp_disconnect()
3459 tp->rate_app_limited = 1; in tcp_disconnect()
3460 tp->rack.mstamp = 0; in tcp_disconnect()
3461 tp->rack.advanced = 0; in tcp_disconnect()
3462 tp->rack.reo_wnd_steps = 1; in tcp_disconnect()
3463 tp->rack.last_delivered = 0; in tcp_disconnect()
3464 tp->rack.reo_wnd_persist = 0; in tcp_disconnect()
3465 tp->rack.dsack_seen = 0; in tcp_disconnect()
3466 tp->syn_data_acked = 0; in tcp_disconnect()
3467 tp->syn_fastopen_child = 0; in tcp_disconnect()
3468 tp->rx_opt.saw_tstamp = 0; in tcp_disconnect()
3469 tp->rx_opt.dsack = 0; in tcp_disconnect()
3470 tp->rx_opt.num_sacks = 0; in tcp_disconnect()
3471 tp->rcv_ooopack = 0; in tcp_disconnect()
3475 req = rcu_dereference_protected(tp->fastopen_rsk, in tcp_disconnect()
3481 tp->fastopen_client_fail = 0; in tcp_disconnect()
3483 WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); in tcp_disconnect()
3485 if (sk->sk_frag.page) { in tcp_disconnect()
3486 put_page(sk->sk_frag.page); in tcp_disconnect()
3487 sk->sk_frag.page = NULL; in tcp_disconnect()
3488 sk->sk_frag.offset = 0; in tcp_disconnect()
3497 return sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) && in tcp_can_repair_sock()
3498 (sk->sk_state != TCP_LISTEN); in tcp_can_repair_sock()
3505 if (!tp->repair) in tcp_repair_set_window()
3506 return -EPERM; in tcp_repair_set_window()
3509 return -EINVAL; in tcp_repair_set_window()
3512 return -EFAULT; in tcp_repair_set_window()
3515 return -EINVAL; in tcp_repair_set_window()
3517 if (after(opt.snd_wl1, tp->rcv_nxt + opt.rcv_wnd)) in tcp_repair_set_window()
3518 return -EINVAL; in tcp_repair_set_window()
3520 if (after(opt.rcv_wup, tp->rcv_nxt)) in tcp_repair_set_window()
3521 return -EINVAL; in tcp_repair_set_window()
3523 tp->snd_wl1 = opt.snd_wl1; in tcp_repair_set_window()
3524 tp->snd_wnd = opt.snd_wnd; in tcp_repair_set_window()
3525 tp->max_window = opt.max_window; in tcp_repair_set_window()
3527 tp->rcv_wnd = opt.rcv_wnd; in tcp_repair_set_window()
3528 tp->rcv_wup = opt.rcv_wup; in tcp_repair_set_window()
3542 return -EFAULT; in tcp_repair_options_est()
3545 len -= sizeof(opt); in tcp_repair_options_est()
3549 tp->rx_opt.mss_clamp = opt.opt_val; in tcp_repair_options_est()
3558 return -EFBIG; in tcp_repair_options_est()
3560 tp->rx_opt.snd_wscale = snd_wscale; in tcp_repair_options_est()
3561 tp->rx_opt.rcv_wscale = rcv_wscale; in tcp_repair_options_est()
3562 tp->rx_opt.wscale_ok = 1; in tcp_repair_options_est()
3567 return -EINVAL; in tcp_repair_options_est()
3569 tp->rx_opt.sack_ok |= TCP_SACK_SEEN; in tcp_repair_options_est()
3573 return -EINVAL; in tcp_repair_options_est()
3575 tp->rx_opt.tstamp_ok = 1; in tcp_repair_options_est()
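
These repair-mode paths are driven from user space by checkpoint/restore tools such as CRIU; a heavily trimmed sketch of reading the send-queue sequence number (needs CAP_NET_ADMIN and a non-listening socket, per tcp_can_repair_sock() above):

#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>		/* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_SEND_QUEUE */

static int dump_send_seq(int fd, unsigned int *write_seq)
{
	int on = 1, off = 0, q = TCP_SEND_QUEUE;
	socklen_t len = sizeof(*write_seq);

	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
	setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
	getsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, write_seq, &len);
	return setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off));
}
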
3598 /* When set indicates to always queue non-full frames. Later the user clears
3612 tp->nonagle |= TCP_NAGLE_CORK; in __tcp_sock_set_cork()
3614 tp->nonagle &= ~TCP_NAGLE_CORK; in __tcp_sock_set_cork()
3615 if (tp->nonagle & TCP_NAGLE_OFF) in __tcp_sock_set_cork()
3616 tp->nonagle |= TCP_NAGLE_PUSH; in __tcp_sock_set_cork()
3638 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH; in __tcp_sock_set_nodelay()
3641 tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF; in __tcp_sock_set_nodelay()
3661 if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && in __tcp_sock_set_quickack()
3663 inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED; in __tcp_sock_set_quickack()
3681 return -EINVAL; in tcp_sock_set_syncnt()
3683 WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val); in tcp_sock_set_syncnt()
3694 return -EINVAL; in tcp_sock_set_user_timeout()
3696 WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val); in tcp_sock_set_user_timeout()
3706 return -EINVAL; in tcp_sock_set_keepidle_locked()
3709 WRITE_ONCE(tp->keepalive_time, val * HZ); in tcp_sock_set_keepidle_locked()
3711 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { in tcp_sock_set_keepidle_locked()
3714 if (tp->keepalive_time > elapsed) in tcp_sock_set_keepidle_locked()
3715 elapsed = tp->keepalive_time - elapsed; in tcp_sock_set_keepidle_locked()
3738 return -EINVAL; in tcp_sock_set_keepintvl()
3740 WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ); in tcp_sock_set_keepintvl()
3748 return -EINVAL; in tcp_sock_set_keepcnt()
3751 WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val); in tcp_sock_set_keepcnt()
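
These three setters back the classic keepalive socket options; a minimal sketch with illustrative values (seconds, except the probe count):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static void tune_keepalive(int fd)
{
	int on = 1, idle = 60, intvl = 10, cnt = 5;

	setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle));    /* first probe after 60 s idle */
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)); /* 10 s between probes */
	setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt));	   /* drop after 5 unanswered probes */
}
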
3762 if (sk->sk_state != TCP_CLOSE) in tcp_set_window_clamp()
3763 return -EINVAL; in tcp_set_window_clamp()
3764 WRITE_ONCE(tp->window_clamp, 0); in tcp_set_window_clamp()
3768 old_window_clamp = tp->window_clamp; in tcp_set_window_clamp()
3774 WRITE_ONCE(tp->window_clamp, new_window_clamp); in tcp_set_window_clamp()
3782 new_rcv_ssthresh = min(tp->rcv_wnd, new_window_clamp); in tcp_set_window_clamp()
3783 tp->rcv_ssthresh = max(new_rcv_ssthresh, tp->rcv_ssthresh); in tcp_set_window_clamp()
3795 return -EINVAL; in tcp_sock_set_maxseg()
3797 WRITE_ONCE(tcp_sk(sk)->rx_opt.user_mss, val); in tcp_sock_set_maxseg()
3819 return -EINVAL; in do_tcp_setsockopt()
3822 min_t(long, TCP_CA_NAME_MAX-1, optlen)); in do_tcp_setsockopt()
3824 return -EFAULT; in do_tcp_setsockopt()
3829 sockopt_ns_capable(sock_net(sk)->user_ns, in do_tcp_setsockopt()
3838 return -EINVAL; in do_tcp_setsockopt()
3841 min_t(long, TCP_ULP_NAME_MAX - 1, in do_tcp_setsockopt()
3844 return -EFAULT; in do_tcp_setsockopt()
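
TCP_CONGESTION and TCP_ULP both take a short name string copied as above; a minimal sketch of switching one socket's congestion control (algorithms outside net.ipv4.tcp_allowed_congestion_control need CAP_NET_ADMIN, matching the capability check above):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <string.h>

static int use_cc(int fd, const char *name)	/* e.g. "cubic" or "bbr" */
{
	return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, strlen(name));
}
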
3861 return -EINVAL; in do_tcp_setsockopt()
3864 return -EFAULT; in do_tcp_setsockopt()
3877 return -EINVAL; in do_tcp_setsockopt()
3880 return -EFAULT; in do_tcp_setsockopt()
3894 WRITE_ONCE(tp->linger2, -1); in do_tcp_setsockopt()
3896 WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX); in do_tcp_setsockopt()
3898 WRITE_ONCE(tp->linger2, val * HZ); in do_tcp_setsockopt()
3902 WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept, in do_tcp_setsockopt()
3908 return -EINVAL; in do_tcp_setsockopt()
3909 WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val)); in do_tcp_setsockopt()
3915 return -EINVAL; in do_tcp_setsockopt()
3916 WRITE_ONCE(inet_csk(sk)->icsk_rto_min, rto_min); in do_tcp_setsockopt()
3923 return -EINVAL; in do_tcp_setsockopt()
3924 WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max); in do_tcp_setsockopt()
3940 err = -EINVAL; in do_tcp_setsockopt()
3942 tp->thin_lto = val; in do_tcp_setsockopt()
3947 err = -EINVAL; in do_tcp_setsockopt()
3952 err = -EPERM; in do_tcp_setsockopt()
3954 tp->repair = 1; in do_tcp_setsockopt()
3955 sk->sk_reuse = SK_FORCE_REUSE; in do_tcp_setsockopt()
3956 tp->repair_queue = TCP_NO_QUEUE; in do_tcp_setsockopt()
3958 tp->repair = 0; in do_tcp_setsockopt()
3959 sk->sk_reuse = SK_NO_REUSE; in do_tcp_setsockopt()
3962 tp->repair = 0; in do_tcp_setsockopt()
3963 sk->sk_reuse = SK_NO_REUSE; in do_tcp_setsockopt()
3965 err = -EINVAL; in do_tcp_setsockopt()
3970 if (!tp->repair) in do_tcp_setsockopt()
3971 err = -EPERM; in do_tcp_setsockopt()
3973 tp->repair_queue = val; in do_tcp_setsockopt()
3975 err = -EINVAL; in do_tcp_setsockopt()
3979 if (sk->sk_state != TCP_CLOSE) { in do_tcp_setsockopt()
3980 err = -EPERM; in do_tcp_setsockopt()
3981 } else if (tp->repair_queue == TCP_SEND_QUEUE) { in do_tcp_setsockopt()
3983 err = -EPERM; in do_tcp_setsockopt()
3985 WRITE_ONCE(tp->write_seq, val); in do_tcp_setsockopt()
3986 } else if (tp->repair_queue == TCP_RECV_QUEUE) { in do_tcp_setsockopt()
3987 if (tp->rcv_nxt != tp->copied_seq) { in do_tcp_setsockopt()
3988 err = -EPERM; in do_tcp_setsockopt()
3990 WRITE_ONCE(tp->rcv_nxt, val); in do_tcp_setsockopt()
3991 WRITE_ONCE(tp->copied_seq, val); in do_tcp_setsockopt()
3994 err = -EINVAL; in do_tcp_setsockopt()
3999 if (!tp->repair) in do_tcp_setsockopt()
4000 err = -EINVAL; in do_tcp_setsockopt()
4001 else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) in do_tcp_setsockopt()
4004 err = -EPERM; in do_tcp_setsockopt()
4017 err = -EINVAL; in do_tcp_setsockopt()
4019 tp->save_syn = val; in do_tcp_setsockopt()
4032 err = -EPERM; in do_tcp_setsockopt()
4041 /* If this is the first TCP-AO setsockopt() on the socket, in do_tcp_setsockopt()
4045 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) in do_tcp_setsockopt()
4047 if (rcu_dereference_protected(tcp_sk(sk)->ao_info, in do_tcp_setsockopt()
4050 if (tp->repair) in do_tcp_setsockopt()
4052 err = -EISCONN; in do_tcp_setsockopt()
4055 err = tp->af_specific->ao_parse(sk, optname, optval, optlen); in do_tcp_setsockopt()
4062 err = tp->af_specific->md5_parse(sk, optname, optval, optlen); in do_tcp_setsockopt()
4066 if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | in do_tcp_setsockopt()
4072 err = -EINVAL; in do_tcp_setsockopt()
4077 err = -EINVAL; in do_tcp_setsockopt()
4078 } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) & in do_tcp_setsockopt()
4080 if (sk->sk_state == TCP_CLOSE) in do_tcp_setsockopt()
4081 tp->fastopen_connect = val; in do_tcp_setsockopt()
4083 err = -EINVAL; in do_tcp_setsockopt()
4085 err = -EOPNOTSUPP; in do_tcp_setsockopt()
4090 err = -EINVAL; in do_tcp_setsockopt()
4091 else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) in do_tcp_setsockopt()
4092 err = -EINVAL; in do_tcp_setsockopt()
4094 tp->fastopen_no_cookie = val; in do_tcp_setsockopt()
4097 if (!tp->repair) { in do_tcp_setsockopt()
4098 err = -EPERM; in do_tcp_setsockopt()
4105 tp->tcp_usec_ts = val & 1; in do_tcp_setsockopt()
4106 WRITE_ONCE(tp->tsoffset, val - tcp_clock_ts(tp->tcp_usec_ts)); in do_tcp_setsockopt()
4112 WRITE_ONCE(tp->notsent_lowat, val); in do_tcp_setsockopt()
4113 sk->sk_write_space(sk); in do_tcp_setsockopt()
4117 err = -EINVAL; in do_tcp_setsockopt()
4119 tp->recvmsg_inq = val; in do_tcp_setsockopt()
4124 WRITE_ONCE(tp->tcp_tx_delay, val); in do_tcp_setsockopt()
4127 err = -ENOPROTOOPT; in do_tcp_setsockopt()
4142 return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, in tcp_setsockopt()
4155 stats[i] = tp->chrono_stat[i - 1]; in tcp_get_info_chrono_stats()
4156 if (i == tp->chrono_type) in tcp_get_info_chrono_stats()
4157 stats[i] += tcp_jiffies32 - tp->chrono_start; in tcp_get_info_chrono_stats()
4162 info->tcpi_busy_time = total; in tcp_get_info_chrono_stats()
4163 info->tcpi_rwnd_limited = stats[TCP_CHRONO_RWND_LIMITED]; in tcp_get_info_chrono_stats()
4164 info->tcpi_sndbuf_limited = stats[TCP_CHRONO_SNDBUF_LIMITED]; in tcp_get_info_chrono_stats()
4172 const u8 ect1_idx = INET_ECN_ECT_1 - 1; in tcp_get_info()
4173 const u8 ect0_idx = INET_ECN_ECT_0 - 1; in tcp_get_info()
4174 const u8 ce_idx = INET_ECN_CE - 1; in tcp_get_info()
4181 if (sk->sk_type != SOCK_STREAM) in tcp_get_info()
4184 info->tcpi_state = inet_sk_state_load(sk); in tcp_get_info()
4187 rate = READ_ONCE(sk->sk_pacing_rate); in tcp_get_info()
4189 info->tcpi_pacing_rate = rate64; in tcp_get_info()
4191 rate = READ_ONCE(sk->sk_max_pacing_rate); in tcp_get_info()
4193 info->tcpi_max_pacing_rate = rate64; in tcp_get_info()
4195 info->tcpi_reordering = tp->reordering; in tcp_get_info()
4196 info->tcpi_snd_cwnd = tcp_snd_cwnd(tp); in tcp_get_info()
4198 if (info->tcpi_state == TCP_LISTEN) { in tcp_get_info()
4200 * tcpi_unacked -> Number of children ready for accept() in tcp_get_info()
4201 * tcpi_sacked -> max backlog in tcp_get_info()
4203 info->tcpi_unacked = READ_ONCE(sk->sk_ack_backlog); in tcp_get_info()
4204 info->tcpi_sacked = READ_ONCE(sk->sk_max_ack_backlog); in tcp_get_info()
4210 info->tcpi_ca_state = icsk->icsk_ca_state; in tcp_get_info()
4211 info->tcpi_retransmits = icsk->icsk_retransmits; in tcp_get_info()
4212 info->tcpi_probes = icsk->icsk_probes_out; in tcp_get_info()
4213 info->tcpi_backoff = icsk->icsk_backoff; in tcp_get_info()
4215 if (tp->rx_opt.tstamp_ok) in tcp_get_info()
4216 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; in tcp_get_info()
4218 info->tcpi_options |= TCPI_OPT_SACK; in tcp_get_info()
4219 if (tp->rx_opt.wscale_ok) { in tcp_get_info()
4220 info->tcpi_options |= TCPI_OPT_WSCALE; in tcp_get_info()
4221 info->tcpi_snd_wscale = tp->rx_opt.snd_wscale; in tcp_get_info()
4222 info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; in tcp_get_info()
4226 info->tcpi_options |= TCPI_OPT_ECN; in tcp_get_info()
4227 if (tp->ecn_flags & TCP_ECN_SEEN) in tcp_get_info()
4228 info->tcpi_options |= TCPI_OPT_ECN_SEEN; in tcp_get_info()
4229 if (tp->syn_data_acked) in tcp_get_info()
4230 info->tcpi_options |= TCPI_OPT_SYN_DATA; in tcp_get_info()
4231 if (tp->tcp_usec_ts) in tcp_get_info()
4232 info->tcpi_options |= TCPI_OPT_USEC_TS; in tcp_get_info()
4233 if (tp->syn_fastopen_child) in tcp_get_info()
4234 info->tcpi_options |= TCPI_OPT_TFO_CHILD; in tcp_get_info()
4236 info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); in tcp_get_info()
4237 info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato, in tcp_get_info()
4239 info->tcpi_snd_mss = tp->mss_cache; in tcp_get_info()
4240 info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; in tcp_get_info()
4242 info->tcpi_unacked = tp->packets_out; in tcp_get_info()
4243 info->tcpi_sacked = tp->sacked_out; in tcp_get_info()
4245 info->tcpi_lost = tp->lost_out; in tcp_get_info()
4246 info->tcpi_retrans = tp->retrans_out; in tcp_get_info()
4249 info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); in tcp_get_info()
4250 info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); in tcp_get_info()
4251 info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); in tcp_get_info()
4253 info->tcpi_pmtu = icsk->icsk_pmtu_cookie; in tcp_get_info()
4254 info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; in tcp_get_info()
4255 info->tcpi_rtt = tp->srtt_us >> 3; in tcp_get_info()
4256 info->tcpi_rttvar = tp->mdev_us >> 2; in tcp_get_info()
4257 info->tcpi_snd_ssthresh = tp->snd_ssthresh; in tcp_get_info()
4258 info->tcpi_advmss = tp->advmss; in tcp_get_info()
4260 info->tcpi_rcv_rtt = tp->rcv_rtt_est.rtt_us >> 3; in tcp_get_info()
4261 info->tcpi_rcv_space = tp->rcvq_space.space; in tcp_get_info()
4263 info->tcpi_total_retrans = tp->total_retrans; in tcp_get_info()
4265 info->tcpi_bytes_acked = tp->bytes_acked; in tcp_get_info()
4266 info->tcpi_bytes_received = tp->bytes_received; in tcp_get_info()
4267 info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); in tcp_get_info()
4270 info->tcpi_segs_out = tp->segs_out; in tcp_get_info()
4273 info->tcpi_segs_in = READ_ONCE(tp->segs_in); in tcp_get_info()
4274 info->tcpi_data_segs_in = READ_ONCE(tp->data_segs_in); in tcp_get_info()
4276 info->tcpi_min_rtt = tcp_min_rtt(tp); in tcp_get_info()
4277 info->tcpi_data_segs_out = tp->data_segs_out; in tcp_get_info()
4279 info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; in tcp_get_info()
4282 info->tcpi_delivery_rate = rate64; in tcp_get_info()
4283 info->tcpi_delivered = tp->delivered; in tcp_get_info()
4284 info->tcpi_delivered_ce = tp->delivered_ce; in tcp_get_info()
4285 info->tcpi_bytes_sent = tp->bytes_sent; in tcp_get_info()
4286 info->tcpi_bytes_retrans = tp->bytes_retrans; in tcp_get_info()
4287 info->tcpi_dsack_dups = tp->dsack_dups; in tcp_get_info()
4288 info->tcpi_reord_seen = tp->reord_seen; in tcp_get_info()
4289 info->tcpi_rcv_ooopack = tp->rcv_ooopack; in tcp_get_info()
4290 info->tcpi_snd_wnd = tp->snd_wnd; in tcp_get_info()
4291 info->tcpi_rcv_wnd = tp->rcv_wnd; in tcp_get_info()
4292 info->tcpi_rehash = tp->plb_rehash + tp->timeout_rehash; in tcp_get_info()
4293 info->tcpi_fastopen_client_fail = tp->fastopen_client_fail; in tcp_get_info()
4295 info->tcpi_total_rto = tp->total_rto; in tcp_get_info()
4296 info->tcpi_total_rto_recoveries = tp->total_rto_recoveries; in tcp_get_info()
4297 info->tcpi_total_rto_time = tp->total_rto_time; in tcp_get_info()
4298 if (tp->rto_stamp) in tcp_get_info()
4299 info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp; in tcp_get_info()
4301 info->tcpi_accecn_fail_mode = tp->accecn_fail_mode; in tcp_get_info()
4302 info->tcpi_accecn_opt_seen = tp->saw_accecn_opt; in tcp_get_info()
4303 info->tcpi_received_ce = tp->received_ce; in tcp_get_info()
4304 info->tcpi_delivered_e1_bytes = tp->delivered_ecn_bytes[ect1_idx]; in tcp_get_info()
4305 info->tcpi_delivered_e0_bytes = tp->delivered_ecn_bytes[ect0_idx]; in tcp_get_info()
4306 info->tcpi_delivered_ce_bytes = tp->delivered_ecn_bytes[ce_idx]; in tcp_get_info()
4307 info->tcpi_received_e1_bytes = tp->received_ecn_bytes[ect1_idx]; in tcp_get_info()
4308 info->tcpi_received_e0_bytes = tp->received_ecn_bytes[ect0_idx]; in tcp_get_info()
4309 info->tcpi_received_ce_bytes = tp->received_ecn_bytes[ce_idx]; in tcp_get_info()
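The tcp_get_info() fields above are exported to userspace through the TCP_INFO socket option. A minimal sketch of reading a few of them follows; it assumes an already-connected TCP socket fd, and dump_tcp_info is an illustrative name, not part of the kernel sources:

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>

	/* Sketch: dump a few tcp_info fields filled in by tcp_get_info().
	 * Passing an optlen shorter than sizeof(struct tcp_info) is fine;
	 * the kernel copies only as much as the caller asked for.
	 */
	static void dump_tcp_info(int fd)
	{
		struct tcp_info ti;
		socklen_t len = sizeof(ti);

		memset(&ti, 0, sizeof(ti));
		if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &ti, &len) < 0) {
			perror("getsockopt(TCP_INFO)");
			return;
		}
		printf("rtt=%uus rttvar=%uus cwnd=%u total_retrans=%u\n",
		       ti.tcpi_rtt, ti.tcpi_rttvar, ti.tcpi_snd_cwnd,
		       ti.tcpi_total_retrans);
	}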
4351 if (skb->protocol == htons(ETH_P_IP)) in tcp_skb_ttl_or_hop_limit()
4352 return ip_hdr(skb)->ttl; in tcp_skb_ttl_or_hop_limit()
4353 else if (skb->protocol == htons(ETH_P_IPV6)) in tcp_skb_ttl_or_hop_limit()
4354 return ipv6_hdr(skb)->hop_limit; in tcp_skb_ttl_or_hop_limit()
4381 tp->data_segs_out, TCP_NLA_PAD); in tcp_get_timestamping_opt_stats()
4383 tp->total_retrans, TCP_NLA_PAD); in tcp_get_timestamping_opt_stats()
4385 rate = READ_ONCE(sk->sk_pacing_rate); in tcp_get_timestamping_opt_stats()
4393 nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); in tcp_get_timestamping_opt_stats()
4397 READ_ONCE(inet_csk(sk)->icsk_retransmits)); in tcp_get_timestamping_opt_stats()
4398 nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); in tcp_get_timestamping_opt_stats()
4399 nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); in tcp_get_timestamping_opt_stats()
4400 nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); in tcp_get_timestamping_opt_stats()
4401 nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); in tcp_get_timestamping_opt_stats()
4403 nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); in tcp_get_timestamping_opt_stats()
4404 nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); in tcp_get_timestamping_opt_stats()
4406 nla_put_u64_64bit(stats, TCP_NLA_BYTES_SENT, tp->bytes_sent, in tcp_get_timestamping_opt_stats()
4408 nla_put_u64_64bit(stats, TCP_NLA_BYTES_RETRANS, tp->bytes_retrans, in tcp_get_timestamping_opt_stats()
4410 nla_put_u32(stats, TCP_NLA_DSACK_DUPS, tp->dsack_dups); in tcp_get_timestamping_opt_stats()
4411 nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); in tcp_get_timestamping_opt_stats()
4412 nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); in tcp_get_timestamping_opt_stats()
4413 nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); in tcp_get_timestamping_opt_stats()
4415 max_t(int, 0, tp->write_seq - tp->snd_nxt)); in tcp_get_timestamping_opt_stats()
4416 nla_put_u64_64bit(stats, TCP_NLA_EDT, orig_skb->skb_mstamp_ns, in tcp_get_timestamping_opt_stats()
4422 nla_put_u32(stats, TCP_NLA_REHASH, tp->plb_rehash + tp->timeout_rehash); in tcp_get_timestamping_opt_stats()
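tcp_get_timestamping_opt_stats() builds the nested TCP_NLA_* attributes that are delivered on the socket error queue as an SCM_TIMESTAMPING_OPT_STATS control message whenever SOF_TIMESTAMPING_OPT_STATS is enabled. A sketch of turning that on for a socket fd (enable_tx_opt_stats is an illustrative name):

	#include <linux/net_tstamp.h>
	#include <sys/socket.h>

	/* Sketch: request that transmit timestamps carry the nested TCP_NLA_*
	 * statistics. They arrive on the error queue as a control message of
	 * type SCM_TIMESTAMPING_OPT_STATS alongside the timestamp itself.
	 */
	static int enable_tx_opt_stats(int fd)
	{
		unsigned int flags = SOF_TIMESTAMPING_SOFTWARE |
				     SOF_TIMESTAMPING_TX_ACK |
				     SOF_TIMESTAMPING_OPT_ID |
				     SOF_TIMESTAMPING_OPT_TSONLY |
				     SOF_TIMESTAMPING_OPT_STATS;

		return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
				  &flags, sizeof(flags));
	}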
4436 return -EFAULT; in do_tcp_getsockopt()
4439 return -EINVAL; in do_tcp_getsockopt()
4445 val = tp->mss_cache; in do_tcp_getsockopt()
4446 user_mss = READ_ONCE(tp->rx_opt.user_mss); in do_tcp_getsockopt()
4448 ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) in do_tcp_getsockopt()
4450 if (tp->repair) in do_tcp_getsockopt()
4451 val = tp->rx_opt.mss_clamp; in do_tcp_getsockopt()
4454 val = !!(tp->nonagle&TCP_NAGLE_OFF); in do_tcp_getsockopt()
4457 val = !!(tp->nonagle&TCP_NAGLE_CORK); in do_tcp_getsockopt()
4469 val = READ_ONCE(icsk->icsk_syn_retries) ? : in do_tcp_getsockopt()
4470 READ_ONCE(net->ipv4.sysctl_tcp_syn_retries); in do_tcp_getsockopt()
4473 val = READ_ONCE(tp->linger2); in do_tcp_getsockopt()
4475 val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ; in do_tcp_getsockopt()
4478 val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept); in do_tcp_getsockopt()
4483 val = READ_ONCE(tp->window_clamp); in do_tcp_getsockopt()
4489 return -EFAULT; in do_tcp_getsockopt()
4495 return -EFAULT; in do_tcp_getsockopt()
4497 return -EFAULT; in do_tcp_getsockopt()
4507 return -EFAULT; in do_tcp_getsockopt()
4509 ca_ops = icsk->icsk_ca_ops; in do_tcp_getsockopt()
4510 if (ca_ops && ca_ops->get_info) in do_tcp_getsockopt()
4511 sz = ca_ops->get_info(sk, ~0U, &attr, &info); in do_tcp_getsockopt()
4515 return -EFAULT; in do_tcp_getsockopt()
4517 return -EFAULT; in do_tcp_getsockopt()
4526 return -EFAULT; in do_tcp_getsockopt()
4529 return -EFAULT; in do_tcp_getsockopt()
4530 if (copy_to_sockptr(optval, icsk->icsk_ca_ops->name, len)) in do_tcp_getsockopt()
4531 return -EFAULT; in do_tcp_getsockopt()
4536 return -EFAULT; in do_tcp_getsockopt()
4538 if (!icsk->icsk_ulp_ops) { in do_tcp_getsockopt()
4541 return -EFAULT; in do_tcp_getsockopt()
4545 return -EFAULT; in do_tcp_getsockopt()
4546 if (copy_to_sockptr(optval, icsk->icsk_ulp_ops->name, len)) in do_tcp_getsockopt()
4547 return -EFAULT; in do_tcp_getsockopt()
4555 return -EFAULT; in do_tcp_getsockopt()
4561 return -EFAULT; in do_tcp_getsockopt()
4563 return -EFAULT; in do_tcp_getsockopt()
4567 val = tp->thin_lto; in do_tcp_getsockopt()
4575 val = tp->repair; in do_tcp_getsockopt()
4579 if (tp->repair) in do_tcp_getsockopt()
4580 val = tp->repair_queue; in do_tcp_getsockopt()
4582 return -EINVAL; in do_tcp_getsockopt()
4589 return -EFAULT; in do_tcp_getsockopt()
4592 return -EINVAL; in do_tcp_getsockopt()
4594 if (!tp->repair) in do_tcp_getsockopt()
4595 return -EPERM; in do_tcp_getsockopt()
4597 opt.snd_wl1 = tp->snd_wl1; in do_tcp_getsockopt()
4598 opt.snd_wnd = tp->snd_wnd; in do_tcp_getsockopt()
4599 opt.max_window = tp->max_window; in do_tcp_getsockopt()
4600 opt.rcv_wnd = tp->rcv_wnd; in do_tcp_getsockopt()
4601 opt.rcv_wup = tp->rcv_wup; in do_tcp_getsockopt()
4604 return -EFAULT; in do_tcp_getsockopt()
4608 if (tp->repair_queue == TCP_SEND_QUEUE) in do_tcp_getsockopt()
4609 val = tp->write_seq; in do_tcp_getsockopt()
4610 else if (tp->repair_queue == TCP_RECV_QUEUE) in do_tcp_getsockopt()
4611 val = tp->rcv_nxt; in do_tcp_getsockopt()
4613 return -EINVAL; in do_tcp_getsockopt()
4617 val = READ_ONCE(icsk->icsk_user_timeout); in do_tcp_getsockopt()
4621 val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen); in do_tcp_getsockopt()
4625 val = tp->fastopen_connect; in do_tcp_getsockopt()
4629 val = tp->fastopen_no_cookie; in do_tcp_getsockopt()
4633 val = READ_ONCE(tp->tcp_tx_delay); in do_tcp_getsockopt()
4637 val = tcp_clock_ts(tp->tcp_usec_ts) + READ_ONCE(tp->tsoffset); in do_tcp_getsockopt()
4638 if (tp->tcp_usec_ts) in do_tcp_getsockopt()
4644 val = READ_ONCE(tp->notsent_lowat); in do_tcp_getsockopt()
4647 val = tp->recvmsg_inq; in do_tcp_getsockopt()
4650 val = tp->save_syn; in do_tcp_getsockopt()
4654 return -EFAULT; in do_tcp_getsockopt()
4657 if (tp->saved_syn) { in do_tcp_getsockopt()
4658 if (len < tcp_saved_syn_len(tp->saved_syn)) { in do_tcp_getsockopt()
4659 len = tcp_saved_syn_len(tp->saved_syn); in do_tcp_getsockopt()
4662 return -EFAULT; in do_tcp_getsockopt()
4665 return -EINVAL; in do_tcp_getsockopt()
4667 len = tcp_saved_syn_len(tp->saved_syn); in do_tcp_getsockopt()
4670 return -EFAULT; in do_tcp_getsockopt()
4672 if (copy_to_sockptr(optval, tp->saved_syn->data, len)) { in do_tcp_getsockopt()
4674 return -EFAULT; in do_tcp_getsockopt()
4682 return -EFAULT; in do_tcp_getsockopt()
4693 return -EFAULT; in do_tcp_getsockopt()
4696 return -EINVAL; in do_tcp_getsockopt()
4699 len - sizeof(zc)); in do_tcp_getsockopt()
4701 return err == 0 ? -EINVAL : err; in do_tcp_getsockopt()
4704 return -EFAULT; in do_tcp_getsockopt()
4707 return -EFAULT; in do_tcp_getsockopt()
4709 return -EINVAL; in do_tcp_getsockopt()
4711 return -EINVAL; in do_tcp_getsockopt()
4747 err = -EFAULT; in do_tcp_getsockopt()
4753 return -EPERM; in do_tcp_getsockopt()
4775 val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_rto_min)); in do_tcp_getsockopt()
4778 val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_delack_max)); in do_tcp_getsockopt()
4781 return -ENOPROTOOPT; in do_tcp_getsockopt()
4785 return -EFAULT; in do_tcp_getsockopt()
4787 return -EFAULT; in do_tcp_getsockopt()
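As one concrete example of the do_tcp_getsockopt() paths above, the TCP_CONGESTION case copies the current congestion-control name (icsk->icsk_ca_ops->name) back to the caller. A minimal sketch, assuming a connected or listening socket fd; print_cc_name is an illustrative name:

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <stdio.h>
	#include <sys/socket.h>

	/* Sketch: read back the congestion-control name. TCP_CA_NAME_MAX is
	 * 16, so a 16-byte buffer always holds the NUL-terminated name.
	 */
	static void print_cc_name(int fd)
	{
		char name[16];
		socklen_t len = sizeof(name);

		if (getsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, name, &len) == 0)
			printf("congestion control: %s\n", name);
		else
			perror("getsockopt(TCP_CONGESTION)");
	}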
4810 return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, in tcp_getsockopt()
4818 int tcp_md5_sigpool_id = -1;
4830 * id would stay the same. Re-write the id only for the case in tcp_md5_alloc_sigpool()
4854 u8 keylen = READ_ONCE(key->keylen); /* paired with WRITE_ONCE() in tcp_md5_do_add */ in tcp_md5_hash_key()
4857 sg_init_one(&sg, key->key, keylen); in tcp_md5_hash_key()
4858 ahash_request_set_crypt(hp->req, &sg, NULL, keylen); in tcp_md5_hash_key()
4861 * key->key under us in tcp_md5_hash_key()
4863 return data_race(crypto_ahash_update(hp->req)); in tcp_md5_hash_key()
4873 /* This gets called for each TCP segment that has a TCP-MD5 option. in tcp_inbound_md5_hash()
4894 * IPv4-mapped case. in tcp_inbound_md5_hash()
4899 genhash = tp->af_specific->calc_md5_hash(newhash, key, in tcp_inbound_md5_hash()
4942 keyid = aoh->keyid; in tcp_inbound_hash()
4943 rnext = aoh->rnext_keyid; in tcp_inbound_hash()
4962 /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid in tcp_inbound_hash()
4963 * for the remote peer. On TCP-AO established connection in tcp_inbound_hash()
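The MD5/AO checks above only apply to peers that have a key configured; for TCP-MD5 that is done from userspace with the TCP_MD5SIG socket option. A sketch for an IPv4 peer, where add_md5_key, peer and secret are hypothetical caller-supplied names, not kernel identifiers:

	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <linux/tcp.h>	/* TCP_MD5SIG, struct tcp_md5sig */

	/* Sketch: install an MD5 key for one IPv4 peer. The same key must be
	 * configured on both ends before segments carrying the MD5 option
	 * will pass the verification above.
	 */
	static int add_md5_key(int fd, const struct sockaddr_in *peer,
			       const char *secret)
	{
		struct tcp_md5sig md5;
		size_t keylen = strlen(secret);

		if (keylen > TCP_MD5SIG_MAXKEYLEN)
			return -1;

		memset(&md5, 0, sizeof(md5));
		memcpy(&md5.tcpm_addr, peer, sizeof(*peer));
		md5.tcpm_keylen = keylen;
		memcpy(md5.tcpm_key, secret, keylen);

		return setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
	}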
4995 req = rcu_dereference_protected(tcp_sk(sk)->fastopen_rsk, 1); in tcp_done()
4997 if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) in tcp_done()
5005 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); in tcp_done()
5008 sk->sk_state_change(sk); in tcp_done()
5022 inet_csk_reqsk_queue_drop(req->rsk_listener, req); in tcp_abort()
5029 refcount_inc(&tw->tw_refcnt); in tcp_abort()
5042 if (sk->sk_state == TCP_CLOSE) { in tcp_abort()
5045 return -ENOENT; in tcp_abort()
5048 if (sk->sk_state == TCP_LISTEN) { in tcp_abort()
5057 if (tcp_need_reset(sk->sk_state)) in tcp_abort()
5100 /* TX read-mostly hotpath cache lines */ in tcp_struct_check()
5111 /* TXRX read-mostly hotpath cache lines */ in tcp_struct_check()
5121 /* RX read-mostly hotpath cache lines */ in tcp_struct_check()
5134 /* TX read-write hotpath cache lines */ in tcp_struct_check()
5152 /* TXRX read-write hotpath cache lines */ in tcp_struct_check()
5172 /* RX read-write hotpath cache lines */ in tcp_struct_check()
5268 /* Set per-socket limits to no more than 1/128 the pressure threshold */ in tcp_init()
5269 limit = nr_free_buffer_pages() << (PAGE_SHIFT - 7); in tcp_init()
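As a worked example of that shift: with PAGE_SHIFT = 12 (4 KiB pages) the expression becomes nr_free_buffer_pages() << 5, i.e. pages * 4096 / 128, which is 1/128 of the free buffer memory expressed in bytes, matching the comment above.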