Lines matching in net/mptcp/protocol.c
1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright (c) 2017 - 2019, Intel Corporation.
59 return READ_ONCE(msk->wnd_end); in mptcp_wnd_end()
65 if (sk->sk_prot == &tcpv6_prot) in mptcp_fallback_tcp_ops()
68 WARN_ON_ONCE(sk->sk_prot != &tcp_prot); in mptcp_fallback_tcp_ops()
79 spin_lock_bh(&msk->fallback_lock); in __mptcp_try_fallback()
80 if (!msk->allow_infinite_fallback) { in __mptcp_try_fallback()
81 spin_unlock_bh(&msk->fallback_lock); in __mptcp_try_fallback()
85 msk->allow_subflows = false; in __mptcp_try_fallback()
86 set_bit(MPTCP_FALLBACK_DONE, &msk->flags); in __mptcp_try_fallback()
88 spin_unlock_bh(&msk->fallback_lock); in __mptcp_try_fallback()
99 err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock); in __mptcp_socket_create()
103 msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio; in __mptcp_socket_create()
104 WRITE_ONCE(msk->first, ssock->sk); in __mptcp_socket_create()
105 subflow = mptcp_subflow_ctx(ssock->sk); in __mptcp_socket_create()
106 list_add(&subflow->node, &msk->conn_list); in __mptcp_socket_create()
107 sock_hold(ssock->sk); in __mptcp_socket_create()
108 subflow->request_mptcp = 1; in __mptcp_socket_create()
109 subflow->subflow_id = msk->subflow_id++; in __mptcp_socket_create()
112 WRITE_ONCE(subflow->local_id, 0); in __mptcp_socket_create()
113 mptcp_sock_graft(msk->first, sk->sk_socket); in __mptcp_socket_create()
127 if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) in __mptcp_nmpc_sk()
128 return ERR_PTR(-EINVAL); in __mptcp_nmpc_sk()
130 if (!msk->first) { in __mptcp_nmpc_sk()
136 return msk->first; in __mptcp_nmpc_sk()
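The (1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN) test above relies on the kernel convention that each TCPF_* flag is the matching TCP_* state shifted into a bit position, so a set of acceptable states is checked with a single AND. A minimal sketch of the idiom (state values as in include/net/tcp_states.h; helper name is illustrative):

/* TCPF_<state> == 1 << TCP_<state>: membership test in one AND */
enum { TCP_CLOSE = 7, TCP_LISTEN = 10 };	/* subset of tcp_states.h */
#define TCPF_CLOSE	(1 << TCP_CLOSE)
#define TCPF_LISTEN	(1 << TCP_LISTEN)

static int sk_state_in(int sk_state, int mask)
{
	return !!((1 << sk_state) & mask);
}

/* usage: sk_state_in(state, TCPF_CLOSE | TCPF_LISTEN) */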
149 int limit = READ_ONCE(sk->sk_rcvbuf); in __mptcp_try_coalesce()
151 if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) || in __mptcp_try_coalesce()
152 MPTCP_SKB_CB(from)->offset || in __mptcp_try_coalesce()
153 ((to->len + from->len) > (limit >> 3)) || in __mptcp_try_coalesce()
158 MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq, in __mptcp_try_coalesce()
159 to->len, MPTCP_SKB_CB(from)->end_seq); in __mptcp_try_coalesce()
160 MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq; in __mptcp_try_coalesce()
177 atomic_add(delta, &sk->sk_rmem_alloc); in mptcp_try_coalesce()
187 if (MPTCP_SKB_CB(from)->map_seq != MPTCP_SKB_CB(to)->end_seq) in mptcp_ooo_try_coalesce()
194 * - mptcp does not maintain a msk-level window clamp
195 * - returns true when the receive buffer is actually updated
204 oldval = msk->rcvq_space.space; in mptcp_rcvbuf_grow()
205 msk->rcvq_space.space = newval; in mptcp_rcvbuf_grow()
206 if (!READ_ONCE(net->ipv4.sysctl_tcp_moderate_rcvbuf) || in mptcp_rcvbuf_grow()
207 (sk->sk_userlocks & SOCK_RCVBUF_LOCK)) in mptcp_rcvbuf_grow()
214 grow = (u64)rcvwin * (newval - oldval); in mptcp_rcvbuf_grow()
218 if (!RB_EMPTY_ROOT(&msk->out_of_order_queue)) in mptcp_rcvbuf_grow()
219 rcvwin += MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq - msk->ack_seq; in mptcp_rcvbuf_grow()
221 cap = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]); in mptcp_rcvbuf_grow()
224 if (rcvbuf > sk->sk_rcvbuf) { in mptcp_rcvbuf_grow()
225 WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); in mptcp_rcvbuf_grow()
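A self-contained sketch of the auto-tune arithmetic whose fragments appear above; names are illustrative, and the elided kernel lines use do_div() and convert the window to a buffer size via the scaling ratio before applying the tcp_rmem[2] cap:

#include <stdint.h>

/* Grow the receive budget in proportion to the measured increase of
 * the application read rate (newval vs. oldval bytes per RTT).
 * Assumes newval >= oldval, as at the call sites above.
 */
static uint32_t rcvbuf_grow_sketch(uint32_t oldval, uint32_t newval,
				   uint32_t cur_rcvbuf, uint32_t cap)
{
	uint64_t rcvwin, grow;

	if (oldval == 0)
		return cur_rcvbuf;		/* nothing measured yet */

	rcvwin = (uint64_t)newval << 1;		/* twice the new space */
	grow = rcvwin * (newval - oldval);
	grow /= oldval;				/* proportional increase */
	rcvwin += grow << 1;
	if (rcvwin > cap)
		rcvwin = cap;
	return rcvwin > cur_rcvbuf ? (uint32_t)rcvwin : cur_rcvbuf;
}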
232 * - use mptcp seqs
233 * - don't cope with sacks
242 seq = MPTCP_SKB_CB(skb)->map_seq; in mptcp_data_queue_ofo()
243 end_seq = MPTCP_SKB_CB(skb)->end_seq; in mptcp_data_queue_ofo()
244 max_seq = atomic64_read(&msk->rcv_wnd_sent); in mptcp_data_queue_ofo()
247 RB_EMPTY_ROOT(&msk->out_of_order_queue)); in mptcp_data_queue_ofo()
252 (unsigned long long)end_seq - (unsigned long long)max_seq, in mptcp_data_queue_ofo()
253 (unsigned long long)atomic64_read(&msk->rcv_wnd_sent)); in mptcp_data_queue_ofo()
258 p = &msk->out_of_order_queue.rb_node; in mptcp_data_queue_ofo()
260 if (RB_EMPTY_ROOT(&msk->out_of_order_queue)) { in mptcp_data_queue_ofo()
261 rb_link_node(&skb->rbnode, NULL, p); in mptcp_data_queue_ofo()
262 rb_insert_color(&skb->rbnode, &msk->out_of_order_queue); in mptcp_data_queue_ofo()
263 msk->ooo_last_skb = skb; in mptcp_data_queue_ofo()
270 if (mptcp_ooo_try_coalesce(msk, msk->ooo_last_skb, skb)) { in mptcp_data_queue_ofo()
277 if (!before64(seq, MPTCP_SKB_CB(msk->ooo_last_skb)->end_seq)) { in mptcp_data_queue_ofo()
279 parent = &msk->ooo_last_skb->rbnode; in mptcp_data_queue_ofo()
280 p = &parent->rb_right; in mptcp_data_queue_ofo()
289 if (before64(seq, MPTCP_SKB_CB(skb1)->map_seq)) { in mptcp_data_queue_ofo()
290 p = &parent->rb_left; in mptcp_data_queue_ofo()
293 if (before64(seq, MPTCP_SKB_CB(skb1)->end_seq)) { in mptcp_data_queue_ofo()
294 if (!after64(end_seq, MPTCP_SKB_CB(skb1)->end_seq)) { in mptcp_data_queue_ofo()
300 if (after64(seq, MPTCP_SKB_CB(skb1)->map_seq)) { in mptcp_data_queue_ofo()
310 rb_replace_node(&skb1->rbnode, &skb->rbnode, in mptcp_data_queue_ofo()
311 &msk->out_of_order_queue); in mptcp_data_queue_ofo()
320 p = &parent->rb_right; in mptcp_data_queue_ofo()
325 rb_link_node(&skb->rbnode, parent, p); in mptcp_data_queue_ofo()
326 rb_insert_color(&skb->rbnode, &msk->out_of_order_queue); in mptcp_data_queue_ofo()
331 if (before64(end_seq, MPTCP_SKB_CB(skb1)->end_seq)) in mptcp_data_queue_ofo()
333 rb_erase(&skb1->rbnode, &msk->out_of_order_queue); in mptcp_data_queue_ofo()
339 msk->ooo_last_skb = skb; in mptcp_data_queue_ofo()
344 /* do not grow rcvbuf for not-yet-accepted or orphaned sockets. */ in mptcp_data_queue_ofo()
345 if (sk->sk_socket) in mptcp_data_queue_ofo()
346 mptcp_rcvbuf_grow(sk, msk->rcvq_space.space); in mptcp_data_queue_ofo()
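The before64()/after64() comparisons used throughout the out-of-order queue code operate on 64-bit MPTCP data sequence numbers; net/mptcp/protocol.h defines them with the classic signed-difference trick, reproduced here for reference:

/* Wraparound-safe 64-bit sequence comparison: the signed cast makes
 * "seq1 precedes seq2" true whenever seq1 is within half the sequence
 * space behind seq2, even across a wrap.
 */
static inline bool before64(__u64 seq1, __u64 seq2)
{
	return (__s64)(seq1 - seq2) < 0;
}

#define after64(seq2, seq1)	before64(seq1, seq2)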
353 bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; in mptcp_init_skb()
356 * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq in mptcp_init_skb()
359 MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow); in mptcp_init_skb()
360 MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; in mptcp_init_skb()
361 MPTCP_SKB_CB(skb)->offset = offset; in mptcp_init_skb()
362 MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp; in mptcp_init_skb()
363 MPTCP_SKB_CB(skb)->cant_coalesce = 0; in mptcp_init_skb()
365 __skb_unlink(skb, &ssk->sk_receive_queue); in mptcp_init_skb()
373 u64 copy_len = MPTCP_SKB_CB(skb)->end_seq - MPTCP_SKB_CB(skb)->map_seq; in __mptcp_move_skb()
378 if (!sk_rmem_schedule(sk, skb, skb->truesize)) { in __mptcp_move_skb()
383 if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { in __mptcp_move_skb()
385 msk->bytes_received += copy_len; in __mptcp_move_skb()
386 WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len); in __mptcp_move_skb()
387 tail = skb_peek_tail(&sk->sk_receive_queue); in __mptcp_move_skb()
392 __skb_queue_tail(&sk->sk_receive_queue, skb); in __mptcp_move_skb()
394 } else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) { in __mptcp_move_skb()
412 sk_stop_timer(sk, &icsk->icsk_retransmit_timer); in mptcp_stop_rtx_timer()
413 mptcp_sk(sk)->timer_ival = 0; in mptcp_stop_rtx_timer()
421 sk->sk_state_change(sk); in mptcp_close_wake_up()
422 if (sk->sk_shutdown == SHUTDOWN_MASK || in mptcp_close_wake_up()
423 sk->sk_state == TCP_CLOSE) in mptcp_close_wake_up()
448 return ((1 << sk->sk_state) & in mptcp_pending_data_fin_ack()
450 msk->write_seq == READ_ONCE(msk->snd_una); in mptcp_pending_data_fin_ack()
459 WRITE_ONCE(msk->snd_data_fin_enable, 0); in mptcp_check_data_fin_ack()
461 switch (sk->sk_state) { in mptcp_check_data_fin_ack()
481 if (READ_ONCE(msk->rcv_data_fin) && in mptcp_pending_data_fin()
484 u64 rcv_data_fin_seq = READ_ONCE(msk->rcv_data_fin_seq); in mptcp_pending_data_fin()
486 if (READ_ONCE(msk->ack_seq) == rcv_data_fin_seq) { in mptcp_pending_data_fin()
502 retransmits = min_t(u32, icsk->icsk_retransmits, in mptcp_set_datafin_timeout()
505 mptcp_sk(sk)->timer_ival = TCP_RTO_MIN << retransmits; in mptcp_set_datafin_timeout()
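The result is plain exponential backoff from TCP_RTO_MIN; the elided clamp argument of min_t() is ilog2(TCP_RTO_MAX / TCP_RTO_MIN). A worked example, assuming the usual 200ms/120s values:

/* retransmits 0 -> 200ms, 1 -> 400ms, 2 -> 800ms, ...
 * clamp: ilog2(120000 / 200) = ilog2(600) = 9
 * so the DATA_FIN timer tops out at 200ms << 9 = 102.4s, just
 * below TCP_RTO_MAX.
 */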
510 mptcp_sk(sk)->timer_ival = tout > 0 ? tout : TCP_RTO_MIN; in __mptcp_set_timeout()
517 return inet_csk(ssk)->icsk_pending && !subflow->stale_count ? in mptcp_timeout_from_subflow()
518 icsk_timeout(inet_csk(ssk)) - jiffies : 0; in mptcp_timeout_from_subflow()
573 u8 ack_pending = READ_ONCE(icsk->icsk_ack.pending); in mptcp_subflow_could_cleanup()
577 ((READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->rcv_wup) > in mptcp_subflow_could_cleanup()
578 READ_ONCE(icsk->icsk_ack.rcv_mss)) || in mptcp_subflow_could_cleanup()
585 int old_space = READ_ONCE(msk->old_wspace); in mptcp_cleanup_rbuf()
609 * msk->rcv_data_fin was set when parsing the incoming options in mptcp_check_data_fin()
621 WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1); in mptcp_check_data_fin()
622 WRITE_ONCE(msk->rcv_data_fin, 0); in mptcp_check_data_fin()
624 WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN); in mptcp_check_data_fin()
627 switch (sk->sk_state) { in mptcp_check_data_fin()
671 u32 seq = tp->copied_seq; in __mptcp_move_skbs_from_subflow()
675 if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) in __mptcp_move_skbs_from_subflow()
679 map_remaining = subflow->map_data_len - in __mptcp_move_skbs_from_subflow()
682 skb = skb_peek(&ssk->sk_receive_queue); in __mptcp_move_skbs_from_subflow()
691 map_remaining = skb->len; in __mptcp_move_skbs_from_subflow()
692 subflow->map_data_len = skb->len; in __mptcp_move_skbs_from_subflow()
695 offset = seq - TCP_SKB_CB(skb)->seq; in __mptcp_move_skbs_from_subflow()
696 fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; in __mptcp_move_skbs_from_subflow()
700 if (offset < skb->len) { in __mptcp_move_skbs_from_subflow()
701 size_t len = skb->len - offset; in __mptcp_move_skbs_from_subflow()
721 WRITE_ONCE(tp->copied_seq, seq); in __mptcp_move_skbs_from_subflow()
727 msk->last_data_recv = tcp_jiffies32; in __mptcp_move_skbs_from_subflow()
739 p = rb_first(&msk->out_of_order_queue); in __mptcp_ofo_queue()
740 pr_debug("msk=%p empty=%d\n", msk, RB_EMPTY_ROOT(&msk->out_of_order_queue)); in __mptcp_ofo_queue()
743 if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) in __mptcp_ofo_queue()
747 rb_erase(&skb->rbnode, &msk->out_of_order_queue); in __mptcp_ofo_queue()
749 if (unlikely(!after64(MPTCP_SKB_CB(skb)->end_seq, in __mptcp_ofo_queue()
750 msk->ack_seq))) { in __mptcp_ofo_queue()
756 end_seq = MPTCP_SKB_CB(skb)->end_seq; in __mptcp_ofo_queue()
757 tail = skb_peek_tail(&sk->sk_receive_queue); in __mptcp_ofo_queue()
759 int delta = msk->ack_seq - MPTCP_SKB_CB(skb)->map_seq; in __mptcp_ofo_queue()
763 MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq, in __mptcp_ofo_queue()
765 MPTCP_SKB_CB(skb)->offset += delta; in __mptcp_ofo_queue()
766 MPTCP_SKB_CB(skb)->map_seq += delta; in __mptcp_ofo_queue()
767 __skb_queue_tail(&sk->sk_receive_queue, skb); in __mptcp_ofo_queue()
769 msk->bytes_received += end_seq - msk->ack_seq; in __mptcp_ofo_queue()
770 WRITE_ONCE(msk->ack_seq, end_seq); in __mptcp_ofo_queue()
784 /* only propagate errors on fallen-back sockets or in __mptcp_subflow_error_report()
787 if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk))) in __mptcp_subflow_error_report()
798 WRITE_ONCE(sk->sk_err, -err); in __mptcp_subflow_error_report()
826 if (unlikely(ssk->sk_err)) in move_skbs_to_msk()
843 /* Wake up the reader only for in-sequence data */ in __mptcp_data_ready()
845 sk->sk_data_ready(sk); in __mptcp_data_ready()
856 if (unlikely(subflow->disposable)) in mptcp_data_ready()
863 __set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags); in mptcp_data_ready()
869 mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); in mptcp_subflow_joined()
870 msk->allow_infinite_fallback = false; in mptcp_subflow_joined()
878 if (sk->sk_state != TCP_ESTABLISHED) in __mptcp_finish_join()
881 spin_lock_bh(&msk->fallback_lock); in __mptcp_finish_join()
882 if (!msk->allow_subflows) { in __mptcp_finish_join()
883 spin_unlock_bh(&msk->fallback_lock); in __mptcp_finish_join()
887 spin_unlock_bh(&msk->fallback_lock); in __mptcp_finish_join()
892 if (sk->sk_socket && !ssk->sk_socket) in __mptcp_finish_join()
893 mptcp_sock_graft(ssk, sk->sk_socket); in __mptcp_finish_join()
895 mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; in __mptcp_finish_join()
911 list_move_tail(&subflow->node, &msk->conn_list); in __mptcp_flush_join_list()
920 return timer_pending(&inet_csk(sk)->icsk_retransmit_timer); in mptcp_rtx_timer_pending()
932 tout = mptcp_sk(sk)->timer_ival; in mptcp_reset_rtx_timer()
933 sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + tout); in mptcp_reset_rtx_timer()
939 schedule_work(&mptcp_sk(sk)->work)) { in mptcp_schedule_work()
959 return mpext && mpext->data_seq + mpext->data_len == write_seq && in mptcp_skb_can_collapse_to()
960 !mpext->frozen; in mptcp_skb_can_collapse_to()
964 * - there is space available in the backing page_frag
965 * - the data frag tail matches the current page_frag free offset
966 * - the data frag end sequence number matches the current write seq
972 return df && pfrag->page == df->page && in mptcp_frag_can_collapse_to()
973 pfrag->size - pfrag->offset > 0 && in mptcp_frag_can_collapse_to()
974 pfrag->offset == (df->offset + df->data_len) && in mptcp_frag_can_collapse_to()
975 df->data_seq + df->data_len == msk->write_seq; in mptcp_frag_can_collapse_to()
981 sk_wmem_queued_add(sk, -len); in dfrag_uncharge()
986 int len = dfrag->data_len + dfrag->overhead; in dfrag_clear()
988 list_del(&dfrag->list); in dfrag_clear()
990 put_page(dfrag->page); in dfrag_clear()
1000 snd_una = msk->snd_una; in __mptcp_clean_una()
1001 list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { in __mptcp_clean_una()
1002 if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) in __mptcp_clean_una()
1005 if (unlikely(dfrag == msk->first_pending)) { in __mptcp_clean_una()
1007 if (WARN_ON_ONCE(!msk->recovery)) in __mptcp_clean_una()
1010 msk->first_pending = mptcp_send_next(sk); in __mptcp_clean_una()
1017 if (dfrag && after64(snd_una, dfrag->data_seq)) { in __mptcp_clean_una()
1018 u64 delta = snd_una - dfrag->data_seq; in __mptcp_clean_una()
1021 if (unlikely(delta > dfrag->already_sent)) { in __mptcp_clean_una()
1022 if (WARN_ON_ONCE(!msk->recovery)) in __mptcp_clean_una()
1024 if (WARN_ON_ONCE(delta > dfrag->data_len)) in __mptcp_clean_una()
1026 dfrag->already_sent += delta - dfrag->already_sent; in __mptcp_clean_una()
1029 dfrag->data_seq += delta; in __mptcp_clean_una()
1030 dfrag->offset += delta; in __mptcp_clean_una()
1031 dfrag->data_len -= delta; in __mptcp_clean_una()
1032 dfrag->already_sent -= delta; in __mptcp_clean_una()
1038 if (unlikely(msk->recovery) && after64(msk->snd_una, msk->recovery_snd_nxt)) in __mptcp_clean_una()
1039 msk->recovery = false; in __mptcp_clean_una()
1042 if (snd_una == msk->snd_nxt && snd_una == msk->write_seq) { in __mptcp_clean_una()
1055 lockdep_assert_held_once(&sk->sk_lock.slock); in __mptcp_clean_una_wakeup()
1092 pfrag, sk->sk_allocation))) in mptcp_page_frag_refill()
1106 dfrag = (struct mptcp_data_frag *)(page_to_virt(pfrag->page) + offset); in mptcp_carve_data_frag()
1107 dfrag->data_len = 0; in mptcp_carve_data_frag()
1108 dfrag->data_seq = msk->write_seq; in mptcp_carve_data_frag()
1109 dfrag->overhead = offset - orig_offset + sizeof(struct mptcp_data_frag); in mptcp_carve_data_frag()
1110 dfrag->offset = offset + sizeof(struct mptcp_data_frag); in mptcp_carve_data_frag()
1111 dfrag->already_sent = 0; in mptcp_carve_data_frag()
1112 dfrag->page = pfrag->page; in mptcp_carve_data_frag()
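The overhead/offset arithmetic above carves the struct mptcp_data_frag header inline at the head of the reserved region, directly before the data it describes; a sketch of the resulting page-frag layout:

/*   orig_offset      offset              dfrag->offset
 *        |              |                      |
 *        v              v                      v
 *   -----+--(alignment)-+--mptcp_data_frag-----+--user data ...
 *
 * dfrag->overhead = (offset - orig_offset) + sizeof(struct mptcp_data_frag)
 * charges both the alignment gap and the inline header to the write
 * queue, so accounting matches what the frag really consumes.
 */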
1135 mptcp_snd_wnd = window_end - data_seq; in mptcp_check_allowed_size()
1138 if (unlikely(tcp_sk(ssk)->snd_wnd < mptcp_snd_wnd)) { in mptcp_check_allowed_size()
1139 tcp_sk(ssk)->snd_wnd = min_t(u64, U32_MAX, mptcp_snd_wnd); in mptcp_check_allowed_size()
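A sketch of what the fragments above compute: the amount a subflow may send is clamped to the MPTCP-level window remaining past this mapping's start, and the subflow's own TCP send window is opportunistically raised to share that msk-level window (names illustrative):

#include <stdint.h>

static unsigned int check_allowed_size_sketch(uint64_t window_end,
					      uint64_t data_seq,
					      unsigned int avail_size)
{
	uint64_t mptcp_snd_wnd = window_end - data_seq;

	/* never send past the MPTCP-level window */
	return mptcp_snd_wnd < avail_size ? (unsigned int)mptcp_snd_wnd
					  : avail_size;
}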
1164 skb->ip_summed = CHECKSUM_PARTIAL; in __mptcp_do_alloc_tx_skb()
1165 INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); in __mptcp_do_alloc_tx_skb()
1183 if (likely(sk_wmem_schedule(ssk, skb->truesize))) { in __mptcp_alloc_tx_skb()
1194 gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation; in mptcp_alloc_tx_skb()
1205 __wsum csum = ~csum_unfold(mpext->csum); in mptcp_update_data_checksum()
1206 int offset = skb->len - added; in mptcp_update_data_checksum()
1208 mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); in mptcp_update_data_checksum()
1218 mpext->infinite_map = 1; in mptcp_update_infinite_map()
1219 mpext->data_len = 0; in mptcp_update_infinite_map()
1227 mptcp_subflow_ctx(ssk)->send_infinite_map = 0; in mptcp_update_infinite_map()
1230 #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
1236 u64 data_seq = dfrag->data_seq + info->sent; in mptcp_sendmsg_frag()
1237 int offset = dfrag->offset + info->sent; in mptcp_sendmsg_frag()
1248 msk, ssk, dfrag->data_seq, dfrag->data_len, info->sent); in mptcp_sendmsg_frag()
1250 if (WARN_ON_ONCE(info->sent > info->limit || in mptcp_sendmsg_frag()
1251 info->limit > dfrag->data_len)) in mptcp_sendmsg_frag()
1255 return -EAGAIN; in mptcp_sendmsg_frag()
1258 if (unlikely(ssk->sk_gso_max_size > MPTCP_MAX_GSO_SIZE)) in mptcp_sendmsg_frag()
1259 ssk->sk_gso_max_size = MPTCP_MAX_GSO_SIZE; in mptcp_sendmsg_frag()
1260 info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); in mptcp_sendmsg_frag()
1261 copy = info->size_goal; in mptcp_sendmsg_frag()
1264 if (skb && copy > skb->len) { in mptcp_sendmsg_frag()
1268 * queue management operation, to avoid breaking the ext <-> in mptcp_sendmsg_frag()
1273 TCP_SKB_CB(skb)->eor = 1; in mptcp_sendmsg_frag()
1278 i = skb_shinfo(skb)->nr_frags; in mptcp_sendmsg_frag()
1279 can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); in mptcp_sendmsg_frag()
1285 copy -= skb->len; in mptcp_sendmsg_frag()
1288 skb = mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held); in mptcp_sendmsg_frag()
1290 return -ENOMEM; in mptcp_sendmsg_frag()
1292 i = skb_shinfo(skb)->nr_frags; in mptcp_sendmsg_frag()
1300 u64 snd_una = READ_ONCE(msk->snd_una); in mptcp_sendmsg_frag()
1306 if (snd_una != msk->snd_nxt || skb->len || in mptcp_sendmsg_frag()
1313 data_seq = snd_una - 1; in mptcp_sendmsg_frag()
1317 copy = min_t(size_t, copy, info->limit - info->sent); in mptcp_sendmsg_frag()
1320 return -ENOMEM; in mptcp_sendmsg_frag()
1324 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); in mptcp_sendmsg_frag()
1326 get_page(dfrag->page); in mptcp_sendmsg_frag()
1327 skb_fill_page_desc(skb, i, dfrag->page, offset, copy); in mptcp_sendmsg_frag()
1330 skb->len += copy; in mptcp_sendmsg_frag()
1331 skb->data_len += copy; in mptcp_sendmsg_frag()
1332 skb->truesize += copy; in mptcp_sendmsg_frag()
1335 WRITE_ONCE(tcp_sk(ssk)->write_seq, tcp_sk(ssk)->write_seq + copy); in mptcp_sendmsg_frag()
1336 TCP_SKB_CB(skb)->end_seq += copy; in mptcp_sendmsg_frag()
1341 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; in mptcp_sendmsg_frag()
1342 mpext->data_len += copy; in mptcp_sendmsg_frag()
1347 mpext->data_seq = data_seq; in mptcp_sendmsg_frag()
1348 mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq; in mptcp_sendmsg_frag()
1349 mpext->data_len = copy; in mptcp_sendmsg_frag()
1350 mpext->use_map = 1; in mptcp_sendmsg_frag()
1351 mpext->dsn64 = 1; in mptcp_sendmsg_frag()
1354 mpext->data_seq, mpext->subflow_seq, mpext->data_len, in mptcp_sendmsg_frag()
1355 mpext->dsn64); in mptcp_sendmsg_frag()
1359 mptcp_subflow_ctx(ssk)->rel_write_seq += copy; in mptcp_sendmsg_frag()
1360 mpext->frozen = 1; in mptcp_sendmsg_frag()
1361 if (READ_ONCE(msk->csum_enabled)) in mptcp_sendmsg_frag()
1367 if (READ_ONCE(msk->csum_enabled)) in mptcp_sendmsg_frag()
1369 if (mptcp_subflow_ctx(ssk)->send_infinite_map) in mptcp_sendmsg_frag()
1372 mptcp_subflow_ctx(ssk)->rel_write_seq += copy; in mptcp_sendmsg_frag()
1376 #define MPTCP_SEND_BURST_SIZE ((1 << 16) - \
1377 sizeof(struct tcphdr) - \
1378 MAX_TCP_OPTION_SPACE - \
1379 sizeof(struct ipv6hdr) - \
1380 sizeof(struct frag_hdr))
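Plugging in the usual header sizes gives the worst case the macro budgets for (a back-of-the-envelope check, not kernel output):

/* 65536 - sizeof(struct tcphdr) 20 - MAX_TCP_OPTION_SPACE 40
 *       - sizeof(struct ipv6hdr) 40 - sizeof(struct frag_hdr) 8
 * = 65428 bytes: one burst always fits a 64KB GSO/window unit even
 * with worst-case IPv6 + fragment + full TCP option headers.
 */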
1389 if (!subflow->stale) in mptcp_subflow_set_active()
1392 subflow->stale = 0; in mptcp_subflow_set_active()
1398 if (unlikely(subflow->stale)) { in mptcp_subflow_active()
1399 u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp); in mptcp_subflow_active()
1401 if (subflow->stale_rcv_tstamp == rcv_tstamp) in mptcp_subflow_active()
1422 u32 pace, burst, wmem; in mptcp_subflow_get_send() local
1431 send_info[i].linger_time = -1; in mptcp_subflow_get_send()
1435 bool backup = subflow->backup || subflow->request_bkup; in mptcp_subflow_get_send()
1444 pace = subflow->avg_pacing_rate; in mptcp_subflow_get_send()
1447 subflow->avg_pacing_rate = READ_ONCE(ssk->sk_pacing_rate); in mptcp_subflow_get_send()
1448 pace = subflow->avg_pacing_rate; in mptcp_subflow_get_send()
1453 linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); in mptcp_subflow_get_send()
1467 * - estimate the faster flow linger time in mptcp_subflow_get_send()
1468 * - use the above to estimate the amount of bytes transferred in mptcp_subflow_get_send()
1470 * - check that the amount of queued data is greater than the above, in mptcp_subflow_get_send()
1480 burst = min_t(int, MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt); in mptcp_subflow_get_send()
1481 wmem = READ_ONCE(ssk->sk_wmem_queued); in mptcp_subflow_get_send()
1482 if (!burst) in mptcp_subflow_get_send()
1486 subflow->avg_pacing_rate = div_u64((u64)subflow->avg_pacing_rate * wmem + in mptcp_subflow_get_send()
1487 READ_ONCE(ssk->sk_pacing_rate) * burst, in mptcp_subflow_get_send()
1488 burst + wmem); in mptcp_subflow_get_send()
1489 msk->snd_burst = burst; in mptcp_subflow_get_send()
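The avg_pacing_rate update above is a running average weighted by bytes: data already queued keeps the cached estimate's weight, while the new burst enters at the subflow's current pacing rate. A standalone sketch (the caller skips the update when burst is 0, as visible above):

#include <stdint.h>

static uint64_t avg_pacing_update(uint64_t avg_rate, uint64_t cur_rate,
				  uint32_t wmem, uint32_t burst)
{
	/* weighted blend; burst > 0 guaranteed by the caller */
	return (avg_rate * wmem + cur_rate * burst) / (burst + wmem);
}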
1495 tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal); in mptcp_push_release()
1503 u64 snd_nxt_new = dfrag->data_seq; in mptcp_update_post_push()
1505 dfrag->already_sent += sent; in mptcp_update_post_push()
1507 msk->snd_burst -= sent; in mptcp_update_post_push()
1509 snd_nxt_new += dfrag->already_sent; in mptcp_update_post_push()
1512 * is recovering after a failover. In that event, this re-sends in mptcp_update_post_push()
1516 * the dfrag->data_seq that was sent and the data in mptcp_update_post_push()
1520 if (likely(after64(snd_nxt_new, msk->snd_nxt))) { in mptcp_update_post_push()
1521 msk->bytes_sent += snd_nxt_new - msk->snd_nxt; in mptcp_update_post_push()
1522 WRITE_ONCE(msk->snd_nxt, snd_nxt_new); in mptcp_update_post_push()
1530 mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING); in mptcp_check_and_set_pending()
1543 info->sent = dfrag->already_sent; in __subflow_push_pending()
1544 info->limit = dfrag->data_len; in __subflow_push_pending()
1545 len = dfrag->data_len - dfrag->already_sent; in __subflow_push_pending()
1555 info->sent += ret; in __subflow_push_pending()
1557 len -= ret; in __subflow_push_pending()
1561 msk->first_pending = mptcp_send_next(sk); in __subflow_push_pending()
1563 if (msk->snd_burst <= 0 || in __subflow_push_pending()
1575 msk->last_data_sent = tcp_jiffies32; in __subflow_push_pending()
1599 if (READ_ONCE(subflow->scheduled)) { in __mptcp_push_pending()
1622 if (ret != -EAGAIN || in __mptcp_push_pending()
1623 (1 << ssk->sk_state) & in __mptcp_push_pending()
1625 push_count--; in __mptcp_push_pending()
1675 if (READ_ONCE(subflow->scheduled)) { in __mptcp_subflow_push_pending()
1684 if (READ_ONCE(subflow->scheduled)) { in __mptcp_subflow_push_pending()
1700 tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, in __mptcp_subflow_push_pending()
1705 if (msk->snd_data_fin_enable && in __mptcp_subflow_push_pending()
1706 msk->snd_nxt + 1 == msk->write_seq) in __mptcp_subflow_push_pending()
1716 unsigned int saved_flags = msg->msg_flags; in mptcp_sendmsg_fastopen()
1727 if (msg->msg_flags & MSG_FASTOPEN) { in mptcp_sendmsg_fastopen()
1732 if (!msk->first) in mptcp_sendmsg_fastopen()
1733 return -EINVAL; in mptcp_sendmsg_fastopen()
1735 ssk = msk->first; in mptcp_sendmsg_fastopen()
1738 msg->msg_flags |= MSG_DONTWAIT; in mptcp_sendmsg_fastopen()
1739 msk->fastopening = 1; in mptcp_sendmsg_fastopen()
1741 msk->fastopening = 0; in mptcp_sendmsg_fastopen()
1742 msg->msg_flags = saved_flags; in mptcp_sendmsg_fastopen()
1746 if (ret == -EINPROGRESS && !(msg->msg_flags & MSG_DONTWAIT)) { in mptcp_sendmsg_fastopen()
1747 ret = __inet_stream_connect(sk->sk_socket, msg->msg_name, in mptcp_sendmsg_fastopen()
1748 msg->msg_namelen, msg->msg_flags, 1); in mptcp_sendmsg_fastopen()
1753 if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR) in mptcp_sendmsg_fastopen()
1755 } else if (ret && ret != -EINPROGRESS) { in mptcp_sendmsg_fastopen()
1762 sk->sk_disconnects++; in mptcp_sendmsg_fastopen()
1763 sk->sk_socket->state = SS_UNCONNECTED; in mptcp_sendmsg_fastopen()
1774 if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { in do_copy_data_nocache()
1776 return -EFAULT; in do_copy_data_nocache()
1778 return -EFAULT; in do_copy_data_nocache()
1783 /* open-code sk_stream_memory_free() plus sent limit computation to
1784 * avoid indirect calls in the fast path.
1793 if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf)) in mptcp_send_limit()
1800 not_sent = msk->write_seq - msk->snd_nxt; in mptcp_send_limit()
1804 return limit - not_sent; in mptcp_send_limit()
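A sketch of the whole limit computation around the fragments above: bytes queued at the MPTCP level but not yet pushed (write_seq - snd_nxt) are deducted from the remaining sndbuf budget, so the fast path cannot over-commit (names illustrative):

#include <stdint.h>

static uint32_t send_limit_sketch(uint32_t sndbuf, uint32_t wmem_queued,
				  uint64_t write_seq, uint64_t snd_nxt)
{
	uint64_t not_sent = write_seq - snd_nxt;	/* queued, unpushed */
	uint32_t limit;

	if (wmem_queued >= sndbuf)
		return 0;				/* sndbuf exhausted */
	limit = sndbuf - wmem_queued;
	return not_sent >= limit ? 0 : limit - (uint32_t)not_sent;
}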
1830 msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; in mptcp_sendmsg()
1837 msg->msg_flags & MSG_FASTOPEN)) { in mptcp_sendmsg()
1842 if (ret == -EINPROGRESS && copied_syn > 0) in mptcp_sendmsg()
1848 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); in mptcp_sendmsg()
1850 if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { in mptcp_sendmsg()
1856 ret = -EPIPE; in mptcp_sendmsg()
1857 if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))) in mptcp_sendmsg()
1875 * page allocator in mptcp_sendmsg()
1883 dfrag = mptcp_carve_data_frag(msk, pfrag, pfrag->offset); in mptcp_sendmsg()
1884 frag_truesize = dfrag->overhead; in mptcp_sendmsg()
1891 offset = dfrag->offset + dfrag->data_len; in mptcp_sendmsg()
1892 psize = pfrag->size - offset; in mptcp_sendmsg()
1900 ret = do_copy_data_nocache(sk, psize, &msg->msg_iter, in mptcp_sendmsg()
1901 page_address(dfrag->page) + offset); in mptcp_sendmsg()
1906 sk_forward_alloc_add(sk, -total_ts); in mptcp_sendmsg()
1908 dfrag->data_len += psize; in mptcp_sendmsg()
1910 pfrag->offset += frag_truesize; in mptcp_sendmsg()
1911 WRITE_ONCE(msk->write_seq, msk->write_seq + psize); in mptcp_sendmsg()
1918 get_page(dfrag->page); in mptcp_sendmsg()
1919 list_add_tail(&dfrag->list, &msk->rtx_queue); in mptcp_sendmsg()
1920 if (!msk->first_pending) in mptcp_sendmsg()
1921 msk->first_pending = dfrag; in mptcp_sendmsg()
1924 dfrag->data_seq, dfrag->data_len, dfrag->already_sent, in mptcp_sendmsg()
1930 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); in mptcp_sendmsg()
1931 __mptcp_push_pending(sk, msg->msg_flags); in mptcp_sendmsg()
1938 __mptcp_push_pending(sk, msg->msg_flags); in mptcp_sendmsg()
1948 copied = sk_stream_error(sk, msg->msg_flags, ret); in mptcp_sendmsg()
1964 skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { in __mptcp_recvmsg_mskq()
1965 u32 delta, offset = MPTCP_SKB_CB(skb)->offset; in __mptcp_recvmsg_mskq()
1966 u32 data_len = skb->len - offset; in __mptcp_recvmsg_mskq()
1978 delta = copied_total - total_data_len; in __mptcp_recvmsg_mskq()
1980 data_len -= delta; in __mptcp_recvmsg_mskq()
1983 count = min_t(size_t, len - copied, data_len); in __mptcp_recvmsg_mskq()
1993 if (MPTCP_SKB_CB(skb)->has_rxtstamp) { in __mptcp_recvmsg_mskq()
2001 msk->bytes_consumed += count; in __mptcp_recvmsg_mskq()
2003 MPTCP_SKB_CB(skb)->offset += count; in __mptcp_recvmsg_mskq()
2004 MPTCP_SKB_CB(skb)->map_seq += count; in __mptcp_recvmsg_mskq()
2009 skb->destructor = NULL; in __mptcp_recvmsg_mskq()
2010 skb->sk = NULL; in __mptcp_recvmsg_mskq()
2011 atomic_sub(skb->truesize, &sk->sk_rmem_alloc); in __mptcp_recvmsg_mskq()
2012 sk_mem_uncharge(sk, skb->truesize); in __mptcp_recvmsg_mskq()
2013 __skb_unlink(skb, &sk->sk_receive_queue); in __mptcp_recvmsg_mskq()
2042 if (!msk->rcvspace_init) in mptcp_rcv_space_adjust()
2043 mptcp_rcv_space_init(msk, msk->first); in mptcp_rcv_space_adjust()
2045 msk->rcvq_space.copied += copied; in mptcp_rcv_space_adjust()
2048 time = tcp_stamp_us_delta(mstamp, msk->rcvq_space.time); in mptcp_rcv_space_adjust()
2050 rtt_us = msk->rcvq_space.rtt_us; in mptcp_rcv_space_adjust()
2062 sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us); in mptcp_rcv_space_adjust()
2063 sf_advmss = READ_ONCE(tp->advmss); in mptcp_rcv_space_adjust()
2067 scaling_ratio = min(tp->scaling_ratio, scaling_ratio); in mptcp_rcv_space_adjust()
2070 msk->rcvq_space.rtt_us = rtt_us; in mptcp_rcv_space_adjust()
2071 msk->scaling_ratio = scaling_ratio; in mptcp_rcv_space_adjust()
2075 if (msk->rcvq_space.copied <= msk->rcvq_space.space) in mptcp_rcv_space_adjust()
2078 if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { in mptcp_rcv_space_adjust()
2082 * exceed ssk->sk_rcvbuf). in mptcp_rcv_space_adjust()
2091 if (tcp_sk(ssk)->rcvq_space.space) in mptcp_rcv_space_adjust()
2092 tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); in mptcp_rcv_space_adjust()
2098 msk->rcvq_space.copied = 0; in mptcp_rcv_space_adjust()
2099 msk->rcvq_space.time = mstamp; in mptcp_rcv_space_adjust()
2108 while (!READ_ONCE(subflow->data_avail)) { in __mptcp_first_ready_from()
2122 if (list_empty(&msk->conn_list)) in __mptcp_move_skbs()
2125 subflow = list_first_entry(&msk->conn_list, in __mptcp_move_skbs()
2135 if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf) in __mptcp_move_skbs()
2145 if (unlikely(ssk->sk_err)) in __mptcp_move_skbs()
2163 skb = skb_peek(&sk->sk_receive_queue); in mptcp_inq_hint()
2165 u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq; in mptcp_inq_hint()
2173 if (sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN)) in mptcp_inq_hint()
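The hint mirrors TCP's FIONREAD accounting: everything between the head skb's mapping and the last in-order sequence is readable. A worked example under the values shown:

/* head skb maps DSN 1000, msk->ack_seq = 4000
 * -> hint_val = 4000 - 1000 = 3000 readable bytes (the elided
 *    lines clamp the result to INT_MAX before returning).
 */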
2188 /* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */ in mptcp_recvmsg()
2193 if (unlikely(sk->sk_state == TCP_LISTEN)) { in mptcp_recvmsg()
2194 copied = -ENOTCONN; in mptcp_recvmsg()
2205 if (unlikely(msk->recvmsg_inq)) in mptcp_recvmsg()
2211 bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, in mptcp_recvmsg()
2221 if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk)) in mptcp_recvmsg()
2231 if (sk->sk_err || in mptcp_recvmsg()
2232 sk->sk_state == TCP_CLOSE || in mptcp_recvmsg()
2233 (sk->sk_shutdown & RCV_SHUTDOWN) || in mptcp_recvmsg()
2238 if (sk->sk_err) { in mptcp_recvmsg()
2243 if (sk->sk_shutdown & RCV_SHUTDOWN) in mptcp_recvmsg()
2246 if (sk->sk_state == TCP_CLOSE) { in mptcp_recvmsg()
2247 copied = -ENOTCONN; in mptcp_recvmsg()
2252 copied = -EAGAIN; in mptcp_recvmsg()
2286 msk, skb_queue_empty(&sk->sk_receive_queue), copied); in mptcp_recvmsg()
2296 struct sock *sk = &icsk->icsk_inet.sk; in mptcp_retransmit_timer()
2302 if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags)) in mptcp_retransmit_timer()
2306 __set_bit(MPTCP_RETRANSMIT, &msk->cb_flags); in mptcp_retransmit_timer()
2340 min_stale_count = min_t(int, min_stale_count, subflow->stale_count); in mptcp_subflow_get_retrans()
2344 if (subflow->backup || subflow->request_bkup) { in mptcp_subflow_get_retrans()
2371 * keep it simple and re-inject the whole mptcp level rtx queue in __mptcp_retransmit_pending_data()
2381 msk->recovery_snd_nxt = msk->snd_nxt; in __mptcp_retransmit_pending_data()
2382 msk->recovery = true; in __mptcp_retransmit_pending_data()
2385 msk->first_pending = rtx_head; in __mptcp_retransmit_pending_data()
2386 msk->snd_burst = 0; in __mptcp_retransmit_pending_data()
2388 /* be sure to clear the "sent status" on all re-injected fragments */ in __mptcp_retransmit_pending_data()
2389 list_for_each_entry(cur, &msk->rtx_queue, list) { in __mptcp_retransmit_pending_data()
2390 if (!cur->already_sent) in __mptcp_retransmit_pending_data()
2392 cur->already_sent = 0; in __mptcp_retransmit_pending_data()
2410 if (((1 << ssk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) || in __mptcp_subflow_disconnect()
2412 /* The MPTCP code never waits on the subflow sockets, TCP-level in __mptcp_subflow_disconnect()
2425 * Outgoing subflows use in-kernel sockets.
2442 if (msk->in_accept_queue && msk->first == ssk && in __mptcp_close_ssk()
2446 mptcp_set_close_tout(sk, tcp_jiffies32 - (mptcp_close_timeout(sk) + 1)); in __mptcp_close_ssk()
2452 dispose_it = msk->free_first || ssk != msk->first; in __mptcp_close_ssk()
2454 list_del(&subflow->node); in __mptcp_close_ssk()
2462 ssk->sk_lingertime = 0; in __mptcp_close_ssk()
2464 subflow->send_fastclose = 1; in __mptcp_close_ssk()
2475 subflow->disposable = 1; in __mptcp_close_ssk()
2481 if (!inet_csk(ssk)->icsk_ulp_ops) { in __mptcp_close_ssk()
2498 if (ssk == msk->first) in __mptcp_close_ssk()
2499 WRITE_ONCE(msk->first, NULL); in __mptcp_close_ssk()
2511 if (list_is_singular(&msk->conn_list) && msk->first && in __mptcp_close_ssk()
2512 inet_sk_state_load(msk->first) == TCP_CLOSE) { in __mptcp_close_ssk()
2513 if (sk->sk_state != TCP_ESTABLISHED || in __mptcp_close_ssk()
2514 msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { in __mptcp_close_ssk()
2527 if (subflow->close_event_done) in mptcp_close_ssk()
2530 subflow->close_event_done = true; in mptcp_close_ssk()
2532 if (sk->sk_state == TCP_ESTABLISHED) in mptcp_close_ssk()
2564 /* 'subflow_data_ready' will re-sched once rx queue is empty */ in __mptcp_close_subflow()
2565 if (!skb_queue_empty_lockless(&ssk->sk_receive_queue)) in __mptcp_close_subflow()
2575 if (!inet_csk(sk)->icsk_mtup.probe_timestamp || in mptcp_close_tout_expired()
2576 sk->sk_state == TCP_CLOSE) in mptcp_close_tout_expired()
2580 inet_csk(sk)->icsk_mtup.probe_timestamp + mptcp_close_timeout(sk)); in mptcp_close_tout_expired()
2588 if (likely(!READ_ONCE(msk->rcv_fastclose))) in mptcp_check_fastclose()
2598 if (tcp_sk->sk_state != TCP_CLOSE) { in mptcp_check_fastclose()
2606 switch (sk->sk_state) { in mptcp_check_fastclose()
2608 WRITE_ONCE(sk->sk_err, ECONNREFUSED); in mptcp_check_fastclose()
2611 WRITE_ONCE(sk->sk_err, EPIPE); in mptcp_check_fastclose()
2616 WRITE_ONCE(sk->sk_err, ECONNRESET); in mptcp_check_fastclose()
2620 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); in mptcp_check_fastclose()
2622 set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); in mptcp_check_fastclose()
2628 sk->sk_state_change(sk); in mptcp_check_fastclose()
2651 WRITE_ONCE(icsk->icsk_retransmits, in __mptcp_retrans()
2652 icsk->icsk_retransmits + 1); in __mptcp_retrans()
2669 if (READ_ONCE(subflow->scheduled)) { in __mptcp_retrans()
2680 info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : in __mptcp_retrans()
2681 dfrag->already_sent; in __mptcp_retrans()
2687 spin_lock_bh(&msk->fallback_lock); in __mptcp_retrans()
2689 spin_unlock_bh(&msk->fallback_lock); in __mptcp_retrans()
2705 tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, in __mptcp_retrans()
2707 msk->allow_infinite_fallback = false; in __mptcp_retrans()
2709 spin_unlock_bh(&msk->fallback_lock); in __mptcp_retrans()
2715 msk->bytes_retrans += len; in __mptcp_retrans()
2716 dfrag->already_sent = max(dfrag->already_sent, len); in __mptcp_retrans()
2733 if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp) in mptcp_reset_tout_timer()
2736 close_timeout = (unsigned long)inet_csk(sk)->icsk_mtup.probe_timestamp - in mptcp_reset_tout_timer()
2742 timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout; in mptcp_reset_tout_timer()
2744 sk_reset_timer(sk, &sk->sk_timer, timeout); in mptcp_reset_tout_timer()
2749 struct sock *ssk = msk->first; in mptcp_mp_fail_no_response()
2759 WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); in mptcp_mp_fail_no_response()
2782 state = sk->sk_state; in mptcp_worker()
2794 if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) in mptcp_worker()
2802 if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) { in mptcp_worker()
2807 if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) in mptcp_worker()
2810 fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0; in mptcp_worker()
2823 INIT_LIST_HEAD(&msk->conn_list); in __mptcp_init_sock()
2824 INIT_LIST_HEAD(&msk->join_list); in __mptcp_init_sock()
2825 INIT_LIST_HEAD(&msk->rtx_queue); in __mptcp_init_sock()
2826 INIT_WORK(&msk->work, mptcp_worker); in __mptcp_init_sock()
2827 msk->out_of_order_queue = RB_ROOT; in __mptcp_init_sock()
2828 msk->first_pending = NULL; in __mptcp_init_sock()
2829 msk->timer_ival = TCP_RTO_MIN; in __mptcp_init_sock()
2830 msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; in __mptcp_init_sock()
2832 WRITE_ONCE(msk->first, NULL); in __mptcp_init_sock()
2833 inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; in __mptcp_init_sock()
2834 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); in __mptcp_init_sock()
2835 msk->allow_infinite_fallback = true; in __mptcp_init_sock()
2836 msk->allow_subflows = true; in __mptcp_init_sock()
2837 msk->recovery = false; in __mptcp_init_sock()
2838 msk->subflow_id = 1; in __mptcp_init_sock()
2839 msk->last_data_sent = tcp_jiffies32; in __mptcp_init_sock()
2840 msk->last_data_recv = tcp_jiffies32; in __mptcp_init_sock()
2841 msk->last_ack_recv = tcp_jiffies32; in __mptcp_init_sock()
2844 spin_lock_init(&msk->fallback_lock); in __mptcp_init_sock()
2846 /* re-use the csk retrans timer for MPTCP-level retrans */ in __mptcp_init_sock()
2847 timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); in __mptcp_init_sock()
2848 timer_setup(&sk->sk_timer, mptcp_tout_timer, 0); in __mptcp_init_sock()
2856 strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name, in mptcp_ca_reset()
2857 sizeof(mptcp_sk(sk)->ca_name)); in mptcp_ca_reset()
2861 icsk->icsk_ca_ops = NULL; in mptcp_ca_reset()
2872 return -ENOPROTOOPT; in mptcp_init_sock()
2874 if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net)) in mptcp_init_sock()
2875 return -ENOMEM; in mptcp_init_sock()
2884 set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); in mptcp_init_sock()
2892 sk->sk_rcvbuf = READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]); in mptcp_init_sock()
2893 sk->sk_sndbuf = READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]); in mptcp_init_sock()
2903 msk->first_pending = NULL; in __mptcp_clear_xmit()
2904 list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) in __mptcp_clear_xmit()
2912 if (cancel_work_sync(&msk->work)) in mptcp_cancel_work()
2920 switch (ssk->sk_state) { in mptcp_subflow_shutdown()
2931 ssk->sk_shutdown |= how; in mptcp_subflow_shutdown()
2937 WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt); in mptcp_subflow_shutdown()
2953 int oldstate = sk->sk_state; in mptcp_set_state()
2993 int next = (int)new_state[sk->sk_state]; in mptcp_close_state()
2994 int ns = next & TCP_STATE_MASK; in mptcp_close_state() local
2996 mptcp_set_state(sk, ns); in mptcp_close_state()
3007 msk, msk->snd_data_fin_enable, !!mptcp_send_head(sk), in mptcp_check_send_data_fin()
3008 msk->snd_nxt, msk->write_seq); in mptcp_check_send_data_fin()
3013 if (!msk->snd_data_fin_enable || msk->snd_nxt + 1 != msk->write_seq || in mptcp_check_send_data_fin()
3017 WRITE_ONCE(msk->snd_nxt, msk->write_seq); in mptcp_check_send_data_fin()
3031 msk, msk->snd_data_fin_enable, sk->sk_shutdown, sk->sk_state, in __mptcp_wr_shutdown()
3035 WRITE_ONCE(msk->write_seq, msk->write_seq + 1); in __mptcp_wr_shutdown()
3036 WRITE_ONCE(msk->snd_data_fin_enable, 1); in __mptcp_wr_shutdown()
3050 sk_stop_timer(sk, &sk->sk_timer); in __mptcp_destroy_sock()
3051 msk->pm.status = 0; in __mptcp_destroy_sock()
3054 sk->sk_prot->destroy(sk); in __mptcp_destroy_sock()
3081 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); in mptcp_check_listen_stop()
3082 ssk = mptcp_sk(sk)->first; in mptcp_check_listen_stop()
3101 WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); in __mptcp_close()
3103 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { in __mptcp_close()
3127 subflows_alive += ssk->sk_state != TCP_CLOSE; in __mptcp_close()
3132 if (ssk == msk->first) in __mptcp_close()
3133 subflow->fail_tout = 0; in __mptcp_close()
3138 ssk->sk_socket = NULL; in __mptcp_close()
3139 ssk->sk_wq = NULL; in __mptcp_close()
3151 pr_debug("msk=%p state=%d\n", sk, sk->sk_state); in __mptcp_close()
3154 if (sk->sk_state == TCP_CLOSE) { in __mptcp_close()
3184 msk->sk_v6_daddr = ssk->sk_v6_daddr; in mptcp_copy_inaddrs()
3185 msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr; in mptcp_copy_inaddrs()
3188 msk6->saddr = ssk6->saddr; in mptcp_copy_inaddrs()
3189 msk6->flow_label = ssk6->flow_label; in mptcp_copy_inaddrs()
3193 inet_sk(msk)->inet_num = inet_sk(ssk)->inet_num; in mptcp_copy_inaddrs()
3194 inet_sk(msk)->inet_dport = inet_sk(ssk)->inet_dport; in mptcp_copy_inaddrs()
3195 inet_sk(msk)->inet_sport = inet_sk(ssk)->inet_sport; in mptcp_copy_inaddrs()
3196 inet_sk(msk)->inet_daddr = inet_sk(ssk)->inet_daddr; in mptcp_copy_inaddrs()
3197 inet_sk(msk)->inet_saddr = inet_sk(ssk)->inet_saddr; in mptcp_copy_inaddrs()
3198 inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr; in mptcp_copy_inaddrs()
3207 * msk->firstsocket lock). in mptcp_disconnect()
3209 if (msk->fastopening) in mptcp_disconnect()
3210 return -EBUSY; in mptcp_disconnect()
3220 /* msk->subflow is still intact, the following will not free the first in mptcp_disconnect()
3228 spin_lock_bh(&msk->fallback_lock); in mptcp_disconnect()
3229 msk->allow_subflows = true; in mptcp_disconnect()
3230 msk->allow_infinite_fallback = true; in mptcp_disconnect()
3231 WRITE_ONCE(msk->flags, 0); in mptcp_disconnect()
3232 spin_unlock_bh(&msk->fallback_lock); in mptcp_disconnect()
3234 msk->cb_flags = 0; in mptcp_disconnect()
3235 msk->recovery = false; in mptcp_disconnect()
3236 WRITE_ONCE(msk->can_ack, false); in mptcp_disconnect()
3237 WRITE_ONCE(msk->fully_established, false); in mptcp_disconnect()
3238 WRITE_ONCE(msk->rcv_data_fin, false); in mptcp_disconnect()
3239 WRITE_ONCE(msk->snd_data_fin_enable, false); in mptcp_disconnect()
3240 WRITE_ONCE(msk->rcv_fastclose, false); in mptcp_disconnect()
3241 WRITE_ONCE(msk->use_64bit_ack, false); in mptcp_disconnect()
3242 WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); in mptcp_disconnect()
3245 msk->bytes_consumed = 0; in mptcp_disconnect()
3246 msk->bytes_acked = 0; in mptcp_disconnect()
3247 msk->bytes_received = 0; in mptcp_disconnect()
3248 msk->bytes_sent = 0; in mptcp_disconnect()
3249 msk->bytes_retrans = 0; in mptcp_disconnect()
3250 msk->rcvspace_init = 0; in mptcp_disconnect()
3252 WRITE_ONCE(sk->sk_shutdown, 0); in mptcp_disconnect()
3262 return &msk6->np; in mptcp_inet6_sk()
3274 opt = rcu_dereference(np->opt); in mptcp_copy_ip6_options()
3280 RCU_INIT_POINTER(newnp->opt, opt); in mptcp_copy_ip6_options()
3294 inet_opt = rcu_dereference(inet->inet_opt); in mptcp_copy_ip_options()
3297 inet_opt->opt.optlen, GFP_ATOMIC); in mptcp_copy_ip_options()
3301 RCU_INIT_POINTER(newinet->inet_opt, newopt); in mptcp_copy_ip_options()
3319 if (nsk->sk_family == AF_INET6) in mptcp_sk_clone_init()
3320 inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); in mptcp_sk_clone_init()
3326 if (nsk->sk_family == AF_INET6) in mptcp_sk_clone_init()
3333 WRITE_ONCE(msk->local_key, subflow_req->local_key); in mptcp_sk_clone_init()
3334 WRITE_ONCE(msk->token, subflow_req->token); in mptcp_sk_clone_init()
3335 msk->in_accept_queue = 1; in mptcp_sk_clone_init()
3336 WRITE_ONCE(msk->fully_established, false); in mptcp_sk_clone_init()
3337 if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) in mptcp_sk_clone_init()
3338 WRITE_ONCE(msk->csum_enabled, true); in mptcp_sk_clone_init()
3340 WRITE_ONCE(msk->write_seq, subflow_req->idsn + 1); in mptcp_sk_clone_init()
3341 WRITE_ONCE(msk->snd_nxt, msk->write_seq); in mptcp_sk_clone_init()
3342 WRITE_ONCE(msk->snd_una, msk->write_seq); in mptcp_sk_clone_init()
3343 WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); in mptcp_sk_clone_init()
3344 msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq; in mptcp_sk_clone_init()
3345 mptcp_init_sched(msk, mptcp_sk(sk)->sched); in mptcp_sk_clone_init()
3348 msk->subflow_id = 2; in mptcp_sk_clone_init()
3354 * not yet exposed to user-space in mptcp_sk_clone_init()
3359 WRITE_ONCE(msk->first, ssk); in mptcp_sk_clone_init()
3361 list_add(&subflow->node, &msk->conn_list); in mptcp_sk_clone_init()
3377 if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK) in mptcp_sk_clone_init()
3389 msk->rcvspace_init = 1; in mptcp_rcv_space_init()
3390 msk->rcvq_space.copied = 0; in mptcp_rcv_space_init()
3391 msk->rcvq_space.rtt_us = 0; in mptcp_rcv_space_init()
3393 msk->rcvq_space.time = tp->tcp_mstamp; in mptcp_rcv_space_init()
3396 msk->rcvq_space.space = min_t(u32, tp->rcv_wnd, in mptcp_rcv_space_init()
3397 TCP_INIT_CWND * tp->advmss); in mptcp_rcv_space_init()
3398 if (msk->rcvq_space.space == 0) in mptcp_rcv_space_init()
3399 msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT; in mptcp_rcv_space_init()
3413 __skb_queue_purge(&sk->sk_receive_queue); in mptcp_destroy_common()
3414 skb_rbtree_purge(&msk->out_of_order_queue); in mptcp_destroy_common()
3428 msk->free_first = 1; in mptcp_destroy()
3438 __set_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->cb_flags); in __mptcp_data_acked()
3446 __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); in __mptcp_check_push()
3456 __must_hold(&sk->sk_lock.slock) in mptcp_release_cb()
3461 unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED); in mptcp_release_cb()
3468 list_splice_init(&msk->join_list, &join_list); in mptcp_release_cb()
3477 msk->cb_flags &= ~flags; in mptcp_release_cb()
3478 spin_unlock_bh(&sk->sk_lock.slock); in mptcp_release_cb()
3489 sk->sk_data_ready(sk); in mptcp_release_cb()
3493 spin_lock_bh(&sk->sk_lock.slock); in mptcp_release_cb()
3496 if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags)) in mptcp_release_cb()
3498 if (unlikely(msk->cb_flags)) { in mptcp_release_cb()
3503 if (__test_and_clear_bit(MPTCP_SYNC_STATE, &msk->cb_flags) && msk->first) in mptcp_release_cb()
3504 __mptcp_sync_state(sk, msk->pending_state); in mptcp_release_cb()
3505 if (__test_and_clear_bit(MPTCP_ERROR_REPORT, &msk->cb_flags)) in mptcp_release_cb()
3507 if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk->cb_flags)) in mptcp_release_cb()
3522 if (READ_ONCE(mptcp_subflow_ctx(ssk)->fully_established)) in schedule_3rdack_retransmission()
3526 if (tp->srtt_us) in schedule_3rdack_retransmission()
3527 timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1)); in schedule_3rdack_retransmission()
3532 WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER); in schedule_3rdack_retransmission()
3533 smp_store_release(&icsk->icsk_ack.pending, in schedule_3rdack_retransmission()
3534 icsk->icsk_ack.pending | ICSK_ACK_SCHED | ICSK_ACK_TIMER); in schedule_3rdack_retransmission()
3535 sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout); in schedule_3rdack_retransmission()
3541 struct sock *sk = subflow->conn; in mptcp_subflow_process_delegated()
3548 __set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags); in mptcp_subflow_process_delegated()
3556 __set_bit(MPTCP_SYNC_SNDBUF, &mptcp_sk(sk)->cb_flags); in mptcp_subflow_process_delegated()
3581 pr_debug("msk=%p, ssk=%p\n", msk, msk->first); in mptcp_get_port()
3582 if (WARN_ON_ONCE(!msk->first)) in mptcp_get_port()
3583 return -EINVAL; in mptcp_get_port()
3585 return inet_csk_get_port(msk->first, snum); in mptcp_get_port()
3595 sk = subflow->conn; in mptcp_finish_connect()
3598 pr_debug("msk=%p, token=%u\n", sk, subflow->token); in mptcp_finish_connect()
3600 subflow->map_seq = subflow->iasn; in mptcp_finish_connect()
3601 subflow->map_subflow_seq = 1; in mptcp_finish_connect()
3603 /* the socket is not connected yet, no msk/subflow ops can access/race in mptcp_finish_connect()
3606 WRITE_ONCE(msk->local_key, subflow->local_key); in mptcp_finish_connect()
3613 write_lock_bh(&sk->sk_callback_lock); in mptcp_sock_graft()
3614 rcu_assign_pointer(sk->sk_wq, &parent->wq); in mptcp_sock_graft()
3616 write_unlock_bh(&sk->sk_callback_lock); in mptcp_sock_graft()
3622 struct mptcp_sock *msk = mptcp_sk(subflow->conn); in mptcp_finish_join()
3630 subflow->reset_reason = MPTCP_RST_EMPTCP; in mptcp_finish_join()
3635 if (!list_empty(&subflow->node)) { in mptcp_finish_join()
3636 spin_lock_bh(&msk->fallback_lock); in mptcp_finish_join()
3637 if (!msk->allow_subflows) { in mptcp_finish_join()
3638 spin_unlock_bh(&msk->fallback_lock); in mptcp_finish_join()
3642 spin_unlock_bh(&msk->fallback_lock); in mptcp_finish_join()
3660 list_add_tail(&subflow->node, &msk->conn_list); in mptcp_finish_join()
3664 list_add_tail(&subflow->node, &msk->join_list); in mptcp_finish_join()
3665 __set_bit(MPTCP_FLUSH_JOIN_LIST, &msk->cb_flags); in mptcp_finish_join()
3671 subflow->reset_reason = MPTCP_RST_EPROHIBIT; in mptcp_finish_join()
3691 if (sk->sk_state == TCP_LISTEN) in mptcp_ioctl_outq()
3692 return -EINVAL; in mptcp_ioctl_outq()
3694 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) in mptcp_ioctl_outq()
3697 delta = msk->write_seq - v; in mptcp_ioctl_outq()
3698 if (__mptcp_check_fallback(msk) && msk->first) { in mptcp_ioctl_outq()
3699 struct tcp_sock *tp = tcp_sk(msk->first); in mptcp_ioctl_outq()
3701 /* the first subflow is disconnected after close - see in mptcp_ioctl_outq()
3705 if (!((1 << msk->first->sk_state) & in mptcp_ioctl_outq()
3707 delta += READ_ONCE(tp->write_seq) - tp->snd_una; in mptcp_ioctl_outq()
3722 if (sk->sk_state == TCP_LISTEN) in mptcp_ioctl()
3723 return -EINVAL; in mptcp_ioctl()
3733 *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una)); in mptcp_ioctl()
3738 *karg = mptcp_ioctl_outq(msk, msk->snd_nxt); in mptcp_ioctl()
3742 return -ENOIOCTLCMD; in mptcp_ioctl()
3752 int err = -EINVAL; in mptcp_connect()
3765 if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) in mptcp_connect()
3768 if (subflow->request_mptcp) { in mptcp_connect()
3777 WRITE_ONCE(msk->write_seq, subflow->idsn); in mptcp_connect()
3778 WRITE_ONCE(msk->snd_nxt, subflow->idsn); in mptcp_connect()
3779 WRITE_ONCE(msk->snd_una, subflow->idsn); in mptcp_connect()
3786 if (!msk->fastopening) in mptcp_connect()
3792 if (ssk->sk_state != TCP_CLOSE) in mptcp_connect()
3796 err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len); in mptcp_connect()
3801 err = ssk->sk_prot->connect(ssk, uaddr, addr_len); in mptcp_connect()
3808 if (!msk->fastopening) in mptcp_connect()
3860 struct mptcp_sock *msk = mptcp_sk(sock->sk); in mptcp_bind()
3861 struct sock *ssk, *sk = sock->sk; in mptcp_bind()
3862 int err = -EINVAL; in mptcp_bind()
3871 if (sk->sk_family == AF_INET) in mptcp_bind()
3874 else if (sk->sk_family == AF_INET6) in mptcp_bind()
3887 struct mptcp_sock *msk = mptcp_sk(sock->sk); in mptcp_listen()
3888 struct sock *sk = sock->sk; in mptcp_listen()
3896 err = -EINVAL; in mptcp_listen()
3897 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) in mptcp_listen()
3915 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); in mptcp_listen()
3928 struct mptcp_sock *msk = mptcp_sk(sock->sk); in mptcp_stream_accept()
3936 ssk = READ_ONCE(msk->first); in mptcp_stream_accept()
3938 return -EINVAL; in mptcp_stream_accept()
3943 return arg->err; in mptcp_stream_accept()
3951 new_mptcp_sock = subflow->conn; in mptcp_stream_accept()
3953 /* is_mptcp should be false if subflow->conn is missing, see in mptcp_stream_accept()
3957 tcp_sk(newsk)->is_mptcp = 0; in mptcp_stream_accept()
3964 newsk->sk_kern_sock = arg->kern; in mptcp_stream_accept()
3968 set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags); in mptcp_stream_accept()
3970 msk->in_accept_queue = 0; in mptcp_stream_accept()
3972 /* set ssk->sk_socket of accept()ed flows to mptcp socket. in mptcp_stream_accept()
3978 if (!ssk->sk_socket) in mptcp_stream_accept()
3987 if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { in mptcp_stream_accept()
3988 __mptcp_close_ssk(newsk, msk->first, in mptcp_stream_accept()
3989 mptcp_subflow_ctx(msk->first), 0); in mptcp_stream_accept()
3990 if (unlikely(list_is_singular(&msk->conn_list))) in mptcp_stream_accept()
3995 newsk->sk_kern_sock = arg->kern; in mptcp_stream_accept()
3998 /* we are being invoked after accepting a non-mp-capable in mptcp_stream_accept()
4004 WRITE_ONCE(newsock->sk->sk_socket->ops, in mptcp_stream_accept()
4005 mptcp_fallback_tcp_ops(newsock->sk)); in mptcp_stream_accept()
4019 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); in mptcp_check_writeable()
4030 struct sock *sk = sock->sk; in mptcp_poll()
4040 pr_debug("msk=%p state=%d flags=%lx\n", msk, state, msk->flags); in mptcp_poll()
4042 struct sock *ssk = READ_ONCE(msk->first); in mptcp_poll()
4050 shutdown = READ_ONCE(sk->sk_shutdown); in mptcp_poll()
4070 if (READ_ONCE(sk->sk_err)) in mptcp_poll()
4118 mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0)); in mptcp_napi_poll()
4122 * In both cases we must dequeue the subflow here - on the same in mptcp_napi_poll()
4126 clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status); in mptcp_napi_poll()
4136 * will not try accessing the NULL napi->dev ptr in mptcp_napi_poll()
4157 INIT_LIST_HEAD(&delegated->head); in mptcp_proto_init()
4158 netif_napi_add_tx(mptcp_napi_dev, &delegated->napi, in mptcp_proto_init()
4160 napi_enable(&delegated->napi); in mptcp_proto_init()