Lines Matching +full:rate +full:- +full:ulp +full:- +full:ms

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
47 #include <linux/bpf-cgroup.h>
62 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
70 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
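The arithmetic in that comment: (60 + 60 + 8) - (20 + 20) = 128 - 40 = 88 bytes, i.e. the 88-byte TCP_MIN_MSS value this comment annotates.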
85 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
100 * to ~3sec-8min depending on RTO.
107 * 15 is ~13-30min depending on RTO.
126 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
131 * to combine FIN-WAIT-2 timeout with
132 * TIME-WAIT timer.
178 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
184 #define TCP_PAWS_WINDOW 1 /* Replay window for per-host
205 * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
234 /* Flags in tp->nonagle */
239 /* TCP thin-stream limits */
276 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
277 mem_cgroup_under_socket_pressure(sk->sk_memcg))
289 return (__s32)(seq1-seq2) < 0;
296 return seq3 - seq2 >= seq1 - seq2;
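
The two one-liners above are the classic wraparound-safe sequence comparisons (before() and between() in the source): casting the unsigned 32-bit difference to a signed value turns circular distance into an ordering. A minimal user-space sketch with hypothetical names, reproducing the same logic:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same trick as the listing: interpret the unsigned difference as signed. */
static bool seq_before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}

/* True when seq2 <= seq1 <= seq3 in circular sequence space. */
static bool seq_between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}

int main(void)
{
	/* 0xfffffff0 still counts as "before" 0x10 across the wrap. */
	printf("%d %d\n",
	       seq_before(0xfffffff0u, 0x10u),		/* prints 1 */
	       seq_between(0x10u, 0xfffffff0u, 0x20u));	/* prints 1 */
	return 0;
}
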
301 sk_wmem_queued_add(sk, -skb->truesize);
303 sk_mem_uncharge(sk, skb->truesize);
316 #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field)
317 #define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field)
318 #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
319 #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
361 if (icsk->icsk_ack.quick) {
365 if (pkts >= icsk->icsk_ack.quick) {
366 icsk->icsk_ack.quick = 0;
368 icsk->icsk_ack.ato = TCP_ATO_MIN;
370 icsk->icsk_ack.quick -= pkts;
438 * BPF SKB-less helpers
534 if (sk->sk_reuseport) {
537 reuse = rcu_dereference(sk->sk_reuseport_cb);
539 last_overflow = READ_ONCE(reuse->synq_overflow_ts);
542 WRITE_ONCE(reuse->synq_overflow_ts, now);
547 last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
549 WRITE_ONCE(tcp_sk_rw(sk)->rx_opt.ts_recent_stamp, now);
558 if (sk->sk_reuseport) {
561 reuse = rcu_dereference(sk->sk_reuseport_cb);
563 last_overflow = READ_ONCE(reuse->synq_overflow_ts);
564 return !time_between32(now, last_overflow - HZ,
570 last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
574 * 'last_overflow - HZ' as lower bound. That's because a concurrent
579 return !time_between32(now, last_overflow - HZ,
591 /* Convert one nsec 64bit timestamp to ts (ms or usec resolution) */
609 return READ_ONCE(net->ipv4.sysctl_tcp_ecn) ||
616 return skb->sk;
690 if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1)
693 if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1)
709 * to sub-MSS pieces for the sake of SWS or making sure there
715 if (tp->max_window > TCP_MSS_DEFAULT)
716 cutoff = (tp->max_window >> 1);
718 cutoff = tp->max_window;
721 return max_t(int, cutoff, 68U - tp->tcp_header_len);
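
These fragments belong to the helper that clamps a packet size to half the peer's largest-ever advertised window (falling back to the whole window when it is tiny), so the sender neither splits into sub-MSS pieces nor starves fast recovery of packets. A stand-alone sketch of that clamp, assuming TCP_MSS_DEFAULT is the usual 536 bytes and simplifying the types; the pass-through branch for packets already below the cutoff is not part of this listing:

#define TCP_MSS_DEFAULT	536	/* assumed IPv4 default MSS */

/* Hypothetical user-space rendering of the clamp shown above. */
static int bound_to_half_wnd(int max_window, int tcp_header_len, int pktsize)
{
	int cutoff;

	if (max_window > TCP_MSS_DEFAULT)
		cutoff = max_window >> 1;	/* half of the peer's max window */
	else
		cutoff = max_window;		/* tiny window: don't subdivide */

	if (cutoff && pktsize > cutoff) {
		int floor = 68 - tcp_header_len;

		return cutoff > floor ? cutoff : floor;
	}
	return pktsize;
}

For example, bound_to_half_wnd(4000, 20, 3000) returns 2000, while a packet already below the cutoff passes through unchanged.
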
729 /* Read 'sendfile()'-style from a TCP socket */
744 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
745 inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
750 return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
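A worked instance of the formula on the last line: srtt_us holds the smoothed RTT left-shifted by 3 in microseconds, so with srtt_us = 200000 (a 25 ms estimate) and rttvar_us = 5000 the timeout is (200000 >> 3) + 5000 = 30000 us = 30 ms before the usecs_to_jiffies() conversion; the preceding lines cap the stored icsk_rto at TCP_RTO_MAX.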
759 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
766 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
773 if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
774 tp->rcv_wnd &&
775 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
776 !tp->urg_data)
786 u32 rto_min = inet_csk(sk)->icsk_rto_min;
806 return minmax_get(&tp->rtt_min);
815 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
832 * historically has been the same until linux-4.13.
838 * It is no longer tied to jiffies, but to a 1 ms clock.
858 /* TCP Timestamp included in TS option (RFC 1323) can either use ms
870 return div_u64(tp->tcp_mstamp, USEC_PER_MSEC);
875 if (tp->tcp_usec_ts)
876 return tp->tcp_mstamp;
884 return max_t(s64, t1 - t0, 0);
890 return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
893 /* Provide skb TSval in usec or ms unit */
899 return div_u64(skb->skb_mstamp_ns, NSEC_PER_MSEC);
904 return tcp_clock_ts(tcptw->tw_sk.tw_usec_ts) + tcptw->tw_ts_offset;
909 return tcp_clock_ts(treq->req_usec_ts) + treq->ts_off;
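
A compact user-space sketch of the unit handling described above: one nanosecond clock is scaled to TSval units of either a microsecond or a millisecond, and a per-connection offset (tw_ts_offset / ts_off in the listing) is added on top. Names and constants here are assumptions of the sketch:

#include <stdint.h>

#define NSEC_PER_USEC	1000ULL
#define NSEC_PER_MSEC	1000000ULL

/* TSval from a nanosecond clock, at usec or ms resolution. */
static uint32_t clock_ts(uint64_t clock_ns, int usec_ts)
{
	return usec_ts ? (uint32_t)(clock_ns / NSEC_PER_USEC)
		       : (uint32_t)(clock_ns / NSEC_PER_MSEC);
}

/* Per-connection TSval: shared clock plus the connection's offset. */
static uint32_t conn_tsval(uint64_t clock_ns, int usec_ts, uint32_t ts_offset)
{
	return clock_ts(clock_ns, usec_ts) + ts_offset;
}
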
939 * TCP per-packet control information to the transmission code.
940 * We also store the host-order sequence numbers here.
942 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
968 #define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1)
989 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
995 * as TCP moves IP6CB into a different location in skb->cb[]
999 return TCP_SKB_CB(skb)->header.h6.iif;
1004 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
1006 return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
1013 if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
1014 return TCP_SKB_CB(skb)->header.h6.iif;
1031 if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
1032 return TCP_SKB_CB(skb)->header.h4.iif;
1042 return TCP_SKB_CB(skb)->tcp_gso_segs;
1047 TCP_SKB_CB(skb)->tcp_gso_segs = segs;
1052 TCP_SKB_CB(skb)->tcp_gso_segs += segs;
1058 return TCP_SKB_CB(skb)->tcp_gso_size;
1063 return likely(!TCP_SKB_CB(skb)->eor);
1093 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
1123 /* A rate sample measures the number of (original/retransmitted) data
1125 * The tcp_rate.c code fills in the rate sample, and congestion
1128 * setting cwnd and pacing rate.
1133 u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
1134 u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */
1137 long interval_us; /* time for tp->delivered to incr "delivered" */
1140 long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
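
These comments describe the rate-sample structure that congestion control modules consume: packets newly delivered over a measured interval, from which a delivery rate follows. A hedged sketch of that computation (the helper and its mss parameter are assumptions, not part of the listing):

#include <stdint.h>

/* Delivery rate in bytes per second from one rate sample.
 * delivered counts packets; mss converts that count to bytes.
 * Returns 0 when the sample carries no valid interval. */
static uint64_t sample_delivery_rate(uint32_t delivered, long interval_us,
				     uint32_t mss)
{
	if (interval_us <= 0)
		return 0;
	return (uint64_t)delivered * mss * 1000000ULL / (uint64_t)interval_us;
}
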
1174 /* call when packets are delivered to update cwnd and pacing rate,
1242 return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
1249 if (icsk->icsk_ca_ops->cwnd_event)
1250 icsk->icsk_ca_ops->cwnd_event(sk, event);
1273 * tcp_is_sack - SACK enabled
1274 * tcp_is_reno - No SACK
1278 return likely(tp->rx_opt.sack_ok);
1288 return tp->sacked_out + tp->lost_out;
1297 * tp->packets_out to determine if the send queue is empty or not.
1307 return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
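Worked example: with packets_out = 20, sacked_out = 5, lost_out = 2 and retrans_out = 3, left_out is 5 + 2 = 7 and the in-flight estimate is 20 - 7 + 3 = 16 segments; SACKed or presumed-lost segments no longer occupy the pipe, while retransmissions do.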
1314 return tp->snd_cwnd;
1320 tp->snd_cwnd = val;
1325 return tcp_snd_cwnd(tp) < tp->snd_ssthresh;
1330 return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
1336 (1 << inet_csk(sk)->icsk_ca_state);
1339 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
1348 return tp->snd_ssthresh;
1350 return max(tp->snd_ssthresh,
1356 #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
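
The max() at source line 1350 is truncated in this listing; going by the comment above it, outside a cwnd-reduction episode ssthresh is reported as at least a fixed fraction of the current cwnd. A sketch of that idea (the 3/4 fraction is what current kernels use, stated here as an assumption):

#include <stdbool.h>
#include <stdint.h>

/* Sketch of tcp_current_ssthresh()'s intent: during CWR/Recovery report
 * the real ssthresh, otherwise never report less than ~3/4 of cwnd. */
static uint32_t current_ssthresh_sketch(bool in_cwnd_reduction,
					uint32_t snd_ssthresh, uint32_t cwnd)
{
	uint32_t floor = (cwnd >> 1) + (cwnd >> 2);	/* 3/4 of cwnd */

	if (in_cwnd_reduction)
		return snd_ssthresh;
	return snd_ssthresh > floor ? snd_ssthresh : floor;
}
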
1372 return tp->snd_una + tp->snd_wnd;
1386 * usage, and allow an application-limited process to probe bw more aggressively.
1392 if (tp->is_cwnd_limited)
1397 return tcp_snd_cwnd(tp) < 2 * tp->max_packets_out;
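In the surrounding function (only partly shown here) that final comparison applies during slow start: with max_packets_out = 10 and cwnd = 16 the sender still counts as cwnd-limited (16 < 2 * 10), so cwnd keeps growing even if the application briefly ran out of data to send.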
1410 return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
1418 s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
1435 * so make sure the timer we arm now is at least 200ms in the future,
1436 * regardless of current icsk_rto value (as it could be ~2ms)
1440 return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN);
1448 inet_csk(sk)->icsk_backoff);
1456 if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
1463 tp->snd_wl1 = seq;
1468 tp->snd_wl1 = seq;
1497 rx_opt->dsack = 0;
1498 rx_opt->num_sacks = 0;
1505 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
1509 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
1510 tp->packets_out || ca_ops->cong_control)
1512 delta = tcp_jiffies32 - tp->lsndtime;
1513 if (delta > inet_csk(sk)->icsk_rto)
1532 return __tcp_win_from_space(tcp_sk(sk)->scaling_ratio, space);
1546 return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win);
1549 /* Assume a 50% default for skb->len/skb->truesize ratio.
1552 #define TCP_DEFAULT_SCALING_RATIO (1 << (TCP_RMEM_TO_WIN_SCALE - 1))
1556 tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO;
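
With the 50% default above, the receive window derived from a given amount of buffer space is simply half of it. A sketch of the scaling arithmetic, assuming TCP_RMEM_TO_WIN_SCALE is 8 (so the default ratio is 1 << 7 = 128 out of 256):

#define RMEM_TO_WIN_SCALE	8				/* assumed scale */
#define DEFAULT_SCALING_RATIO	(1 << (RMEM_TO_WIN_SCALE - 1))	/* 128 = 50% */

/* Hypothetical rendering of __tcp_win_from_space()'s arithmetic. */
static int win_from_space(int scaling_ratio, int space)
{
	return (int)(((long long)space * scaling_ratio) >> RMEM_TO_WIN_SCALE);
}
/* win_from_space(DEFAULT_SCALING_RATIO, 1 << 20) == 524288 (512 KiB). */
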
1562 return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
1563 READ_ONCE(sk->sk_backlog.len) -
1564 atomic_read(&sk->sk_rmem_alloc));
1569 return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
1577 tp->rcv_ssthresh = min(tp->rcv_ssthresh, new_ssthresh);
1579 tp->rcv_ssthresh = max_t(u32, tp->rcv_ssthresh,
1585 __tcp_adjust_rcv_ssthresh(sk, 4U * tcp_sk(sk)->advmss);
1604 rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1605 threshold = rcvbuf - (rcvbuf >> 3);
1607 return atomic_read(&sk->sk_rmem_alloc) > threshold;
1613 int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq);
1619 (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss);
1637 val = READ_ONCE(tp->keepalive_intvl);
1639 return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
1648 val = READ_ONCE(tp->keepalive_time);
1650 return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
1661 val = READ_ONCE(tp->keepalive_probes);
1663 return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
1668 const struct inet_connection_sock *icsk = &tp->inet_conn;
1670 return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime,
1671 tcp_jiffies32 - tp->rcv_tstamp);
1676 int fin_timeout = tcp_sk(sk)->linger2 ? :
1677 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
1678 const int rto = inet_csk(sk)->icsk_rto;
1680 if (fin_timeout < (rto << 2) - (rto >> 1))
1681 fin_timeout = (rto << 2) - (rto >> 1);
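The lower bound works out to 4*RTO - RTO/2 = 3.5*RTO: with an RTO of 200 ms, for example, the effective FIN-WAIT-2 timeout is never allowed below 700 ms.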
1689 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win)
1692 rx_opt->ts_recent_stamp + TCP_PAWS_WRAP)))
1699 if (!rx_opt->ts_recent)
1715 out-of-sync and half-open connections will not be reset.
1718 via reboots. Linux-2.2 DOES NOT!
1723 rx_opt->ts_recent_stamp + TCP_PAWS_MSL))
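
The heart of the PAWS test at source line 1689 is the same signed-difference trick used for sequence numbers, applied to timestamps: the incoming TSval passes if it is not further behind the last accepted timestamp than the allowed window. A minimal sketch with hypothetical naming:

#include <stdbool.h>
#include <stdint.h>

/* Accept rcv_tsval unless it lags ts_recent by more than paws_win ticks;
 * the signed cast keeps the comparison correct across TSval wraparound. */
static bool paws_tsval_ok(uint32_t ts_recent, uint32_t rcv_tsval, int paws_win)
{
	return (int32_t)(ts_recent - rcv_tsval) <= paws_win;
}
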
1737 TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1);
1743 tp->lost_skb_hint = NULL;
1749 tp->retransmit_skb_hint = NULL;
1754 /* - key database */
1767 /* - sock block */
1773 /* - pseudo header */
1797 * struct tcp_sigpool - per-CPU pool of ahash_requests
1798 * @scratch: per-CPU temporary area, that can be used between
1801 * @req: pre-allocated ahash request
1816 * tcp_sigpool_start - disable bh and start using tcp_sigpool_ahash
1824 * tcp_sigpool_end - enable bh and stop using tcp_sigpool
1829 /* - functions */
1867 #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
1947 ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
1949 ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
1957 if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
1958 orig->len == foc->len &&
1959 !memcmp(orig->val, foc->val, foc->len))
1967 return ctx->num;
1971 * chronograph-like stats that are mutually exclusive.
1975 TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */
1984 /* This helper is needed, because skb->tcp_tsorted_anchor uses
1985 * the same memory storage as skb->destructor/_skb_refdst
1989 skb->destructor = NULL;
1990 skb->_skb_refdst = 0UL;
1994 unsigned long _save = skb->_skb_refdst; \
1995 skb->_skb_refdst = 0UL;
1998 skb->_skb_refdst = _save; \
2005 return skb_rb_first(&sk->tcp_rtx_queue);
2010 return skb_rb_last(&sk->tcp_rtx_queue);
2015 return skb_peek_tail(&sk->sk_write_queue);
2019 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
2023 return skb_peek(&sk->sk_write_queue);
2029 return skb_queue_is_last(&sk->sk_write_queue, skb);
2033 * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
2037 * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
2043 return tp->write_seq == tp->snd_nxt;
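In other words, unsent payload exists exactly when write_seq (one past the last byte the application has queued) is ahead of snd_nxt (the next sequence to transmit): with write_seq = snd_nxt = 1000 the write queue counts as empty even if skbs still sit on the list, while the retransmit queue's emptiness is checked separately against its rbtree on the following line of the listing.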
2048 return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
2058 __skb_queue_tail(&sk->sk_write_queue, skb);
2061 if (sk->sk_write_queue.next == skb)
2070 __skb_queue_before(&sk->sk_write_queue, skb, new);
2076 __skb_unlink(skb, &sk->sk_write_queue);
2084 rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
2089 list_del(&skb->tcp_tsorted_anchor);
2099 TCP_SKB_CB(skb)->eor = 1;
2107 __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle);
2117 if (!tp->sacked_out)
2118 return tp->snd_una;
2120 if (tp->highest_sack == NULL)
2121 return tp->snd_nxt;
2123 return TCP_SKB_CB(tp->highest_sack)->seq;
2128 tcp_sk(sk)->highest_sack = skb_rb_next(skb);
2133 return tcp_sk(sk)->highest_sack;
2138 tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk);
2147 tcp_sk(sk)->highest_sack = new;
2153 switch (sk->sk_state) {
2155 return inet_twsk(sk)->tw_transparent;
2157 return inet_rsk(inet_reqsk(sk))->no_srccheck;
2163 * increased latency). Used to trigger latency-reducing mechanisms.
2167 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp);
2220 val = READ_ONCE(tp->notsent_lowat);
2222 return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
2237 /* TCP af-specific functions */
2314 return ops->cookie_init_seq(skb, mss);
2353 ao = rcu_dereference_protected(tp->ao_info,
2356 out->ao_key = READ_ONCE(ao->current_key);
2357 out->type = TCP_KEY_AO;
2364 rcu_access_pointer(tp->md5sig_info)) {
2365 out->md5_key = tp->af_specific->md5_lookup(sk, sk);
2366 if (out->md5_key) {
2367 out->type = TCP_KEY_MD5;
2372 out->type = TCP_KEY_NONE;
2378 return key->type == TCP_KEY_MD5;
2385 return key->type == TCP_KEY_AO;
2425 plb->consec_cong_rounds = 0;
2426 plb->pause_until = 0;
2439 tcp_sk(sk)->packets_out, tcp_sk(sk)->sacked_out,
2440 tcp_sk(sk)->lost_out, tcp_sk(sk)->retrans_out,
2441 tcp_sk(sk)->tlp_high_seq, sk->sk_state,
2442 inet_csk(sk)->icsk_ca_state,
2443 tcp_sk(sk)->advmss, tcp_sk(sk)->mss_cache,
2444 inet_csk(sk)->icsk_pmtu_cookie);
2451 u32 rto = inet_csk(sk)->icsk_rto;
2456 return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
2470 const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
2473 if (opt->optlen) {
2474 int opt_size = sizeof(*dopt) + opt->optlen;
2477 if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
2485 /* locally generated TCP pure ACKs have skb->truesize == 2
2492 return skb->truesize == 2;
2497 skb->truesize = 2;
2505 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
2508 !tp->urg_data ||
2509 before(tp->urg_seq, tp->copied_seq) ||
2510 !before(tp->urg_seq, tp->rcv_nxt)) {
2512 answ = tp->rcv_nxt - tp->copied_seq;
2516 answ--;
2518 answ = tp->urg_seq - tp->copied_seq;
2530 segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs);
2535 WRITE_ONCE(tp->segs_in, tp->segs_in + segs_in);
2536 if (skb->len > tcp_hdrlen(skb))
2537 WRITE_ONCE(tp->data_segs_in, tp->data_segs_in + segs_in);
2549 atomic_inc(&((struct sock *)sk)->sk_drops);
2566 /* initialize ulp */
2568 /* update ulp */
2571 /* cleanup ulp */
2576 /* clone ulp */
2593 __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
2627 skops->skb = skb;
2628 skops->skb_data_end = skb->data + end_offset;
2664 ret = -1;
2686 return -EPERM;
2691 return -EPERM;
2697 return -EPERM;
2751 skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC;
2760 u32 delay = (sk->sk_state == TCP_TIME_WAIT) ?
2761 tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay;
2785 *aoh = (struct tcp_ao_hdr *)(ao_tmp - 2);
2801 ao_info = rcu_dereference_check(tcp_sk(sk)->ao_info,
2806 ao_key = tcp_ao_do_lookup(sk, l3index, saddr, family, -1, -1);
2807 if (ao_info->ao_required || ao_key) {
2810 atomic64_inc(&ao_info->counters.ao_required);