109c434b8SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 22b0a8c9eSKenneth Klette Jonassen /* 32b0a8c9eSKenneth Klette Jonassen * CAIA Delay-Gradient (CDG) congestion control 42b0a8c9eSKenneth Klette Jonassen * 52b0a8c9eSKenneth Klette Jonassen * This implementation is based on the paper: 62b0a8c9eSKenneth Klette Jonassen * D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using 72b0a8c9eSKenneth Klette Jonassen * delay gradients." In IFIP Networking, pages 328-341. Springer, 2011. 82b0a8c9eSKenneth Klette Jonassen * 92b0a8c9eSKenneth Klette Jonassen * Scavenger traffic (Less-than-Best-Effort) should disable coexistence 102b0a8c9eSKenneth Klette Jonassen * heuristics using parameters use_shadow=0 and use_ineff=0. 112b0a8c9eSKenneth Klette Jonassen * 122b0a8c9eSKenneth Klette Jonassen * Parameters window, backoff_beta, and backoff_factor are crucial for 132b0a8c9eSKenneth Klette Jonassen * throughput and delay. Future work is needed to determine better defaults, 142b0a8c9eSKenneth Klette Jonassen * and to provide guidelines for use in different environments/contexts. 152b0a8c9eSKenneth Klette Jonassen * 162b0a8c9eSKenneth Klette Jonassen * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/. 172b0a8c9eSKenneth Klette Jonassen * Parameter window is only configurable when loading tcp_cdg as a module. 182b0a8c9eSKenneth Klette Jonassen * 192b0a8c9eSKenneth Klette Jonassen * Notable differences from paper/FreeBSD: 202b0a8c9eSKenneth Klette Jonassen * o Using Hybrid Slow start and Proportional Rate Reduction. 212b0a8c9eSKenneth Klette Jonassen * o Add toggle for shadow window mechanism. Suggested by David Hayes. 222b0a8c9eSKenneth Klette Jonassen * o Add toggle for non-congestion loss tolerance. 232b0a8c9eSKenneth Klette Jonassen * o Scaling parameter G is changed to a backoff factor; 242b0a8c9eSKenneth Klette Jonassen * conversion is given by: backoff_factor = 1000/(G * window). 252b0a8c9eSKenneth Klette Jonassen * o Limit shadow window to 2 * cwnd, or to cwnd when application limited. 262b0a8c9eSKenneth Klette Jonassen * o More accurate e^-x. 272b0a8c9eSKenneth Klette Jonassen */ 282b0a8c9eSKenneth Klette Jonassen #include <linux/kernel.h> 292b0a8c9eSKenneth Klette Jonassen #include <linux/random.h> 302b0a8c9eSKenneth Klette Jonassen #include <linux/module.h> 31e6017571SIngo Molnar #include <linux/sched/clock.h> 32e6017571SIngo Molnar 332b0a8c9eSKenneth Klette Jonassen #include <net/tcp.h> 342b0a8c9eSKenneth Klette Jonassen 352b0a8c9eSKenneth Klette Jonassen #define HYSTART_ACK_TRAIN 1 362b0a8c9eSKenneth Klette Jonassen #define HYSTART_DELAY 2 372b0a8c9eSKenneth Klette Jonassen 382b0a8c9eSKenneth Klette Jonassen static int window __read_mostly = 8; 392b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */ 402b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_factor __read_mostly = 42; 412b0a8c9eSKenneth Klette Jonassen static unsigned int hystart_detect __read_mostly = 3; 422b0a8c9eSKenneth Klette Jonassen static unsigned int use_ineff __read_mostly = 5; 432b0a8c9eSKenneth Klette Jonassen static bool use_shadow __read_mostly = true; 442b0a8c9eSKenneth Klette Jonassen static bool use_tolerance __read_mostly; 452b0a8c9eSKenneth Klette Jonassen 462b0a8c9eSKenneth Klette Jonassen module_param(window, int, 0444); 472b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)"); 482b0a8c9eSKenneth Klette Jonassen module_param(backoff_beta, uint, 0644); 492b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)"); 502b0a8c9eSKenneth Klette Jonassen module_param(backoff_factor, uint, 0644); 512b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor"); 522b0a8c9eSKenneth Klette Jonassen module_param(hystart_detect, uint, 0644); 532b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start " 542b0a8c9eSKenneth Klette Jonassen "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)"); 552b0a8c9eSKenneth Klette Jonassen module_param(use_ineff, uint, 0644); 562b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)"); 572b0a8c9eSKenneth Klette Jonassen module_param(use_shadow, bool, 0644); 582b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); 592b0a8c9eSKenneth Klette Jonassen module_param(use_tolerance, bool, 0644); 602b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); 612b0a8c9eSKenneth Klette Jonassen 62f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax { 632b0a8c9eSKenneth Klette Jonassen union { 642b0a8c9eSKenneth Klette Jonassen struct { 652b0a8c9eSKenneth Klette Jonassen s32 min; 662b0a8c9eSKenneth Klette Jonassen s32 max; 672b0a8c9eSKenneth Klette Jonassen }; 682b0a8c9eSKenneth Klette Jonassen u64 v64; 692b0a8c9eSKenneth Klette Jonassen }; 702b0a8c9eSKenneth Klette Jonassen }; 712b0a8c9eSKenneth Klette Jonassen 722b0a8c9eSKenneth Klette Jonassen enum cdg_state { 732b0a8c9eSKenneth Klette Jonassen CDG_UNKNOWN = 0, 742b0a8c9eSKenneth Klette Jonassen CDG_NONFULL = 1, 752b0a8c9eSKenneth Klette Jonassen CDG_FULL = 2, 762b0a8c9eSKenneth Klette Jonassen CDG_BACKOFF = 3, 772b0a8c9eSKenneth Klette Jonassen }; 782b0a8c9eSKenneth Klette Jonassen 792b0a8c9eSKenneth Klette Jonassen struct cdg { 80f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax rtt; 81f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax rtt_prev; 82f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax *gradients; 83f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax gsum; 842b0a8c9eSKenneth Klette Jonassen bool gfilled; 852b0a8c9eSKenneth Klette Jonassen u8 tail; 862b0a8c9eSKenneth Klette Jonassen u8 state; 872b0a8c9eSKenneth Klette Jonassen u8 delack; 882b0a8c9eSKenneth Klette Jonassen u32 rtt_seq; 892b0a8c9eSKenneth Klette Jonassen u32 shadow_wnd; 902b0a8c9eSKenneth Klette Jonassen u16 backoff_cnt; 912b0a8c9eSKenneth Klette Jonassen u16 sample_cnt; 922b0a8c9eSKenneth Klette Jonassen s32 delay_min; 932b0a8c9eSKenneth Klette Jonassen u32 last_ack; 942b0a8c9eSKenneth Klette Jonassen u32 round_start; 952b0a8c9eSKenneth Klette Jonassen }; 962b0a8c9eSKenneth Klette Jonassen 972b0a8c9eSKenneth Klette Jonassen /** 982b0a8c9eSKenneth Klette Jonassen * nexp_u32 - negative base-e exponential 992b0a8c9eSKenneth Klette Jonassen * @ux: x in units of micro 1002b0a8c9eSKenneth Klette Jonassen * 1012b0a8c9eSKenneth Klette Jonassen * Returns exp(ux * -1e-6) * U32_MAX. 1022b0a8c9eSKenneth Klette Jonassen */ 1032b0a8c9eSKenneth Klette Jonassen static u32 __pure nexp_u32(u32 ux) 1042b0a8c9eSKenneth Klette Jonassen { 1052b0a8c9eSKenneth Klette Jonassen static const u16 v[] = { 1062b0a8c9eSKenneth Klette Jonassen /* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */ 1072b0a8c9eSKenneth Klette Jonassen 65535, 1082b0a8c9eSKenneth Klette Jonassen 65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422, 1092b0a8c9eSKenneth Klette Jonassen 61378, 57484, 50423, 38795, 22965, 8047, 987, 14, 1102b0a8c9eSKenneth Klette Jonassen }; 1112b0a8c9eSKenneth Klette Jonassen u32 msb = ux >> 8; 1122b0a8c9eSKenneth Klette Jonassen u32 res; 1132b0a8c9eSKenneth Klette Jonassen int i; 1142b0a8c9eSKenneth Klette Jonassen 1152b0a8c9eSKenneth Klette Jonassen /* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */ 1162b0a8c9eSKenneth Klette Jonassen if (msb > U16_MAX) 1172b0a8c9eSKenneth Klette Jonassen return 0; 1182b0a8c9eSKenneth Klette Jonassen 1192b0a8c9eSKenneth Klette Jonassen /* Scale first eight bits linearly: */ 1202b0a8c9eSKenneth Klette Jonassen res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000); 1212b0a8c9eSKenneth Klette Jonassen 1222b0a8c9eSKenneth Klette Jonassen /* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */ 1232b0a8c9eSKenneth Klette Jonassen for (i = 1; msb; i++, msb >>= 1) { 1242b0a8c9eSKenneth Klette Jonassen u32 y = v[i & -(msb & 1)] + U32_C(1); 1252b0a8c9eSKenneth Klette Jonassen 1262b0a8c9eSKenneth Klette Jonassen res = ((u64)res * y) >> 16; 1272b0a8c9eSKenneth Klette Jonassen } 1282b0a8c9eSKenneth Klette Jonassen 1292b0a8c9eSKenneth Klette Jonassen return res; 1302b0a8c9eSKenneth Klette Jonassen } 1312b0a8c9eSKenneth Klette Jonassen 1322b0a8c9eSKenneth Klette Jonassen /* Based on the HyStart algorithm (by Ha et al.) that is implemented in 1332b0a8c9eSKenneth Klette Jonassen * tcp_cubic. Differences/experimental changes: 1342b0a8c9eSKenneth Klette Jonassen * o Using Hayes' delayed ACK filter. 1352b0a8c9eSKenneth Klette Jonassen * o Using a usec clock for the ACK train. 1362b0a8c9eSKenneth Klette Jonassen * o Reset ACK train when application limited. 1372b0a8c9eSKenneth Klette Jonassen * o Invoked at any cwnd (i.e. also when cwnd < 16). 1382b0a8c9eSKenneth Klette Jonassen * o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh). 1392b0a8c9eSKenneth Klette Jonassen */ 1402b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_hystart_update(struct sock *sk) 1412b0a8c9eSKenneth Klette Jonassen { 1422b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 1432b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 1442b0a8c9eSKenneth Klette Jonassen 1452b0a8c9eSKenneth Klette Jonassen ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min); 1462b0a8c9eSKenneth Klette Jonassen if (ca->delay_min == 0) 1472b0a8c9eSKenneth Klette Jonassen return; 1482b0a8c9eSKenneth Klette Jonassen 1492b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_ACK_TRAIN) { 150825e1c52SEric Dumazet u32 now_us = tp->tcp_mstamp; 1512b0a8c9eSKenneth Klette Jonassen 1522b0a8c9eSKenneth Klette Jonassen if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) { 1532b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1542b0a8c9eSKenneth Klette Jonassen ca->round_start = now_us; 1552b0a8c9eSKenneth Klette Jonassen } else if (before(now_us, ca->last_ack + 3000)) { 1562b0a8c9eSKenneth Klette Jonassen u32 base_owd = max(ca->delay_min / 2U, 125U); 1572b0a8c9eSKenneth Klette Jonassen 1582b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1592b0a8c9eSKenneth Klette Jonassen if (after(now_us, ca->round_start + base_owd)) { 160c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1612b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINDETECT); 162c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1632b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINCWND, 16440570375SEric Dumazet tcp_snd_cwnd(tp)); 16540570375SEric Dumazet tp->snd_ssthresh = tcp_snd_cwnd(tp); 1662b0a8c9eSKenneth Klette Jonassen return; 1672b0a8c9eSKenneth Klette Jonassen } 1682b0a8c9eSKenneth Klette Jonassen } 1692b0a8c9eSKenneth Klette Jonassen } 1702b0a8c9eSKenneth Klette Jonassen 1712b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_DELAY) { 1722b0a8c9eSKenneth Klette Jonassen if (ca->sample_cnt < 8) { 1732b0a8c9eSKenneth Klette Jonassen ca->sample_cnt++; 1742b0a8c9eSKenneth Klette Jonassen } else { 1752b0a8c9eSKenneth Klette Jonassen s32 thresh = max(ca->delay_min + ca->delay_min / 8U, 1762b0a8c9eSKenneth Klette Jonassen 125U); 1772b0a8c9eSKenneth Klette Jonassen 1782b0a8c9eSKenneth Klette Jonassen if (ca->rtt.min > thresh) { 179c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1802b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYDETECT); 181c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1822b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYCWND, 18340570375SEric Dumazet tcp_snd_cwnd(tp)); 18440570375SEric Dumazet tp->snd_ssthresh = tcp_snd_cwnd(tp); 1852b0a8c9eSKenneth Klette Jonassen } 1862b0a8c9eSKenneth Klette Jonassen } 1872b0a8c9eSKenneth Klette Jonassen } 1882b0a8c9eSKenneth Klette Jonassen } 1892b0a8c9eSKenneth Klette Jonassen 1902b0a8c9eSKenneth Klette Jonassen static s32 tcp_cdg_grad(struct cdg *ca) 1912b0a8c9eSKenneth Klette Jonassen { 1922b0a8c9eSKenneth Klette Jonassen s32 gmin = ca->rtt.min - ca->rtt_prev.min; 1932b0a8c9eSKenneth Klette Jonassen s32 gmax = ca->rtt.max - ca->rtt_prev.max; 1942b0a8c9eSKenneth Klette Jonassen s32 grad; 1952b0a8c9eSKenneth Klette Jonassen 1962b0a8c9eSKenneth Klette Jonassen if (ca->gradients) { 1972b0a8c9eSKenneth Klette Jonassen ca->gsum.min += gmin - ca->gradients[ca->tail].min; 1982b0a8c9eSKenneth Klette Jonassen ca->gsum.max += gmax - ca->gradients[ca->tail].max; 1992b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].min = gmin; 2002b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].max = gmax; 2012b0a8c9eSKenneth Klette Jonassen ca->tail = (ca->tail + 1) & (window - 1); 2022b0a8c9eSKenneth Klette Jonassen gmin = ca->gsum.min; 2032b0a8c9eSKenneth Klette Jonassen gmax = ca->gsum.max; 2042b0a8c9eSKenneth Klette Jonassen } 2052b0a8c9eSKenneth Klette Jonassen 2062b0a8c9eSKenneth Klette Jonassen /* We keep sums to ignore gradients during cwnd reductions; 2072b0a8c9eSKenneth Klette Jonassen * the paper's smoothed gradients otherwise simplify to: 2082b0a8c9eSKenneth Klette Jonassen * (rtt_latest - rtt_oldest) / window. 2092b0a8c9eSKenneth Klette Jonassen * 2102b0a8c9eSKenneth Klette Jonassen * We also drop division by window here. 2112b0a8c9eSKenneth Klette Jonassen */ 2122b0a8c9eSKenneth Klette Jonassen grad = gmin > 0 ? gmin : gmax; 2132b0a8c9eSKenneth Klette Jonassen 2142b0a8c9eSKenneth Klette Jonassen /* Extrapolate missing values in gradient window: */ 2152b0a8c9eSKenneth Klette Jonassen if (!ca->gfilled) { 2162b0a8c9eSKenneth Klette Jonassen if (!ca->gradients && window > 1) 2172b0a8c9eSKenneth Klette Jonassen grad *= window; /* Memory allocation failed. */ 2182b0a8c9eSKenneth Klette Jonassen else if (ca->tail == 0) 2192b0a8c9eSKenneth Klette Jonassen ca->gfilled = true; 2202b0a8c9eSKenneth Klette Jonassen else 2212b0a8c9eSKenneth Klette Jonassen grad = (grad * window) / (int)ca->tail; 2222b0a8c9eSKenneth Klette Jonassen } 2232b0a8c9eSKenneth Klette Jonassen 2242b0a8c9eSKenneth Klette Jonassen /* Backoff was effectual: */ 2252b0a8c9eSKenneth Klette Jonassen if (gmin <= -32 || gmax <= -32) 2262b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt = 0; 2272b0a8c9eSKenneth Klette Jonassen 2282b0a8c9eSKenneth Klette Jonassen if (use_tolerance) { 2292b0a8c9eSKenneth Klette Jonassen /* Reduce small variations to zero: */ 2302b0a8c9eSKenneth Klette Jonassen gmin = DIV_ROUND_CLOSEST(gmin, 64); 2312b0a8c9eSKenneth Klette Jonassen gmax = DIV_ROUND_CLOSEST(gmax, 64); 2322b0a8c9eSKenneth Klette Jonassen 2332b0a8c9eSKenneth Klette Jonassen if (gmin > 0 && gmax <= 0) 2342b0a8c9eSKenneth Klette Jonassen ca->state = CDG_FULL; 2352b0a8c9eSKenneth Klette Jonassen else if ((gmin > 0 && gmax > 0) || gmax < 0) 2362b0a8c9eSKenneth Klette Jonassen ca->state = CDG_NONFULL; 2372b0a8c9eSKenneth Klette Jonassen } 2382b0a8c9eSKenneth Klette Jonassen return grad; 2392b0a8c9eSKenneth Klette Jonassen } 2402b0a8c9eSKenneth Klette Jonassen 2412b0a8c9eSKenneth Klette Jonassen static bool tcp_cdg_backoff(struct sock *sk, u32 grad) 2422b0a8c9eSKenneth Klette Jonassen { 2432b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2442b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2452b0a8c9eSKenneth Klette Jonassen 2462b0a8c9eSKenneth Klette Jonassen if (prandom_u32() <= nexp_u32(grad * backoff_factor)) 2472b0a8c9eSKenneth Klette Jonassen return false; 2482b0a8c9eSKenneth Klette Jonassen 2492b0a8c9eSKenneth Klette Jonassen if (use_ineff) { 2502b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt++; 2512b0a8c9eSKenneth Klette Jonassen if (ca->backoff_cnt > use_ineff) 2522b0a8c9eSKenneth Klette Jonassen return false; 2532b0a8c9eSKenneth Klette Jonassen } 2542b0a8c9eSKenneth Klette Jonassen 25540570375SEric Dumazet ca->shadow_wnd = max(ca->shadow_wnd, tcp_snd_cwnd(tp)); 2562b0a8c9eSKenneth Klette Jonassen ca->state = CDG_BACKOFF; 2572b0a8c9eSKenneth Klette Jonassen tcp_enter_cwr(sk); 2582b0a8c9eSKenneth Klette Jonassen return true; 2592b0a8c9eSKenneth Klette Jonassen } 2602b0a8c9eSKenneth Klette Jonassen 2612b0a8c9eSKenneth Klette Jonassen /* Not called in CWR or Recovery state. */ 2622b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) 2632b0a8c9eSKenneth Klette Jonassen { 2642b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2652b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2662b0a8c9eSKenneth Klette Jonassen u32 prior_snd_cwnd; 2672b0a8c9eSKenneth Klette Jonassen u32 incr; 2682b0a8c9eSKenneth Klette Jonassen 26976174004SYuchung Cheng if (tcp_in_slow_start(tp) && hystart_detect) 2702b0a8c9eSKenneth Klette Jonassen tcp_cdg_hystart_update(sk); 2712b0a8c9eSKenneth Klette Jonassen 2722b0a8c9eSKenneth Klette Jonassen if (after(ack, ca->rtt_seq) && ca->rtt.v64) { 2732b0a8c9eSKenneth Klette Jonassen s32 grad = 0; 2742b0a8c9eSKenneth Klette Jonassen 2752b0a8c9eSKenneth Klette Jonassen if (ca->rtt_prev.v64) 2762b0a8c9eSKenneth Klette Jonassen grad = tcp_cdg_grad(ca); 2772b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 2782b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 2792b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 2802b0a8c9eSKenneth Klette Jonassen ca->last_ack = 0; 2812b0a8c9eSKenneth Klette Jonassen ca->sample_cnt = 0; 2822b0a8c9eSKenneth Klette Jonassen 2832b0a8c9eSKenneth Klette Jonassen if (grad > 0 && tcp_cdg_backoff(sk, grad)) 2842b0a8c9eSKenneth Klette Jonassen return; 2852b0a8c9eSKenneth Klette Jonassen } 2862b0a8c9eSKenneth Klette Jonassen 2872b0a8c9eSKenneth Klette Jonassen if (!tcp_is_cwnd_limited(sk)) { 28840570375SEric Dumazet ca->shadow_wnd = min(ca->shadow_wnd, tcp_snd_cwnd(tp)); 2892b0a8c9eSKenneth Klette Jonassen return; 2902b0a8c9eSKenneth Klette Jonassen } 2912b0a8c9eSKenneth Klette Jonassen 29240570375SEric Dumazet prior_snd_cwnd = tcp_snd_cwnd(tp); 2932b0a8c9eSKenneth Klette Jonassen tcp_reno_cong_avoid(sk, ack, acked); 2942b0a8c9eSKenneth Klette Jonassen 29540570375SEric Dumazet incr = tcp_snd_cwnd(tp) - prior_snd_cwnd; 2962b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); 2972b0a8c9eSKenneth Klette Jonassen } 2982b0a8c9eSKenneth Klette Jonassen 299756ee172SLawrence Brakmo static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample) 3002b0a8c9eSKenneth Klette Jonassen { 3012b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3022b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3032b0a8c9eSKenneth Klette Jonassen 304756ee172SLawrence Brakmo if (sample->rtt_us <= 0) 3052b0a8c9eSKenneth Klette Jonassen return; 3062b0a8c9eSKenneth Klette Jonassen 3072b0a8c9eSKenneth Klette Jonassen /* A heuristic for filtering delayed ACKs, adapted from: 3082b0a8c9eSKenneth Klette Jonassen * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support 3092b0a8c9eSKenneth Klette Jonassen * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010. 3102b0a8c9eSKenneth Klette Jonassen */ 3112b0a8c9eSKenneth Klette Jonassen if (tp->sacked_out == 0) { 312756ee172SLawrence Brakmo if (sample->pkts_acked == 1 && ca->delack) { 3132b0a8c9eSKenneth Klette Jonassen /* A delayed ACK is only used for the minimum if it is 3142b0a8c9eSKenneth Klette Jonassen * provenly lower than an existing non-zero minimum. 3152b0a8c9eSKenneth Klette Jonassen */ 316756ee172SLawrence Brakmo ca->rtt.min = min(ca->rtt.min, sample->rtt_us); 3172b0a8c9eSKenneth Klette Jonassen ca->delack--; 3182b0a8c9eSKenneth Klette Jonassen return; 319756ee172SLawrence Brakmo } else if (sample->pkts_acked > 1 && ca->delack < 5) { 3202b0a8c9eSKenneth Klette Jonassen ca->delack++; 3212b0a8c9eSKenneth Klette Jonassen } 3222b0a8c9eSKenneth Klette Jonassen } 3232b0a8c9eSKenneth Klette Jonassen 324756ee172SLawrence Brakmo ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us); 325756ee172SLawrence Brakmo ca->rtt.max = max(ca->rtt.max, sample->rtt_us); 3262b0a8c9eSKenneth Klette Jonassen } 3272b0a8c9eSKenneth Klette Jonassen 3282b0a8c9eSKenneth Klette Jonassen static u32 tcp_cdg_ssthresh(struct sock *sk) 3292b0a8c9eSKenneth Klette Jonassen { 3302b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3312b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3322b0a8c9eSKenneth Klette Jonassen 3332b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_BACKOFF) 33440570375SEric Dumazet return max(2U, (tcp_snd_cwnd(tp) * min(1024U, backoff_beta)) >> 10); 3352b0a8c9eSKenneth Klette Jonassen 3362b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_NONFULL && use_tolerance) 33740570375SEric Dumazet return tcp_snd_cwnd(tp); 3382b0a8c9eSKenneth Klette Jonassen 33940570375SEric Dumazet ca->shadow_wnd = min(ca->shadow_wnd >> 1, tcp_snd_cwnd(tp)); 3402b0a8c9eSKenneth Klette Jonassen if (use_shadow) 34140570375SEric Dumazet return max3(2U, ca->shadow_wnd, tcp_snd_cwnd(tp) >> 1); 34240570375SEric Dumazet return max(2U, tcp_snd_cwnd(tp) >> 1); 3432b0a8c9eSKenneth Klette Jonassen } 3442b0a8c9eSKenneth Klette Jonassen 3452b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) 3462b0a8c9eSKenneth Klette Jonassen { 3472b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3482b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 349f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax *gradients; 3502b0a8c9eSKenneth Klette Jonassen 3512b0a8c9eSKenneth Klette Jonassen switch (ev) { 3522b0a8c9eSKenneth Klette Jonassen case CA_EVENT_CWND_RESTART: 3532b0a8c9eSKenneth Klette Jonassen gradients = ca->gradients; 3542b0a8c9eSKenneth Klette Jonassen if (gradients) 3552b0a8c9eSKenneth Klette Jonassen memset(gradients, 0, window * sizeof(gradients[0])); 3562b0a8c9eSKenneth Klette Jonassen memset(ca, 0, sizeof(*ca)); 3572b0a8c9eSKenneth Klette Jonassen 3582b0a8c9eSKenneth Klette Jonassen ca->gradients = gradients; 3592b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 36040570375SEric Dumazet ca->shadow_wnd = tcp_snd_cwnd(tp); 3612b0a8c9eSKenneth Klette Jonassen break; 3622b0a8c9eSKenneth Klette Jonassen case CA_EVENT_COMPLETE_CWR: 3632b0a8c9eSKenneth Klette Jonassen ca->state = CDG_UNKNOWN; 3642b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3652b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 3662b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 3672b0a8c9eSKenneth Klette Jonassen break; 3682b0a8c9eSKenneth Klette Jonassen default: 3692b0a8c9eSKenneth Klette Jonassen break; 3702b0a8c9eSKenneth Klette Jonassen } 3712b0a8c9eSKenneth Klette Jonassen } 3722b0a8c9eSKenneth Klette Jonassen 3732b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_init(struct sock *sk) 3742b0a8c9eSKenneth Klette Jonassen { 3752b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3762b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3772b0a8c9eSKenneth Klette Jonassen 378*72e560cbSEric Dumazet ca->gradients = NULL; 3792b0a8c9eSKenneth Klette Jonassen /* We silently fall back to window = 1 if allocation fails. */ 3802b0a8c9eSKenneth Klette Jonassen if (window > 1) 3812b0a8c9eSKenneth Klette Jonassen ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), 3822b0a8c9eSKenneth Klette Jonassen GFP_NOWAIT | __GFP_NOWARN); 3832b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 38440570375SEric Dumazet ca->shadow_wnd = tcp_snd_cwnd(tp); 3852b0a8c9eSKenneth Klette Jonassen } 3862b0a8c9eSKenneth Klette Jonassen 3872b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_release(struct sock *sk) 3882b0a8c9eSKenneth Klette Jonassen { 3892b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3902b0a8c9eSKenneth Klette Jonassen 3912b0a8c9eSKenneth Klette Jonassen kfree(ca->gradients); 392*72e560cbSEric Dumazet ca->gradients = NULL; 3932b0a8c9eSKenneth Klette Jonassen } 3942b0a8c9eSKenneth Klette Jonassen 395d85969f1SColin Ian King static struct tcp_congestion_ops tcp_cdg __read_mostly = { 3962b0a8c9eSKenneth Klette Jonassen .cong_avoid = tcp_cdg_cong_avoid, 3972b0a8c9eSKenneth Klette Jonassen .cwnd_event = tcp_cdg_cwnd_event, 3982b0a8c9eSKenneth Klette Jonassen .pkts_acked = tcp_cdg_acked, 399f1722a1bSYuchung Cheng .undo_cwnd = tcp_reno_undo_cwnd, 4002b0a8c9eSKenneth Klette Jonassen .ssthresh = tcp_cdg_ssthresh, 4012b0a8c9eSKenneth Klette Jonassen .release = tcp_cdg_release, 4022b0a8c9eSKenneth Klette Jonassen .init = tcp_cdg_init, 4032b0a8c9eSKenneth Klette Jonassen .owner = THIS_MODULE, 4042b0a8c9eSKenneth Klette Jonassen .name = "cdg", 4052b0a8c9eSKenneth Klette Jonassen }; 4062b0a8c9eSKenneth Klette Jonassen 4072b0a8c9eSKenneth Klette Jonassen static int __init tcp_cdg_register(void) 4082b0a8c9eSKenneth Klette Jonassen { 4092b0a8c9eSKenneth Klette Jonassen if (backoff_beta > 1024 || window < 1 || window > 256) 4102b0a8c9eSKenneth Klette Jonassen return -ERANGE; 4112b0a8c9eSKenneth Klette Jonassen if (!is_power_of_2(window)) 4122b0a8c9eSKenneth Klette Jonassen return -EINVAL; 4132b0a8c9eSKenneth Klette Jonassen 4142b0a8c9eSKenneth Klette Jonassen BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE); 4152b0a8c9eSKenneth Klette Jonassen tcp_register_congestion_control(&tcp_cdg); 4162b0a8c9eSKenneth Klette Jonassen return 0; 4172b0a8c9eSKenneth Klette Jonassen } 4182b0a8c9eSKenneth Klette Jonassen 4192b0a8c9eSKenneth Klette Jonassen static void __exit tcp_cdg_unregister(void) 4202b0a8c9eSKenneth Klette Jonassen { 4212b0a8c9eSKenneth Klette Jonassen tcp_unregister_congestion_control(&tcp_cdg); 4222b0a8c9eSKenneth Klette Jonassen } 4232b0a8c9eSKenneth Klette Jonassen 4242b0a8c9eSKenneth Klette Jonassen module_init(tcp_cdg_register); 4252b0a8c9eSKenneth Klette Jonassen module_exit(tcp_cdg_unregister); 4262b0a8c9eSKenneth Klette Jonassen MODULE_AUTHOR("Kenneth Klette Jonassen"); 4272b0a8c9eSKenneth Klette Jonassen MODULE_LICENSE("GPL"); 4282b0a8c9eSKenneth Klette Jonassen MODULE_DESCRIPTION("TCP CDG"); 429