12b0a8c9eSKenneth Klette Jonassen /* 22b0a8c9eSKenneth Klette Jonassen * CAIA Delay-Gradient (CDG) congestion control 32b0a8c9eSKenneth Klette Jonassen * 42b0a8c9eSKenneth Klette Jonassen * This implementation is based on the paper: 52b0a8c9eSKenneth Klette Jonassen * D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using 62b0a8c9eSKenneth Klette Jonassen * delay gradients." In IFIP Networking, pages 328-341. Springer, 2011. 72b0a8c9eSKenneth Klette Jonassen * 82b0a8c9eSKenneth Klette Jonassen * Scavenger traffic (Less-than-Best-Effort) should disable coexistence 92b0a8c9eSKenneth Klette Jonassen * heuristics using parameters use_shadow=0 and use_ineff=0. 102b0a8c9eSKenneth Klette Jonassen * 112b0a8c9eSKenneth Klette Jonassen * Parameters window, backoff_beta, and backoff_factor are crucial for 122b0a8c9eSKenneth Klette Jonassen * throughput and delay. Future work is needed to determine better defaults, 132b0a8c9eSKenneth Klette Jonassen * and to provide guidelines for use in different environments/contexts. 142b0a8c9eSKenneth Klette Jonassen * 152b0a8c9eSKenneth Klette Jonassen * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/. 162b0a8c9eSKenneth Klette Jonassen * Parameter window is only configurable when loading tcp_cdg as a module. 172b0a8c9eSKenneth Klette Jonassen * 182b0a8c9eSKenneth Klette Jonassen * Notable differences from paper/FreeBSD: 192b0a8c9eSKenneth Klette Jonassen * o Using Hybrid Slow start and Proportional Rate Reduction. 202b0a8c9eSKenneth Klette Jonassen * o Add toggle for shadow window mechanism. Suggested by David Hayes. 212b0a8c9eSKenneth Klette Jonassen * o Add toggle for non-congestion loss tolerance. 222b0a8c9eSKenneth Klette Jonassen * o Scaling parameter G is changed to a backoff factor; 232b0a8c9eSKenneth Klette Jonassen * conversion is given by: backoff_factor = 1000/(G * window). 242b0a8c9eSKenneth Klette Jonassen * o Limit shadow window to 2 * cwnd, or to cwnd when application limited. 252b0a8c9eSKenneth Klette Jonassen * o More accurate e^-x. 262b0a8c9eSKenneth Klette Jonassen */ 272b0a8c9eSKenneth Klette Jonassen #include <linux/kernel.h> 282b0a8c9eSKenneth Klette Jonassen #include <linux/random.h> 292b0a8c9eSKenneth Klette Jonassen #include <linux/module.h> 30*e6017571SIngo Molnar #include <linux/sched/clock.h> 31*e6017571SIngo Molnar 322b0a8c9eSKenneth Klette Jonassen #include <net/tcp.h> 332b0a8c9eSKenneth Klette Jonassen 342b0a8c9eSKenneth Klette Jonassen #define HYSTART_ACK_TRAIN 1 352b0a8c9eSKenneth Klette Jonassen #define HYSTART_DELAY 2 362b0a8c9eSKenneth Klette Jonassen 372b0a8c9eSKenneth Klette Jonassen static int window __read_mostly = 8; 382b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */ 392b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_factor __read_mostly = 42; 402b0a8c9eSKenneth Klette Jonassen static unsigned int hystart_detect __read_mostly = 3; 412b0a8c9eSKenneth Klette Jonassen static unsigned int use_ineff __read_mostly = 5; 422b0a8c9eSKenneth Klette Jonassen static bool use_shadow __read_mostly = true; 432b0a8c9eSKenneth Klette Jonassen static bool use_tolerance __read_mostly; 442b0a8c9eSKenneth Klette Jonassen 452b0a8c9eSKenneth Klette Jonassen module_param(window, int, 0444); 462b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)"); 472b0a8c9eSKenneth Klette Jonassen module_param(backoff_beta, uint, 0644); 482b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)"); 492b0a8c9eSKenneth Klette Jonassen module_param(backoff_factor, uint, 0644); 502b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor"); 512b0a8c9eSKenneth Klette Jonassen module_param(hystart_detect, uint, 0644); 522b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start " 532b0a8c9eSKenneth Klette Jonassen "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)"); 542b0a8c9eSKenneth Klette Jonassen module_param(use_ineff, uint, 0644); 552b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)"); 562b0a8c9eSKenneth Klette Jonassen module_param(use_shadow, bool, 0644); 572b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); 582b0a8c9eSKenneth Klette Jonassen module_param(use_tolerance, bool, 0644); 592b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); 602b0a8c9eSKenneth Klette Jonassen 61f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax { 622b0a8c9eSKenneth Klette Jonassen union { 632b0a8c9eSKenneth Klette Jonassen struct { 642b0a8c9eSKenneth Klette Jonassen s32 min; 652b0a8c9eSKenneth Klette Jonassen s32 max; 662b0a8c9eSKenneth Klette Jonassen }; 672b0a8c9eSKenneth Klette Jonassen u64 v64; 682b0a8c9eSKenneth Klette Jonassen }; 692b0a8c9eSKenneth Klette Jonassen }; 702b0a8c9eSKenneth Klette Jonassen 712b0a8c9eSKenneth Klette Jonassen enum cdg_state { 722b0a8c9eSKenneth Klette Jonassen CDG_UNKNOWN = 0, 732b0a8c9eSKenneth Klette Jonassen CDG_NONFULL = 1, 742b0a8c9eSKenneth Klette Jonassen CDG_FULL = 2, 752b0a8c9eSKenneth Klette Jonassen CDG_BACKOFF = 3, 762b0a8c9eSKenneth Klette Jonassen }; 772b0a8c9eSKenneth Klette Jonassen 782b0a8c9eSKenneth Klette Jonassen struct cdg { 79f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax rtt; 80f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax rtt_prev; 81f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax *gradients; 82f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax gsum; 832b0a8c9eSKenneth Klette Jonassen bool gfilled; 842b0a8c9eSKenneth Klette Jonassen u8 tail; 852b0a8c9eSKenneth Klette Jonassen u8 state; 862b0a8c9eSKenneth Klette Jonassen u8 delack; 872b0a8c9eSKenneth Klette Jonassen u32 rtt_seq; 882b0a8c9eSKenneth Klette Jonassen u32 undo_cwnd; 892b0a8c9eSKenneth Klette Jonassen u32 shadow_wnd; 902b0a8c9eSKenneth Klette Jonassen u16 backoff_cnt; 912b0a8c9eSKenneth Klette Jonassen u16 sample_cnt; 922b0a8c9eSKenneth Klette Jonassen s32 delay_min; 932b0a8c9eSKenneth Klette Jonassen u32 last_ack; 942b0a8c9eSKenneth Klette Jonassen u32 round_start; 952b0a8c9eSKenneth Klette Jonassen }; 962b0a8c9eSKenneth Klette Jonassen 972b0a8c9eSKenneth Klette Jonassen /** 982b0a8c9eSKenneth Klette Jonassen * nexp_u32 - negative base-e exponential 992b0a8c9eSKenneth Klette Jonassen * @ux: x in units of micro 1002b0a8c9eSKenneth Klette Jonassen * 1012b0a8c9eSKenneth Klette Jonassen * Returns exp(ux * -1e-6) * U32_MAX. 1022b0a8c9eSKenneth Klette Jonassen */ 1032b0a8c9eSKenneth Klette Jonassen static u32 __pure nexp_u32(u32 ux) 1042b0a8c9eSKenneth Klette Jonassen { 1052b0a8c9eSKenneth Klette Jonassen static const u16 v[] = { 1062b0a8c9eSKenneth Klette Jonassen /* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */ 1072b0a8c9eSKenneth Klette Jonassen 65535, 1082b0a8c9eSKenneth Klette Jonassen 65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422, 1092b0a8c9eSKenneth Klette Jonassen 61378, 57484, 50423, 38795, 22965, 8047, 987, 14, 1102b0a8c9eSKenneth Klette Jonassen }; 1112b0a8c9eSKenneth Klette Jonassen u32 msb = ux >> 8; 1122b0a8c9eSKenneth Klette Jonassen u32 res; 1132b0a8c9eSKenneth Klette Jonassen int i; 1142b0a8c9eSKenneth Klette Jonassen 1152b0a8c9eSKenneth Klette Jonassen /* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */ 1162b0a8c9eSKenneth Klette Jonassen if (msb > U16_MAX) 1172b0a8c9eSKenneth Klette Jonassen return 0; 1182b0a8c9eSKenneth Klette Jonassen 1192b0a8c9eSKenneth Klette Jonassen /* Scale first eight bits linearly: */ 1202b0a8c9eSKenneth Klette Jonassen res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000); 1212b0a8c9eSKenneth Klette Jonassen 1222b0a8c9eSKenneth Klette Jonassen /* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */ 1232b0a8c9eSKenneth Klette Jonassen for (i = 1; msb; i++, msb >>= 1) { 1242b0a8c9eSKenneth Klette Jonassen u32 y = v[i & -(msb & 1)] + U32_C(1); 1252b0a8c9eSKenneth Klette Jonassen 1262b0a8c9eSKenneth Klette Jonassen res = ((u64)res * y) >> 16; 1272b0a8c9eSKenneth Klette Jonassen } 1282b0a8c9eSKenneth Klette Jonassen 1292b0a8c9eSKenneth Klette Jonassen return res; 1302b0a8c9eSKenneth Klette Jonassen } 1312b0a8c9eSKenneth Klette Jonassen 1322b0a8c9eSKenneth Klette Jonassen /* Based on the HyStart algorithm (by Ha et al.) that is implemented in 1332b0a8c9eSKenneth Klette Jonassen * tcp_cubic. Differences/experimental changes: 1342b0a8c9eSKenneth Klette Jonassen * o Using Hayes' delayed ACK filter. 1352b0a8c9eSKenneth Klette Jonassen * o Using a usec clock for the ACK train. 1362b0a8c9eSKenneth Klette Jonassen * o Reset ACK train when application limited. 1372b0a8c9eSKenneth Klette Jonassen * o Invoked at any cwnd (i.e. also when cwnd < 16). 1382b0a8c9eSKenneth Klette Jonassen * o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh). 1392b0a8c9eSKenneth Klette Jonassen */ 1402b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_hystart_update(struct sock *sk) 1412b0a8c9eSKenneth Klette Jonassen { 1422b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 1432b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 1442b0a8c9eSKenneth Klette Jonassen 1452b0a8c9eSKenneth Klette Jonassen ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min); 1462b0a8c9eSKenneth Klette Jonassen if (ca->delay_min == 0) 1472b0a8c9eSKenneth Klette Jonassen return; 1482b0a8c9eSKenneth Klette Jonassen 1492b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_ACK_TRAIN) { 150758f0d4bSKenneth Klette Jonassen u32 now_us = div_u64(local_clock(), NSEC_PER_USEC); 1512b0a8c9eSKenneth Klette Jonassen 1522b0a8c9eSKenneth Klette Jonassen if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) { 1532b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1542b0a8c9eSKenneth Klette Jonassen ca->round_start = now_us; 1552b0a8c9eSKenneth Klette Jonassen } else if (before(now_us, ca->last_ack + 3000)) { 1562b0a8c9eSKenneth Klette Jonassen u32 base_owd = max(ca->delay_min / 2U, 125U); 1572b0a8c9eSKenneth Klette Jonassen 1582b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1592b0a8c9eSKenneth Klette Jonassen if (after(now_us, ca->round_start + base_owd)) { 160c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1612b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINDETECT); 162c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1632b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINCWND, 1642b0a8c9eSKenneth Klette Jonassen tp->snd_cwnd); 1652b0a8c9eSKenneth Klette Jonassen tp->snd_ssthresh = tp->snd_cwnd; 1662b0a8c9eSKenneth Klette Jonassen return; 1672b0a8c9eSKenneth Klette Jonassen } 1682b0a8c9eSKenneth Klette Jonassen } 1692b0a8c9eSKenneth Klette Jonassen } 1702b0a8c9eSKenneth Klette Jonassen 1712b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_DELAY) { 1722b0a8c9eSKenneth Klette Jonassen if (ca->sample_cnt < 8) { 1732b0a8c9eSKenneth Klette Jonassen ca->sample_cnt++; 1742b0a8c9eSKenneth Klette Jonassen } else { 1752b0a8c9eSKenneth Klette Jonassen s32 thresh = max(ca->delay_min + ca->delay_min / 8U, 1762b0a8c9eSKenneth Klette Jonassen 125U); 1772b0a8c9eSKenneth Klette Jonassen 1782b0a8c9eSKenneth Klette Jonassen if (ca->rtt.min > thresh) { 179c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1802b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYDETECT); 181c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1822b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYCWND, 1832b0a8c9eSKenneth Klette Jonassen tp->snd_cwnd); 1842b0a8c9eSKenneth Klette Jonassen tp->snd_ssthresh = tp->snd_cwnd; 1852b0a8c9eSKenneth Klette Jonassen } 1862b0a8c9eSKenneth Klette Jonassen } 1872b0a8c9eSKenneth Klette Jonassen } 1882b0a8c9eSKenneth Klette Jonassen } 1892b0a8c9eSKenneth Klette Jonassen 1902b0a8c9eSKenneth Klette Jonassen static s32 tcp_cdg_grad(struct cdg *ca) 1912b0a8c9eSKenneth Klette Jonassen { 1922b0a8c9eSKenneth Klette Jonassen s32 gmin = ca->rtt.min - ca->rtt_prev.min; 1932b0a8c9eSKenneth Klette Jonassen s32 gmax = ca->rtt.max - ca->rtt_prev.max; 1942b0a8c9eSKenneth Klette Jonassen s32 grad; 1952b0a8c9eSKenneth Klette Jonassen 1962b0a8c9eSKenneth Klette Jonassen if (ca->gradients) { 1972b0a8c9eSKenneth Klette Jonassen ca->gsum.min += gmin - ca->gradients[ca->tail].min; 1982b0a8c9eSKenneth Klette Jonassen ca->gsum.max += gmax - ca->gradients[ca->tail].max; 1992b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].min = gmin; 2002b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].max = gmax; 2012b0a8c9eSKenneth Klette Jonassen ca->tail = (ca->tail + 1) & (window - 1); 2022b0a8c9eSKenneth Klette Jonassen gmin = ca->gsum.min; 2032b0a8c9eSKenneth Klette Jonassen gmax = ca->gsum.max; 2042b0a8c9eSKenneth Klette Jonassen } 2052b0a8c9eSKenneth Klette Jonassen 2062b0a8c9eSKenneth Klette Jonassen /* We keep sums to ignore gradients during cwnd reductions; 2072b0a8c9eSKenneth Klette Jonassen * the paper's smoothed gradients otherwise simplify to: 2082b0a8c9eSKenneth Klette Jonassen * (rtt_latest - rtt_oldest) / window. 2092b0a8c9eSKenneth Klette Jonassen * 2102b0a8c9eSKenneth Klette Jonassen * We also drop division by window here. 2112b0a8c9eSKenneth Klette Jonassen */ 2122b0a8c9eSKenneth Klette Jonassen grad = gmin > 0 ? gmin : gmax; 2132b0a8c9eSKenneth Klette Jonassen 2142b0a8c9eSKenneth Klette Jonassen /* Extrapolate missing values in gradient window: */ 2152b0a8c9eSKenneth Klette Jonassen if (!ca->gfilled) { 2162b0a8c9eSKenneth Klette Jonassen if (!ca->gradients && window > 1) 2172b0a8c9eSKenneth Klette Jonassen grad *= window; /* Memory allocation failed. */ 2182b0a8c9eSKenneth Klette Jonassen else if (ca->tail == 0) 2192b0a8c9eSKenneth Klette Jonassen ca->gfilled = true; 2202b0a8c9eSKenneth Klette Jonassen else 2212b0a8c9eSKenneth Klette Jonassen grad = (grad * window) / (int)ca->tail; 2222b0a8c9eSKenneth Klette Jonassen } 2232b0a8c9eSKenneth Klette Jonassen 2242b0a8c9eSKenneth Klette Jonassen /* Backoff was effectual: */ 2252b0a8c9eSKenneth Klette Jonassen if (gmin <= -32 || gmax <= -32) 2262b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt = 0; 2272b0a8c9eSKenneth Klette Jonassen 2282b0a8c9eSKenneth Klette Jonassen if (use_tolerance) { 2292b0a8c9eSKenneth Klette Jonassen /* Reduce small variations to zero: */ 2302b0a8c9eSKenneth Klette Jonassen gmin = DIV_ROUND_CLOSEST(gmin, 64); 2312b0a8c9eSKenneth Klette Jonassen gmax = DIV_ROUND_CLOSEST(gmax, 64); 2322b0a8c9eSKenneth Klette Jonassen 2332b0a8c9eSKenneth Klette Jonassen if (gmin > 0 && gmax <= 0) 2342b0a8c9eSKenneth Klette Jonassen ca->state = CDG_FULL; 2352b0a8c9eSKenneth Klette Jonassen else if ((gmin > 0 && gmax > 0) || gmax < 0) 2362b0a8c9eSKenneth Klette Jonassen ca->state = CDG_NONFULL; 2372b0a8c9eSKenneth Klette Jonassen } 2382b0a8c9eSKenneth Klette Jonassen return grad; 2392b0a8c9eSKenneth Klette Jonassen } 2402b0a8c9eSKenneth Klette Jonassen 2412b0a8c9eSKenneth Klette Jonassen static bool tcp_cdg_backoff(struct sock *sk, u32 grad) 2422b0a8c9eSKenneth Klette Jonassen { 2432b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2442b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2452b0a8c9eSKenneth Klette Jonassen 2462b0a8c9eSKenneth Klette Jonassen if (prandom_u32() <= nexp_u32(grad * backoff_factor)) 2472b0a8c9eSKenneth Klette Jonassen return false; 2482b0a8c9eSKenneth Klette Jonassen 2492b0a8c9eSKenneth Klette Jonassen if (use_ineff) { 2502b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt++; 2512b0a8c9eSKenneth Klette Jonassen if (ca->backoff_cnt > use_ineff) 2522b0a8c9eSKenneth Klette Jonassen return false; 2532b0a8c9eSKenneth Klette Jonassen } 2542b0a8c9eSKenneth Klette Jonassen 2552b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd); 2562b0a8c9eSKenneth Klette Jonassen ca->state = CDG_BACKOFF; 2572b0a8c9eSKenneth Klette Jonassen tcp_enter_cwr(sk); 2582b0a8c9eSKenneth Klette Jonassen return true; 2592b0a8c9eSKenneth Klette Jonassen } 2602b0a8c9eSKenneth Klette Jonassen 2612b0a8c9eSKenneth Klette Jonassen /* Not called in CWR or Recovery state. */ 2622b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) 2632b0a8c9eSKenneth Klette Jonassen { 2642b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2652b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2662b0a8c9eSKenneth Klette Jonassen u32 prior_snd_cwnd; 2672b0a8c9eSKenneth Klette Jonassen u32 incr; 2682b0a8c9eSKenneth Klette Jonassen 26976174004SYuchung Cheng if (tcp_in_slow_start(tp) && hystart_detect) 2702b0a8c9eSKenneth Klette Jonassen tcp_cdg_hystart_update(sk); 2712b0a8c9eSKenneth Klette Jonassen 2722b0a8c9eSKenneth Klette Jonassen if (after(ack, ca->rtt_seq) && ca->rtt.v64) { 2732b0a8c9eSKenneth Klette Jonassen s32 grad = 0; 2742b0a8c9eSKenneth Klette Jonassen 2752b0a8c9eSKenneth Klette Jonassen if (ca->rtt_prev.v64) 2762b0a8c9eSKenneth Klette Jonassen grad = tcp_cdg_grad(ca); 2772b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 2782b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 2792b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 2802b0a8c9eSKenneth Klette Jonassen ca->last_ack = 0; 2812b0a8c9eSKenneth Klette Jonassen ca->sample_cnt = 0; 2822b0a8c9eSKenneth Klette Jonassen 2832b0a8c9eSKenneth Klette Jonassen if (grad > 0 && tcp_cdg_backoff(sk, grad)) 2842b0a8c9eSKenneth Klette Jonassen return; 2852b0a8c9eSKenneth Klette Jonassen } 2862b0a8c9eSKenneth Klette Jonassen 2872b0a8c9eSKenneth Klette Jonassen if (!tcp_is_cwnd_limited(sk)) { 2882b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd); 2892b0a8c9eSKenneth Klette Jonassen return; 2902b0a8c9eSKenneth Klette Jonassen } 2912b0a8c9eSKenneth Klette Jonassen 2922b0a8c9eSKenneth Klette Jonassen prior_snd_cwnd = tp->snd_cwnd; 2932b0a8c9eSKenneth Klette Jonassen tcp_reno_cong_avoid(sk, ack, acked); 2942b0a8c9eSKenneth Klette Jonassen 2952b0a8c9eSKenneth Klette Jonassen incr = tp->snd_cwnd - prior_snd_cwnd; 2962b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); 2972b0a8c9eSKenneth Klette Jonassen } 2982b0a8c9eSKenneth Klette Jonassen 299756ee172SLawrence Brakmo static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample) 3002b0a8c9eSKenneth Klette Jonassen { 3012b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3022b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3032b0a8c9eSKenneth Klette Jonassen 304756ee172SLawrence Brakmo if (sample->rtt_us <= 0) 3052b0a8c9eSKenneth Klette Jonassen return; 3062b0a8c9eSKenneth Klette Jonassen 3072b0a8c9eSKenneth Klette Jonassen /* A heuristic for filtering delayed ACKs, adapted from: 3082b0a8c9eSKenneth Klette Jonassen * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support 3092b0a8c9eSKenneth Klette Jonassen * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010. 3102b0a8c9eSKenneth Klette Jonassen */ 3112b0a8c9eSKenneth Klette Jonassen if (tp->sacked_out == 0) { 312756ee172SLawrence Brakmo if (sample->pkts_acked == 1 && ca->delack) { 3132b0a8c9eSKenneth Klette Jonassen /* A delayed ACK is only used for the minimum if it is 3142b0a8c9eSKenneth Klette Jonassen * provenly lower than an existing non-zero minimum. 3152b0a8c9eSKenneth Klette Jonassen */ 316756ee172SLawrence Brakmo ca->rtt.min = min(ca->rtt.min, sample->rtt_us); 3172b0a8c9eSKenneth Klette Jonassen ca->delack--; 3182b0a8c9eSKenneth Klette Jonassen return; 319756ee172SLawrence Brakmo } else if (sample->pkts_acked > 1 && ca->delack < 5) { 3202b0a8c9eSKenneth Klette Jonassen ca->delack++; 3212b0a8c9eSKenneth Klette Jonassen } 3222b0a8c9eSKenneth Klette Jonassen } 3232b0a8c9eSKenneth Klette Jonassen 324756ee172SLawrence Brakmo ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us); 325756ee172SLawrence Brakmo ca->rtt.max = max(ca->rtt.max, sample->rtt_us); 3262b0a8c9eSKenneth Klette Jonassen } 3272b0a8c9eSKenneth Klette Jonassen 3282b0a8c9eSKenneth Klette Jonassen static u32 tcp_cdg_ssthresh(struct sock *sk) 3292b0a8c9eSKenneth Klette Jonassen { 3302b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3312b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3322b0a8c9eSKenneth Klette Jonassen 3332b0a8c9eSKenneth Klette Jonassen ca->undo_cwnd = tp->snd_cwnd; 3342b0a8c9eSKenneth Klette Jonassen 3352b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_BACKOFF) 3362b0a8c9eSKenneth Klette Jonassen return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); 3372b0a8c9eSKenneth Klette Jonassen 3382b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_NONFULL && use_tolerance) 3392b0a8c9eSKenneth Klette Jonassen return tp->snd_cwnd; 3402b0a8c9eSKenneth Klette Jonassen 3412b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd); 3422b0a8c9eSKenneth Klette Jonassen if (use_shadow) 3432b0a8c9eSKenneth Klette Jonassen return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1); 3442b0a8c9eSKenneth Klette Jonassen return max(2U, tp->snd_cwnd >> 1); 3452b0a8c9eSKenneth Klette Jonassen } 3462b0a8c9eSKenneth Klette Jonassen 3472b0a8c9eSKenneth Klette Jonassen static u32 tcp_cdg_undo_cwnd(struct sock *sk) 3482b0a8c9eSKenneth Klette Jonassen { 3492b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3502b0a8c9eSKenneth Klette Jonassen 3512b0a8c9eSKenneth Klette Jonassen return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd); 3522b0a8c9eSKenneth Klette Jonassen } 3532b0a8c9eSKenneth Klette Jonassen 3542b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) 3552b0a8c9eSKenneth Klette Jonassen { 3562b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3572b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 358f78e73e2SSoheil Hassas Yeganeh struct cdg_minmax *gradients; 3592b0a8c9eSKenneth Klette Jonassen 3602b0a8c9eSKenneth Klette Jonassen switch (ev) { 3612b0a8c9eSKenneth Klette Jonassen case CA_EVENT_CWND_RESTART: 3622b0a8c9eSKenneth Klette Jonassen gradients = ca->gradients; 3632b0a8c9eSKenneth Klette Jonassen if (gradients) 3642b0a8c9eSKenneth Klette Jonassen memset(gradients, 0, window * sizeof(gradients[0])); 3652b0a8c9eSKenneth Klette Jonassen memset(ca, 0, sizeof(*ca)); 3662b0a8c9eSKenneth Klette Jonassen 3672b0a8c9eSKenneth Klette Jonassen ca->gradients = gradients; 3682b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3692b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = tp->snd_cwnd; 3702b0a8c9eSKenneth Klette Jonassen break; 3712b0a8c9eSKenneth Klette Jonassen case CA_EVENT_COMPLETE_CWR: 3722b0a8c9eSKenneth Klette Jonassen ca->state = CDG_UNKNOWN; 3732b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3742b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 3752b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 3762b0a8c9eSKenneth Klette Jonassen break; 3772b0a8c9eSKenneth Klette Jonassen default: 3782b0a8c9eSKenneth Klette Jonassen break; 3792b0a8c9eSKenneth Klette Jonassen } 3802b0a8c9eSKenneth Klette Jonassen } 3812b0a8c9eSKenneth Klette Jonassen 3822b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_init(struct sock *sk) 3832b0a8c9eSKenneth Klette Jonassen { 3842b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3852b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3862b0a8c9eSKenneth Klette Jonassen 3872b0a8c9eSKenneth Klette Jonassen /* We silently fall back to window = 1 if allocation fails. */ 3882b0a8c9eSKenneth Klette Jonassen if (window > 1) 3892b0a8c9eSKenneth Klette Jonassen ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), 3902b0a8c9eSKenneth Klette Jonassen GFP_NOWAIT | __GFP_NOWARN); 3912b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3922b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = tp->snd_cwnd; 3932b0a8c9eSKenneth Klette Jonassen } 3942b0a8c9eSKenneth Klette Jonassen 3952b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_release(struct sock *sk) 3962b0a8c9eSKenneth Klette Jonassen { 3972b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3982b0a8c9eSKenneth Klette Jonassen 3992b0a8c9eSKenneth Klette Jonassen kfree(ca->gradients); 4002b0a8c9eSKenneth Klette Jonassen } 4012b0a8c9eSKenneth Klette Jonassen 4022b0a8c9eSKenneth Klette Jonassen struct tcp_congestion_ops tcp_cdg __read_mostly = { 4032b0a8c9eSKenneth Klette Jonassen .cong_avoid = tcp_cdg_cong_avoid, 4042b0a8c9eSKenneth Klette Jonassen .cwnd_event = tcp_cdg_cwnd_event, 4052b0a8c9eSKenneth Klette Jonassen .pkts_acked = tcp_cdg_acked, 4062b0a8c9eSKenneth Klette Jonassen .undo_cwnd = tcp_cdg_undo_cwnd, 4072b0a8c9eSKenneth Klette Jonassen .ssthresh = tcp_cdg_ssthresh, 4082b0a8c9eSKenneth Klette Jonassen .release = tcp_cdg_release, 4092b0a8c9eSKenneth Klette Jonassen .init = tcp_cdg_init, 4102b0a8c9eSKenneth Klette Jonassen .owner = THIS_MODULE, 4112b0a8c9eSKenneth Klette Jonassen .name = "cdg", 4122b0a8c9eSKenneth Klette Jonassen }; 4132b0a8c9eSKenneth Klette Jonassen 4142b0a8c9eSKenneth Klette Jonassen static int __init tcp_cdg_register(void) 4152b0a8c9eSKenneth Klette Jonassen { 4162b0a8c9eSKenneth Klette Jonassen if (backoff_beta > 1024 || window < 1 || window > 256) 4172b0a8c9eSKenneth Klette Jonassen return -ERANGE; 4182b0a8c9eSKenneth Klette Jonassen if (!is_power_of_2(window)) 4192b0a8c9eSKenneth Klette Jonassen return -EINVAL; 4202b0a8c9eSKenneth Klette Jonassen 4212b0a8c9eSKenneth Klette Jonassen BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE); 4222b0a8c9eSKenneth Klette Jonassen tcp_register_congestion_control(&tcp_cdg); 4232b0a8c9eSKenneth Klette Jonassen return 0; 4242b0a8c9eSKenneth Klette Jonassen } 4252b0a8c9eSKenneth Klette Jonassen 4262b0a8c9eSKenneth Klette Jonassen static void __exit tcp_cdg_unregister(void) 4272b0a8c9eSKenneth Klette Jonassen { 4282b0a8c9eSKenneth Klette Jonassen tcp_unregister_congestion_control(&tcp_cdg); 4292b0a8c9eSKenneth Klette Jonassen } 4302b0a8c9eSKenneth Klette Jonassen 4312b0a8c9eSKenneth Klette Jonassen module_init(tcp_cdg_register); 4322b0a8c9eSKenneth Klette Jonassen module_exit(tcp_cdg_unregister); 4332b0a8c9eSKenneth Klette Jonassen MODULE_AUTHOR("Kenneth Klette Jonassen"); 4342b0a8c9eSKenneth Klette Jonassen MODULE_LICENSE("GPL"); 4352b0a8c9eSKenneth Klette Jonassen MODULE_DESCRIPTION("TCP CDG"); 436