12b0a8c9eSKenneth Klette Jonassen /* 22b0a8c9eSKenneth Klette Jonassen * CAIA Delay-Gradient (CDG) congestion control 32b0a8c9eSKenneth Klette Jonassen * 42b0a8c9eSKenneth Klette Jonassen * This implementation is based on the paper: 52b0a8c9eSKenneth Klette Jonassen * D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using 62b0a8c9eSKenneth Klette Jonassen * delay gradients." In IFIP Networking, pages 328-341. Springer, 2011. 72b0a8c9eSKenneth Klette Jonassen * 82b0a8c9eSKenneth Klette Jonassen * Scavenger traffic (Less-than-Best-Effort) should disable coexistence 92b0a8c9eSKenneth Klette Jonassen * heuristics using parameters use_shadow=0 and use_ineff=0. 102b0a8c9eSKenneth Klette Jonassen * 112b0a8c9eSKenneth Klette Jonassen * Parameters window, backoff_beta, and backoff_factor are crucial for 122b0a8c9eSKenneth Klette Jonassen * throughput and delay. Future work is needed to determine better defaults, 132b0a8c9eSKenneth Klette Jonassen * and to provide guidelines for use in different environments/contexts. 142b0a8c9eSKenneth Klette Jonassen * 152b0a8c9eSKenneth Klette Jonassen * Except for window, knobs are configured via /sys/module/tcp_cdg/parameters/. 162b0a8c9eSKenneth Klette Jonassen * Parameter window is only configurable when loading tcp_cdg as a module. 172b0a8c9eSKenneth Klette Jonassen * 182b0a8c9eSKenneth Klette Jonassen * Notable differences from paper/FreeBSD: 192b0a8c9eSKenneth Klette Jonassen * o Using Hybrid Slow start and Proportional Rate Reduction. 202b0a8c9eSKenneth Klette Jonassen * o Add toggle for shadow window mechanism. Suggested by David Hayes. 212b0a8c9eSKenneth Klette Jonassen * o Add toggle for non-congestion loss tolerance. 222b0a8c9eSKenneth Klette Jonassen * o Scaling parameter G is changed to a backoff factor; 232b0a8c9eSKenneth Klette Jonassen * conversion is given by: backoff_factor = 1000/(G * window). 242b0a8c9eSKenneth Klette Jonassen * o Limit shadow window to 2 * cwnd, or to cwnd when application limited. 252b0a8c9eSKenneth Klette Jonassen * o More accurate e^-x. 262b0a8c9eSKenneth Klette Jonassen */ 272b0a8c9eSKenneth Klette Jonassen #include <linux/kernel.h> 282b0a8c9eSKenneth Klette Jonassen #include <linux/random.h> 292b0a8c9eSKenneth Klette Jonassen #include <linux/module.h> 302b0a8c9eSKenneth Klette Jonassen #include <net/tcp.h> 312b0a8c9eSKenneth Klette Jonassen 322b0a8c9eSKenneth Klette Jonassen #define HYSTART_ACK_TRAIN 1 332b0a8c9eSKenneth Klette Jonassen #define HYSTART_DELAY 2 342b0a8c9eSKenneth Klette Jonassen 352b0a8c9eSKenneth Klette Jonassen static int window __read_mostly = 8; 362b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_beta __read_mostly = 0.7071 * 1024; /* sqrt 0.5 */ 372b0a8c9eSKenneth Klette Jonassen static unsigned int backoff_factor __read_mostly = 42; 382b0a8c9eSKenneth Klette Jonassen static unsigned int hystart_detect __read_mostly = 3; 392b0a8c9eSKenneth Klette Jonassen static unsigned int use_ineff __read_mostly = 5; 402b0a8c9eSKenneth Klette Jonassen static bool use_shadow __read_mostly = true; 412b0a8c9eSKenneth Klette Jonassen static bool use_tolerance __read_mostly; 422b0a8c9eSKenneth Klette Jonassen 432b0a8c9eSKenneth Klette Jonassen module_param(window, int, 0444); 442b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(window, "gradient window size (power of two <= 256)"); 452b0a8c9eSKenneth Klette Jonassen module_param(backoff_beta, uint, 0644); 462b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_beta, "backoff beta (0-1024)"); 472b0a8c9eSKenneth Klette Jonassen module_param(backoff_factor, uint, 0644); 482b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(backoff_factor, "backoff probability scale factor"); 492b0a8c9eSKenneth Klette Jonassen module_param(hystart_detect, uint, 0644); 502b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(hystart_detect, "use Hybrid Slow start " 512b0a8c9eSKenneth Klette Jonassen "(0: disabled, 1: ACK train, 2: delay threshold, 3: both)"); 522b0a8c9eSKenneth Klette Jonassen module_param(use_ineff, uint, 0644); 532b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_ineff, "use ineffectual backoff detection (threshold)"); 542b0a8c9eSKenneth Klette Jonassen module_param(use_shadow, bool, 0644); 552b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); 562b0a8c9eSKenneth Klette Jonassen module_param(use_tolerance, bool, 0644); 572b0a8c9eSKenneth Klette Jonassen MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); 582b0a8c9eSKenneth Klette Jonassen 592b0a8c9eSKenneth Klette Jonassen struct minmax { 602b0a8c9eSKenneth Klette Jonassen union { 612b0a8c9eSKenneth Klette Jonassen struct { 622b0a8c9eSKenneth Klette Jonassen s32 min; 632b0a8c9eSKenneth Klette Jonassen s32 max; 642b0a8c9eSKenneth Klette Jonassen }; 652b0a8c9eSKenneth Klette Jonassen u64 v64; 662b0a8c9eSKenneth Klette Jonassen }; 672b0a8c9eSKenneth Klette Jonassen }; 682b0a8c9eSKenneth Klette Jonassen 692b0a8c9eSKenneth Klette Jonassen enum cdg_state { 702b0a8c9eSKenneth Klette Jonassen CDG_UNKNOWN = 0, 712b0a8c9eSKenneth Klette Jonassen CDG_NONFULL = 1, 722b0a8c9eSKenneth Klette Jonassen CDG_FULL = 2, 732b0a8c9eSKenneth Klette Jonassen CDG_BACKOFF = 3, 742b0a8c9eSKenneth Klette Jonassen }; 752b0a8c9eSKenneth Klette Jonassen 762b0a8c9eSKenneth Klette Jonassen struct cdg { 772b0a8c9eSKenneth Klette Jonassen struct minmax rtt; 782b0a8c9eSKenneth Klette Jonassen struct minmax rtt_prev; 792b0a8c9eSKenneth Klette Jonassen struct minmax *gradients; 802b0a8c9eSKenneth Klette Jonassen struct minmax gsum; 812b0a8c9eSKenneth Klette Jonassen bool gfilled; 822b0a8c9eSKenneth Klette Jonassen u8 tail; 832b0a8c9eSKenneth Klette Jonassen u8 state; 842b0a8c9eSKenneth Klette Jonassen u8 delack; 852b0a8c9eSKenneth Klette Jonassen u32 rtt_seq; 862b0a8c9eSKenneth Klette Jonassen u32 undo_cwnd; 872b0a8c9eSKenneth Klette Jonassen u32 shadow_wnd; 882b0a8c9eSKenneth Klette Jonassen u16 backoff_cnt; 892b0a8c9eSKenneth Klette Jonassen u16 sample_cnt; 902b0a8c9eSKenneth Klette Jonassen s32 delay_min; 912b0a8c9eSKenneth Klette Jonassen u32 last_ack; 922b0a8c9eSKenneth Klette Jonassen u32 round_start; 932b0a8c9eSKenneth Klette Jonassen }; 942b0a8c9eSKenneth Klette Jonassen 952b0a8c9eSKenneth Klette Jonassen /** 962b0a8c9eSKenneth Klette Jonassen * nexp_u32 - negative base-e exponential 972b0a8c9eSKenneth Klette Jonassen * @ux: x in units of micro 982b0a8c9eSKenneth Klette Jonassen * 992b0a8c9eSKenneth Klette Jonassen * Returns exp(ux * -1e-6) * U32_MAX. 1002b0a8c9eSKenneth Klette Jonassen */ 1012b0a8c9eSKenneth Klette Jonassen static u32 __pure nexp_u32(u32 ux) 1022b0a8c9eSKenneth Klette Jonassen { 1032b0a8c9eSKenneth Klette Jonassen static const u16 v[] = { 1042b0a8c9eSKenneth Klette Jonassen /* exp(-x)*65536-1 for x = 0, 0.000256, 0.000512, ... */ 1052b0a8c9eSKenneth Klette Jonassen 65535, 1062b0a8c9eSKenneth Klette Jonassen 65518, 65501, 65468, 65401, 65267, 65001, 64470, 63422, 1072b0a8c9eSKenneth Klette Jonassen 61378, 57484, 50423, 38795, 22965, 8047, 987, 14, 1082b0a8c9eSKenneth Klette Jonassen }; 1092b0a8c9eSKenneth Klette Jonassen u32 msb = ux >> 8; 1102b0a8c9eSKenneth Klette Jonassen u32 res; 1112b0a8c9eSKenneth Klette Jonassen int i; 1122b0a8c9eSKenneth Klette Jonassen 1132b0a8c9eSKenneth Klette Jonassen /* Cut off when ux >= 2^24 (actual result is <= 222/U32_MAX). */ 1142b0a8c9eSKenneth Klette Jonassen if (msb > U16_MAX) 1152b0a8c9eSKenneth Klette Jonassen return 0; 1162b0a8c9eSKenneth Klette Jonassen 1172b0a8c9eSKenneth Klette Jonassen /* Scale first eight bits linearly: */ 1182b0a8c9eSKenneth Klette Jonassen res = U32_MAX - (ux & 0xff) * (U32_MAX / 1000000); 1192b0a8c9eSKenneth Klette Jonassen 1202b0a8c9eSKenneth Klette Jonassen /* Obtain e^(x + y + ...) by computing e^x * e^y * ...: */ 1212b0a8c9eSKenneth Klette Jonassen for (i = 1; msb; i++, msb >>= 1) { 1222b0a8c9eSKenneth Klette Jonassen u32 y = v[i & -(msb & 1)] + U32_C(1); 1232b0a8c9eSKenneth Klette Jonassen 1242b0a8c9eSKenneth Klette Jonassen res = ((u64)res * y) >> 16; 1252b0a8c9eSKenneth Klette Jonassen } 1262b0a8c9eSKenneth Klette Jonassen 1272b0a8c9eSKenneth Klette Jonassen return res; 1282b0a8c9eSKenneth Klette Jonassen } 1292b0a8c9eSKenneth Klette Jonassen 1302b0a8c9eSKenneth Klette Jonassen /* Based on the HyStart algorithm (by Ha et al.) that is implemented in 1312b0a8c9eSKenneth Klette Jonassen * tcp_cubic. Differences/experimental changes: 1322b0a8c9eSKenneth Klette Jonassen * o Using Hayes' delayed ACK filter. 1332b0a8c9eSKenneth Klette Jonassen * o Using a usec clock for the ACK train. 1342b0a8c9eSKenneth Klette Jonassen * o Reset ACK train when application limited. 1352b0a8c9eSKenneth Klette Jonassen * o Invoked at any cwnd (i.e. also when cwnd < 16). 1362b0a8c9eSKenneth Klette Jonassen * o Invoked only when cwnd < ssthresh (i.e. not when cwnd == ssthresh). 1372b0a8c9eSKenneth Klette Jonassen */ 1382b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_hystart_update(struct sock *sk) 1392b0a8c9eSKenneth Klette Jonassen { 1402b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 1412b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 1422b0a8c9eSKenneth Klette Jonassen 1432b0a8c9eSKenneth Klette Jonassen ca->delay_min = min_not_zero(ca->delay_min, ca->rtt.min); 1442b0a8c9eSKenneth Klette Jonassen if (ca->delay_min == 0) 1452b0a8c9eSKenneth Klette Jonassen return; 1462b0a8c9eSKenneth Klette Jonassen 1472b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_ACK_TRAIN) { 148758f0d4bSKenneth Klette Jonassen u32 now_us = div_u64(local_clock(), NSEC_PER_USEC); 1492b0a8c9eSKenneth Klette Jonassen 1502b0a8c9eSKenneth Klette Jonassen if (ca->last_ack == 0 || !tcp_is_cwnd_limited(sk)) { 1512b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1522b0a8c9eSKenneth Klette Jonassen ca->round_start = now_us; 1532b0a8c9eSKenneth Klette Jonassen } else if (before(now_us, ca->last_ack + 3000)) { 1542b0a8c9eSKenneth Klette Jonassen u32 base_owd = max(ca->delay_min / 2U, 125U); 1552b0a8c9eSKenneth Klette Jonassen 1562b0a8c9eSKenneth Klette Jonassen ca->last_ack = now_us; 1572b0a8c9eSKenneth Klette Jonassen if (after(now_us, ca->round_start + base_owd)) { 158c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1592b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINDETECT); 160c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1612b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTTRAINCWND, 1622b0a8c9eSKenneth Klette Jonassen tp->snd_cwnd); 1632b0a8c9eSKenneth Klette Jonassen tp->snd_ssthresh = tp->snd_cwnd; 1642b0a8c9eSKenneth Klette Jonassen return; 1652b0a8c9eSKenneth Klette Jonassen } 1662b0a8c9eSKenneth Klette Jonassen } 1672b0a8c9eSKenneth Klette Jonassen } 1682b0a8c9eSKenneth Klette Jonassen 1692b0a8c9eSKenneth Klette Jonassen if (hystart_detect & HYSTART_DELAY) { 1702b0a8c9eSKenneth Klette Jonassen if (ca->sample_cnt < 8) { 1712b0a8c9eSKenneth Klette Jonassen ca->sample_cnt++; 1722b0a8c9eSKenneth Klette Jonassen } else { 1732b0a8c9eSKenneth Klette Jonassen s32 thresh = max(ca->delay_min + ca->delay_min / 8U, 1742b0a8c9eSKenneth Klette Jonassen 125U); 1752b0a8c9eSKenneth Klette Jonassen 1762b0a8c9eSKenneth Klette Jonassen if (ca->rtt.min > thresh) { 177c10d9310SEric Dumazet NET_INC_STATS(sock_net(sk), 1782b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYDETECT); 179c10d9310SEric Dumazet NET_ADD_STATS(sock_net(sk), 1802b0a8c9eSKenneth Klette Jonassen LINUX_MIB_TCPHYSTARTDELAYCWND, 1812b0a8c9eSKenneth Klette Jonassen tp->snd_cwnd); 1822b0a8c9eSKenneth Klette Jonassen tp->snd_ssthresh = tp->snd_cwnd; 1832b0a8c9eSKenneth Klette Jonassen } 1842b0a8c9eSKenneth Klette Jonassen } 1852b0a8c9eSKenneth Klette Jonassen } 1862b0a8c9eSKenneth Klette Jonassen } 1872b0a8c9eSKenneth Klette Jonassen 1882b0a8c9eSKenneth Klette Jonassen static s32 tcp_cdg_grad(struct cdg *ca) 1892b0a8c9eSKenneth Klette Jonassen { 1902b0a8c9eSKenneth Klette Jonassen s32 gmin = ca->rtt.min - ca->rtt_prev.min; 1912b0a8c9eSKenneth Klette Jonassen s32 gmax = ca->rtt.max - ca->rtt_prev.max; 1922b0a8c9eSKenneth Klette Jonassen s32 grad; 1932b0a8c9eSKenneth Klette Jonassen 1942b0a8c9eSKenneth Klette Jonassen if (ca->gradients) { 1952b0a8c9eSKenneth Klette Jonassen ca->gsum.min += gmin - ca->gradients[ca->tail].min; 1962b0a8c9eSKenneth Klette Jonassen ca->gsum.max += gmax - ca->gradients[ca->tail].max; 1972b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].min = gmin; 1982b0a8c9eSKenneth Klette Jonassen ca->gradients[ca->tail].max = gmax; 1992b0a8c9eSKenneth Klette Jonassen ca->tail = (ca->tail + 1) & (window - 1); 2002b0a8c9eSKenneth Klette Jonassen gmin = ca->gsum.min; 2012b0a8c9eSKenneth Klette Jonassen gmax = ca->gsum.max; 2022b0a8c9eSKenneth Klette Jonassen } 2032b0a8c9eSKenneth Klette Jonassen 2042b0a8c9eSKenneth Klette Jonassen /* We keep sums to ignore gradients during cwnd reductions; 2052b0a8c9eSKenneth Klette Jonassen * the paper's smoothed gradients otherwise simplify to: 2062b0a8c9eSKenneth Klette Jonassen * (rtt_latest - rtt_oldest) / window. 2072b0a8c9eSKenneth Klette Jonassen * 2082b0a8c9eSKenneth Klette Jonassen * We also drop division by window here. 2092b0a8c9eSKenneth Klette Jonassen */ 2102b0a8c9eSKenneth Klette Jonassen grad = gmin > 0 ? gmin : gmax; 2112b0a8c9eSKenneth Klette Jonassen 2122b0a8c9eSKenneth Klette Jonassen /* Extrapolate missing values in gradient window: */ 2132b0a8c9eSKenneth Klette Jonassen if (!ca->gfilled) { 2142b0a8c9eSKenneth Klette Jonassen if (!ca->gradients && window > 1) 2152b0a8c9eSKenneth Klette Jonassen grad *= window; /* Memory allocation failed. */ 2162b0a8c9eSKenneth Klette Jonassen else if (ca->tail == 0) 2172b0a8c9eSKenneth Klette Jonassen ca->gfilled = true; 2182b0a8c9eSKenneth Klette Jonassen else 2192b0a8c9eSKenneth Klette Jonassen grad = (grad * window) / (int)ca->tail; 2202b0a8c9eSKenneth Klette Jonassen } 2212b0a8c9eSKenneth Klette Jonassen 2222b0a8c9eSKenneth Klette Jonassen /* Backoff was effectual: */ 2232b0a8c9eSKenneth Klette Jonassen if (gmin <= -32 || gmax <= -32) 2242b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt = 0; 2252b0a8c9eSKenneth Klette Jonassen 2262b0a8c9eSKenneth Klette Jonassen if (use_tolerance) { 2272b0a8c9eSKenneth Klette Jonassen /* Reduce small variations to zero: */ 2282b0a8c9eSKenneth Klette Jonassen gmin = DIV_ROUND_CLOSEST(gmin, 64); 2292b0a8c9eSKenneth Klette Jonassen gmax = DIV_ROUND_CLOSEST(gmax, 64); 2302b0a8c9eSKenneth Klette Jonassen 2312b0a8c9eSKenneth Klette Jonassen if (gmin > 0 && gmax <= 0) 2322b0a8c9eSKenneth Klette Jonassen ca->state = CDG_FULL; 2332b0a8c9eSKenneth Klette Jonassen else if ((gmin > 0 && gmax > 0) || gmax < 0) 2342b0a8c9eSKenneth Klette Jonassen ca->state = CDG_NONFULL; 2352b0a8c9eSKenneth Klette Jonassen } 2362b0a8c9eSKenneth Klette Jonassen return grad; 2372b0a8c9eSKenneth Klette Jonassen } 2382b0a8c9eSKenneth Klette Jonassen 2392b0a8c9eSKenneth Klette Jonassen static bool tcp_cdg_backoff(struct sock *sk, u32 grad) 2402b0a8c9eSKenneth Klette Jonassen { 2412b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2422b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2432b0a8c9eSKenneth Klette Jonassen 2442b0a8c9eSKenneth Klette Jonassen if (prandom_u32() <= nexp_u32(grad * backoff_factor)) 2452b0a8c9eSKenneth Klette Jonassen return false; 2462b0a8c9eSKenneth Klette Jonassen 2472b0a8c9eSKenneth Klette Jonassen if (use_ineff) { 2482b0a8c9eSKenneth Klette Jonassen ca->backoff_cnt++; 2492b0a8c9eSKenneth Klette Jonassen if (ca->backoff_cnt > use_ineff) 2502b0a8c9eSKenneth Klette Jonassen return false; 2512b0a8c9eSKenneth Klette Jonassen } 2522b0a8c9eSKenneth Klette Jonassen 2532b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = max(ca->shadow_wnd, tp->snd_cwnd); 2542b0a8c9eSKenneth Klette Jonassen ca->state = CDG_BACKOFF; 2552b0a8c9eSKenneth Klette Jonassen tcp_enter_cwr(sk); 2562b0a8c9eSKenneth Klette Jonassen return true; 2572b0a8c9eSKenneth Klette Jonassen } 2582b0a8c9eSKenneth Klette Jonassen 2592b0a8c9eSKenneth Klette Jonassen /* Not called in CWR or Recovery state. */ 2602b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked) 2612b0a8c9eSKenneth Klette Jonassen { 2622b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 2632b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 2642b0a8c9eSKenneth Klette Jonassen u32 prior_snd_cwnd; 2652b0a8c9eSKenneth Klette Jonassen u32 incr; 2662b0a8c9eSKenneth Klette Jonassen 26776174004SYuchung Cheng if (tcp_in_slow_start(tp) && hystart_detect) 2682b0a8c9eSKenneth Klette Jonassen tcp_cdg_hystart_update(sk); 2692b0a8c9eSKenneth Klette Jonassen 2702b0a8c9eSKenneth Klette Jonassen if (after(ack, ca->rtt_seq) && ca->rtt.v64) { 2712b0a8c9eSKenneth Klette Jonassen s32 grad = 0; 2722b0a8c9eSKenneth Klette Jonassen 2732b0a8c9eSKenneth Klette Jonassen if (ca->rtt_prev.v64) 2742b0a8c9eSKenneth Klette Jonassen grad = tcp_cdg_grad(ca); 2752b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 2762b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 2772b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 2782b0a8c9eSKenneth Klette Jonassen ca->last_ack = 0; 2792b0a8c9eSKenneth Klette Jonassen ca->sample_cnt = 0; 2802b0a8c9eSKenneth Klette Jonassen 2812b0a8c9eSKenneth Klette Jonassen if (grad > 0 && tcp_cdg_backoff(sk, grad)) 2822b0a8c9eSKenneth Klette Jonassen return; 2832b0a8c9eSKenneth Klette Jonassen } 2842b0a8c9eSKenneth Klette Jonassen 2852b0a8c9eSKenneth Klette Jonassen if (!tcp_is_cwnd_limited(sk)) { 2862b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = min(ca->shadow_wnd, tp->snd_cwnd); 2872b0a8c9eSKenneth Klette Jonassen return; 2882b0a8c9eSKenneth Klette Jonassen } 2892b0a8c9eSKenneth Klette Jonassen 2902b0a8c9eSKenneth Klette Jonassen prior_snd_cwnd = tp->snd_cwnd; 2912b0a8c9eSKenneth Klette Jonassen tcp_reno_cong_avoid(sk, ack, acked); 2922b0a8c9eSKenneth Klette Jonassen 2932b0a8c9eSKenneth Klette Jonassen incr = tp->snd_cwnd - prior_snd_cwnd; 2942b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = max(ca->shadow_wnd, ca->shadow_wnd + incr); 2952b0a8c9eSKenneth Klette Jonassen } 2962b0a8c9eSKenneth Klette Jonassen 297*756ee172SLawrence Brakmo static void tcp_cdg_acked(struct sock *sk, const struct ack_sample *sample) 2982b0a8c9eSKenneth Klette Jonassen { 2992b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3002b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3012b0a8c9eSKenneth Klette Jonassen 302*756ee172SLawrence Brakmo if (sample->rtt_us <= 0) 3032b0a8c9eSKenneth Klette Jonassen return; 3042b0a8c9eSKenneth Klette Jonassen 3052b0a8c9eSKenneth Klette Jonassen /* A heuristic for filtering delayed ACKs, adapted from: 3062b0a8c9eSKenneth Klette Jonassen * D.A. Hayes. "Timing enhancements to the FreeBSD kernel to support 3072b0a8c9eSKenneth Klette Jonassen * delay and rate based TCP mechanisms." TR 100219A. CAIA, 2010. 3082b0a8c9eSKenneth Klette Jonassen */ 3092b0a8c9eSKenneth Klette Jonassen if (tp->sacked_out == 0) { 310*756ee172SLawrence Brakmo if (sample->pkts_acked == 1 && ca->delack) { 3112b0a8c9eSKenneth Klette Jonassen /* A delayed ACK is only used for the minimum if it is 3122b0a8c9eSKenneth Klette Jonassen * provenly lower than an existing non-zero minimum. 3132b0a8c9eSKenneth Klette Jonassen */ 314*756ee172SLawrence Brakmo ca->rtt.min = min(ca->rtt.min, sample->rtt_us); 3152b0a8c9eSKenneth Klette Jonassen ca->delack--; 3162b0a8c9eSKenneth Klette Jonassen return; 317*756ee172SLawrence Brakmo } else if (sample->pkts_acked > 1 && ca->delack < 5) { 3182b0a8c9eSKenneth Klette Jonassen ca->delack++; 3192b0a8c9eSKenneth Klette Jonassen } 3202b0a8c9eSKenneth Klette Jonassen } 3212b0a8c9eSKenneth Klette Jonassen 322*756ee172SLawrence Brakmo ca->rtt.min = min_not_zero(ca->rtt.min, sample->rtt_us); 323*756ee172SLawrence Brakmo ca->rtt.max = max(ca->rtt.max, sample->rtt_us); 3242b0a8c9eSKenneth Klette Jonassen } 3252b0a8c9eSKenneth Klette Jonassen 3262b0a8c9eSKenneth Klette Jonassen static u32 tcp_cdg_ssthresh(struct sock *sk) 3272b0a8c9eSKenneth Klette Jonassen { 3282b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3292b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3302b0a8c9eSKenneth Klette Jonassen 3312b0a8c9eSKenneth Klette Jonassen ca->undo_cwnd = tp->snd_cwnd; 3322b0a8c9eSKenneth Klette Jonassen 3332b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_BACKOFF) 3342b0a8c9eSKenneth Klette Jonassen return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); 3352b0a8c9eSKenneth Klette Jonassen 3362b0a8c9eSKenneth Klette Jonassen if (ca->state == CDG_NONFULL && use_tolerance) 3372b0a8c9eSKenneth Klette Jonassen return tp->snd_cwnd; 3382b0a8c9eSKenneth Klette Jonassen 3392b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = min(ca->shadow_wnd >> 1, tp->snd_cwnd); 3402b0a8c9eSKenneth Klette Jonassen if (use_shadow) 3412b0a8c9eSKenneth Klette Jonassen return max3(2U, ca->shadow_wnd, tp->snd_cwnd >> 1); 3422b0a8c9eSKenneth Klette Jonassen return max(2U, tp->snd_cwnd >> 1); 3432b0a8c9eSKenneth Klette Jonassen } 3442b0a8c9eSKenneth Klette Jonassen 3452b0a8c9eSKenneth Klette Jonassen static u32 tcp_cdg_undo_cwnd(struct sock *sk) 3462b0a8c9eSKenneth Klette Jonassen { 3472b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3482b0a8c9eSKenneth Klette Jonassen 3492b0a8c9eSKenneth Klette Jonassen return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd); 3502b0a8c9eSKenneth Klette Jonassen } 3512b0a8c9eSKenneth Klette Jonassen 3522b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) 3532b0a8c9eSKenneth Klette Jonassen { 3542b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3552b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3562b0a8c9eSKenneth Klette Jonassen struct minmax *gradients; 3572b0a8c9eSKenneth Klette Jonassen 3582b0a8c9eSKenneth Klette Jonassen switch (ev) { 3592b0a8c9eSKenneth Klette Jonassen case CA_EVENT_CWND_RESTART: 3602b0a8c9eSKenneth Klette Jonassen gradients = ca->gradients; 3612b0a8c9eSKenneth Klette Jonassen if (gradients) 3622b0a8c9eSKenneth Klette Jonassen memset(gradients, 0, window * sizeof(gradients[0])); 3632b0a8c9eSKenneth Klette Jonassen memset(ca, 0, sizeof(*ca)); 3642b0a8c9eSKenneth Klette Jonassen 3652b0a8c9eSKenneth Klette Jonassen ca->gradients = gradients; 3662b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3672b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = tp->snd_cwnd; 3682b0a8c9eSKenneth Klette Jonassen break; 3692b0a8c9eSKenneth Klette Jonassen case CA_EVENT_COMPLETE_CWR: 3702b0a8c9eSKenneth Klette Jonassen ca->state = CDG_UNKNOWN; 3712b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3722b0a8c9eSKenneth Klette Jonassen ca->rtt_prev = ca->rtt; 3732b0a8c9eSKenneth Klette Jonassen ca->rtt.v64 = 0; 3742b0a8c9eSKenneth Klette Jonassen break; 3752b0a8c9eSKenneth Klette Jonassen default: 3762b0a8c9eSKenneth Klette Jonassen break; 3772b0a8c9eSKenneth Klette Jonassen } 3782b0a8c9eSKenneth Klette Jonassen } 3792b0a8c9eSKenneth Klette Jonassen 3802b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_init(struct sock *sk) 3812b0a8c9eSKenneth Klette Jonassen { 3822b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3832b0a8c9eSKenneth Klette Jonassen struct tcp_sock *tp = tcp_sk(sk); 3842b0a8c9eSKenneth Klette Jonassen 3852b0a8c9eSKenneth Klette Jonassen /* We silently fall back to window = 1 if allocation fails. */ 3862b0a8c9eSKenneth Klette Jonassen if (window > 1) 3872b0a8c9eSKenneth Klette Jonassen ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), 3882b0a8c9eSKenneth Klette Jonassen GFP_NOWAIT | __GFP_NOWARN); 3892b0a8c9eSKenneth Klette Jonassen ca->rtt_seq = tp->snd_nxt; 3902b0a8c9eSKenneth Klette Jonassen ca->shadow_wnd = tp->snd_cwnd; 3912b0a8c9eSKenneth Klette Jonassen } 3922b0a8c9eSKenneth Klette Jonassen 3932b0a8c9eSKenneth Klette Jonassen static void tcp_cdg_release(struct sock *sk) 3942b0a8c9eSKenneth Klette Jonassen { 3952b0a8c9eSKenneth Klette Jonassen struct cdg *ca = inet_csk_ca(sk); 3962b0a8c9eSKenneth Klette Jonassen 3972b0a8c9eSKenneth Klette Jonassen kfree(ca->gradients); 3982b0a8c9eSKenneth Klette Jonassen } 3992b0a8c9eSKenneth Klette Jonassen 4002b0a8c9eSKenneth Klette Jonassen struct tcp_congestion_ops tcp_cdg __read_mostly = { 4012b0a8c9eSKenneth Klette Jonassen .cong_avoid = tcp_cdg_cong_avoid, 4022b0a8c9eSKenneth Klette Jonassen .cwnd_event = tcp_cdg_cwnd_event, 4032b0a8c9eSKenneth Klette Jonassen .pkts_acked = tcp_cdg_acked, 4042b0a8c9eSKenneth Klette Jonassen .undo_cwnd = tcp_cdg_undo_cwnd, 4052b0a8c9eSKenneth Klette Jonassen .ssthresh = tcp_cdg_ssthresh, 4062b0a8c9eSKenneth Klette Jonassen .release = tcp_cdg_release, 4072b0a8c9eSKenneth Klette Jonassen .init = tcp_cdg_init, 4082b0a8c9eSKenneth Klette Jonassen .owner = THIS_MODULE, 4092b0a8c9eSKenneth Klette Jonassen .name = "cdg", 4102b0a8c9eSKenneth Klette Jonassen }; 4112b0a8c9eSKenneth Klette Jonassen 4122b0a8c9eSKenneth Klette Jonassen static int __init tcp_cdg_register(void) 4132b0a8c9eSKenneth Klette Jonassen { 4142b0a8c9eSKenneth Klette Jonassen if (backoff_beta > 1024 || window < 1 || window > 256) 4152b0a8c9eSKenneth Klette Jonassen return -ERANGE; 4162b0a8c9eSKenneth Klette Jonassen if (!is_power_of_2(window)) 4172b0a8c9eSKenneth Klette Jonassen return -EINVAL; 4182b0a8c9eSKenneth Klette Jonassen 4192b0a8c9eSKenneth Klette Jonassen BUILD_BUG_ON(sizeof(struct cdg) > ICSK_CA_PRIV_SIZE); 4202b0a8c9eSKenneth Klette Jonassen tcp_register_congestion_control(&tcp_cdg); 4212b0a8c9eSKenneth Klette Jonassen return 0; 4222b0a8c9eSKenneth Klette Jonassen } 4232b0a8c9eSKenneth Klette Jonassen 4242b0a8c9eSKenneth Klette Jonassen static void __exit tcp_cdg_unregister(void) 4252b0a8c9eSKenneth Klette Jonassen { 4262b0a8c9eSKenneth Klette Jonassen tcp_unregister_congestion_control(&tcp_cdg); 4272b0a8c9eSKenneth Klette Jonassen } 4282b0a8c9eSKenneth Klette Jonassen 4292b0a8c9eSKenneth Klette Jonassen module_init(tcp_cdg_register); 4302b0a8c9eSKenneth Klette Jonassen module_exit(tcp_cdg_unregister); 4312b0a8c9eSKenneth Klette Jonassen MODULE_AUTHOR("Kenneth Klette Jonassen"); 4322b0a8c9eSKenneth Klette Jonassen MODULE_LICENSE("GPL"); 4332b0a8c9eSKenneth Klette Jonassen MODULE_DESCRIPTION("TCP CDG"); 434