1 /* 2 * TCP Illinois congestion control. 3 * Home page: 4 * http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html 5 * 6 * The algorithm is described in: 7 * "TCP-Illinois: A Loss and Delay-Based Congestion Control Algorithm 8 * for High-Speed Networks" 9 * http://www.ews.uiuc.edu/~shaoliu/papersandslides/liubassri06perf.pdf 10 * 11 * Implemented from description in paper and ns-2 simulation. 12 * Copyright (C) 2007 Stephen Hemminger <shemminger@linux-foundation.org> 13 */ 14 15 #include <linux/module.h> 16 #include <linux/skbuff.h> 17 #include <linux/inet_diag.h> 18 #include <asm/div64.h> 19 #include <net/tcp.h> 20 21 #define ALPHA_SHIFT 7 22 #define ALPHA_SCALE (1u<<ALPHA_SHIFT) 23 #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ 24 #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ 25 #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ 26 #define U32_MAX ((u32)~0U) 27 #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ 28 29 #define BETA_SHIFT 6 30 #define BETA_SCALE (1u<<BETA_SHIFT) 31 #define BETA_MIN (BETA_SCALE/8) /* 0.125 */ 32 #define BETA_MAX (BETA_SCALE/2) /* 0.5 */ 33 #define BETA_BASE BETA_MAX 34 35 static int win_thresh __read_mostly = 15; 36 module_param(win_thresh, int, 0); 37 MODULE_PARM_DESC(win_thresh, "Window threshold for starting adaptive sizing"); 38 39 static int theta __read_mostly = 5; 40 module_param(theta, int, 0); 41 MODULE_PARM_DESC(theta, "# of fast RTT's before full growth"); 42 43 /* TCP Illinois Parameters */ 44 struct illinois { 45 u64 sum_rtt; /* sum of rtt's measured within last rtt */ 46 u16 cnt_rtt; /* # of rtts measured within last rtt */ 47 u32 base_rtt; /* min of all rtt in usec */ 48 u32 max_rtt; /* max of all rtt in usec */ 49 u32 end_seq; /* right edge of current RTT */ 50 u32 alpha; /* Additive increase */ 51 u32 beta; /* Muliplicative decrease */ 52 u16 acked; /* # packets acked by current ACK */ 53 u8 rtt_above; /* average rtt has gone above threshold */ 54 u8 rtt_low; /* # of rtts measurements below threshold */ 55 }; 56 57 static void rtt_reset(struct sock *sk) 58 { 59 struct tcp_sock *tp = tcp_sk(sk); 60 struct illinois *ca = inet_csk_ca(sk); 61 62 ca->end_seq = tp->snd_nxt; 63 ca->cnt_rtt = 0; 64 ca->sum_rtt = 0; 65 66 /* TODO: age max_rtt? */ 67 } 68 69 static void tcp_illinois_init(struct sock *sk) 70 { 71 struct illinois *ca = inet_csk_ca(sk); 72 73 ca->alpha = ALPHA_MAX; 74 ca->beta = BETA_BASE; 75 ca->base_rtt = 0x7fffffff; 76 ca->max_rtt = 0; 77 78 ca->acked = 0; 79 ca->rtt_low = 0; 80 ca->rtt_above = 0; 81 82 rtt_reset(sk); 83 } 84 85 /* Measure RTT for each ack. */ 86 static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, s32 rtt) 87 { 88 struct illinois *ca = inet_csk_ca(sk); 89 90 ca->acked = pkts_acked; 91 92 /* dup ack, no rtt sample */ 93 if (rtt < 0) 94 return; 95 96 /* ignore bogus values, this prevents wraparound in alpha math */ 97 if (rtt > RTT_MAX) 98 rtt = RTT_MAX; 99 100 /* keep track of minimum RTT seen so far */ 101 if (ca->base_rtt > rtt) 102 ca->base_rtt = rtt; 103 104 /* and max */ 105 if (ca->max_rtt < rtt) 106 ca->max_rtt = rtt; 107 108 ++ca->cnt_rtt; 109 ca->sum_rtt += rtt; 110 } 111 112 /* Maximum queuing delay */ 113 static inline u32 max_delay(const struct illinois *ca) 114 { 115 return ca->max_rtt - ca->base_rtt; 116 } 117 118 /* Average queuing delay */ 119 static inline u32 avg_delay(const struct illinois *ca) 120 { 121 u64 t = ca->sum_rtt; 122 123 do_div(t, ca->cnt_rtt); 124 return t - ca->base_rtt; 125 } 126 127 /* 128 * Compute value of alpha used for additive increase. 129 * If small window then use 1.0, equivalent to Reno. 130 * 131 * For larger windows, adjust based on average delay. 132 * A. If average delay is at minimum (we are uncongested), 133 * then use large alpha (10.0) to increase faster. 134 * B. If average delay is at maximum (getting congested) 135 * then use small alpha (0.3) 136 * 137 * The result is a convex window growth curve. 138 */ 139 static u32 alpha(struct illinois *ca, u32 da, u32 dm) 140 { 141 u32 d1 = dm / 100; /* Low threshold */ 142 143 if (da <= d1) { 144 /* If never got out of low delay zone, then use max */ 145 if (!ca->rtt_above) 146 return ALPHA_MAX; 147 148 /* Wait for 5 good RTT's before allowing alpha to go alpha max. 149 * This prevents one good RTT from causing sudden window increase. 150 */ 151 if (++ca->rtt_low < theta) 152 return ca->alpha; 153 154 ca->rtt_low = 0; 155 ca->rtt_above = 0; 156 return ALPHA_MAX; 157 } 158 159 ca->rtt_above = 1; 160 161 /* 162 * Based on: 163 * 164 * (dm - d1) amin amax 165 * k1 = ------------------- 166 * amax - amin 167 * 168 * (dm - d1) amin 169 * k2 = ---------------- - d1 170 * amax - amin 171 * 172 * k1 173 * alpha = ---------- 174 * k2 + da 175 */ 176 177 dm -= d1; 178 da -= d1; 179 return (dm * ALPHA_MAX) / 180 (dm + (da * (ALPHA_MAX - ALPHA_MIN)) / ALPHA_MIN); 181 } 182 183 /* 184 * Beta used for multiplicative decrease. 185 * For small window sizes returns same value as Reno (0.5) 186 * 187 * If delay is small (10% of max) then beta = 1/8 188 * If delay is up to 80% of max then beta = 1/2 189 * In between is a linear function 190 */ 191 static u32 beta(u32 da, u32 dm) 192 { 193 u32 d2, d3; 194 195 d2 = dm / 10; 196 if (da <= d2) 197 return BETA_MIN; 198 199 d3 = (8 * dm) / 10; 200 if (da >= d3 || d3 <= d2) 201 return BETA_MAX; 202 203 /* 204 * Based on: 205 * 206 * bmin d3 - bmax d2 207 * k3 = ------------------- 208 * d3 - d2 209 * 210 * bmax - bmin 211 * k4 = ------------- 212 * d3 - d2 213 * 214 * b = k3 + k4 da 215 */ 216 return (BETA_MIN * d3 - BETA_MAX * d2 + (BETA_MAX - BETA_MIN) * da) 217 / (d3 - d2); 218 } 219 220 /* Update alpha and beta values once per RTT */ 221 static void update_params(struct sock *sk) 222 { 223 struct tcp_sock *tp = tcp_sk(sk); 224 struct illinois *ca = inet_csk_ca(sk); 225 226 if (tp->snd_cwnd < win_thresh) { 227 ca->alpha = ALPHA_BASE; 228 ca->beta = BETA_BASE; 229 } else if (ca->cnt_rtt > 0) { 230 u32 dm = max_delay(ca); 231 u32 da = avg_delay(ca); 232 233 ca->alpha = alpha(ca, da, dm); 234 ca->beta = beta(da, dm); 235 } 236 237 rtt_reset(sk); 238 } 239 240 /* 241 * In case of loss, reset to default values 242 */ 243 static void tcp_illinois_state(struct sock *sk, u8 new_state) 244 { 245 struct illinois *ca = inet_csk_ca(sk); 246 247 if (new_state == TCP_CA_Loss) { 248 ca->alpha = ALPHA_BASE; 249 ca->beta = BETA_BASE; 250 ca->rtt_low = 0; 251 ca->rtt_above = 0; 252 rtt_reset(sk); 253 } 254 } 255 256 /* 257 * Increase window in response to successful acknowledgment. 258 */ 259 static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) 260 { 261 struct tcp_sock *tp = tcp_sk(sk); 262 struct illinois *ca = inet_csk_ca(sk); 263 264 if (after(ack, ca->end_seq)) 265 update_params(sk); 266 267 /* RFC2861 only increase cwnd if fully utilized */ 268 if (!tcp_is_cwnd_limited(sk, in_flight)) 269 return; 270 271 /* In slow start */ 272 if (tp->snd_cwnd <= tp->snd_ssthresh) 273 tcp_slow_start(tp); 274 275 else { 276 u32 delta; 277 278 /* snd_cwnd_cnt is # of packets since last cwnd increment */ 279 tp->snd_cwnd_cnt += ca->acked; 280 ca->acked = 1; 281 282 /* This is close approximation of: 283 * tp->snd_cwnd += alpha/tp->snd_cwnd 284 */ 285 delta = (tp->snd_cwnd_cnt * ca->alpha) >> ALPHA_SHIFT; 286 if (delta >= tp->snd_cwnd) { 287 tp->snd_cwnd = min(tp->snd_cwnd + delta / tp->snd_cwnd, 288 (u32) tp->snd_cwnd_clamp); 289 tp->snd_cwnd_cnt = 0; 290 } 291 } 292 } 293 294 static u32 tcp_illinois_ssthresh(struct sock *sk) 295 { 296 struct tcp_sock *tp = tcp_sk(sk); 297 struct illinois *ca = inet_csk_ca(sk); 298 299 /* Multiplicative decrease */ 300 return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); 301 } 302 303 304 /* Extract info for Tcp socket info provided via netlink. */ 305 static void tcp_illinois_info(struct sock *sk, u32 ext, 306 struct sk_buff *skb) 307 { 308 const struct illinois *ca = inet_csk_ca(sk); 309 310 if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { 311 struct tcpvegas_info info = { 312 .tcpv_enabled = 1, 313 .tcpv_rttcnt = ca->cnt_rtt, 314 .tcpv_minrtt = ca->base_rtt, 315 }; 316 u64 t = ca->sum_rtt; 317 318 do_div(t, ca->cnt_rtt); 319 info.tcpv_rtt = t; 320 321 nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); 322 } 323 } 324 325 static struct tcp_congestion_ops tcp_illinois = { 326 .flags = TCP_CONG_RTT_STAMP, 327 .init = tcp_illinois_init, 328 .ssthresh = tcp_illinois_ssthresh, 329 .min_cwnd = tcp_reno_min_cwnd, 330 .cong_avoid = tcp_illinois_cong_avoid, 331 .set_state = tcp_illinois_state, 332 .get_info = tcp_illinois_info, 333 .pkts_acked = tcp_illinois_acked, 334 335 .owner = THIS_MODULE, 336 .name = "illinois", 337 }; 338 339 static int __init tcp_illinois_register(void) 340 { 341 BUILD_BUG_ON(sizeof(struct illinois) > ICSK_CA_PRIV_SIZE); 342 return tcp_register_congestion_control(&tcp_illinois); 343 } 344 345 static void __exit tcp_illinois_unregister(void) 346 { 347 tcp_unregister_congestion_control(&tcp_illinois); 348 } 349 350 module_init(tcp_illinois_register); 351 module_exit(tcp_illinois_unregister); 352 353 MODULE_AUTHOR("Stephen Hemminger, Shao Liu"); 354 MODULE_LICENSE("GPL"); 355 MODULE_DESCRIPTION("TCP Illinois"); 356 MODULE_VERSION("1.0"); 357