/*
 * Pluggable TCP congestion control support and newReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <net/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

/* Simple linear search, don't expect many entries! */
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
	struct tcp_congestion_ops *e;

	list_for_each_entry_rcu(e, &tcp_cong_list, list) {
		if (strcmp(e->name, name) == 0)
			return e;
	}

	return NULL;
}

/*
 * Attach new congestion control algorithm to the list
 * of available options.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
	int ret = 0;

	/* all algorithms must implement ssthresh and cong_avoid ops */
	if (!ca->ssthresh || !ca->cong_avoid) {
		printk(KERN_ERR "TCP %s does not implement required ops\n",
		       ca->name);
		return -EINVAL;
	}

	spin_lock(&tcp_cong_list_lock);
	if (tcp_ca_find(ca->name)) {
		printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
		ret = -EEXIST;
	} else {
		list_add_rcu(&ca->list, &tcp_cong_list);
		printk(KERN_INFO "TCP %s registered\n", ca->name);
	}
	spin_unlock(&tcp_cong_list_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);

/*
 * Remove congestion control algorithm, called from
 * the module's remove function.  Module ref counts are used
 * to ensure that this can't be done till all sockets using
 * that method are closed.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
	spin_lock(&tcp_cong_list_lock);
	list_del_rcu(&ca->list);
	spin_unlock(&tcp_cong_list_lock);
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);

/* Assign choice of congestion control. */
void tcp_init_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_congestion_ops *ca;

	if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
		return;

	rcu_read_lock();
	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
		if (try_module_get(ca->owner)) {
			icsk->icsk_ca_ops = ca;
			break;
		}
	}
	rcu_read_unlock();

	if (icsk->icsk_ca_ops->init)
		icsk->icsk_ca_ops->init(sk);
}

/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (icsk->icsk_ca_ops->release)
		icsk->icsk_ca_ops->release(sk);
	module_put(icsk->icsk_ca_ops->owner);
}

/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(const char *name)
{
	struct tcp_congestion_ops *ca;
	int ret = -ENOENT;

	spin_lock(&tcp_cong_list_lock);
	ca = tcp_ca_find(name);
#ifdef CONFIG_KMOD
	if (!ca) {
		spin_unlock(&tcp_cong_list_lock);

		request_module("tcp_%s", name);
		spin_lock(&tcp_cong_list_lock);
		ca = tcp_ca_find(name);
	}
#endif

	if (ca) {
		list_move(&ca->list, &tcp_cong_list);
		ret = 0;
	}
	spin_unlock(&tcp_cong_list_lock);

	return ret;
}

/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
	struct tcp_congestion_ops *ca;

	/* We will always have reno... */
	BUG_ON(list_empty(&tcp_cong_list));

	rcu_read_lock();
	ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
	strncpy(name, ca->name, TCP_CA_NAME_MAX);
	rcu_read_unlock();
}

/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_congestion_ops *ca;
	int err = 0;

	rcu_read_lock();
	ca = tcp_ca_find(name);
	if (ca == icsk->icsk_ca_ops)
		goto out;

	if (!ca)
		err = -ENOENT;

	else if (!try_module_get(ca->owner))
		err = -EBUSY;

	else {
		tcp_cleanup_congestion_control(sk);
		icsk->icsk_ca_ops = ca;
		if (icsk->icsk_ca_ops->init)
			icsk->icsk_ca_ops->init(sk);
	}
out:
	rcu_read_unlock();
	return err;
}

/*
 * Linear (per-ACK) increase during slow start; cwnd grows by one
 * segment for each ACK, which is exponential growth per round trip.
 */
void tcp_slow_start(struct tcp_sock *tp)
{
	if (sysctl_tcp_abc) {
		/* RFC3465: Slow Start
		 * TCP sender SHOULD increase cwnd by the number of
		 * previously unacknowledged bytes ACKed by each incoming
		 * acknowledgment, provided the increase is not more than L
		 */
		if (tp->bytes_acked < tp->mss_cache)
			return;

		/* We MAY increase by 2 if a delayed ack is discovered */
		if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	}
	tp->bytes_acked = 0;

	if (tp->snd_cwnd < tp->snd_cwnd_clamp)
		tp->snd_cwnd++;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);

/*
 * TCP Reno congestion control
 * This is a special case, also used as the fallback.
 */
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
			 int flag)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	/* In "safe" area, increase. */
	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);

	/* In dangerous area, increase slowly. */
	else if (sysctl_tcp_abc) {
		/* RFC3465: Appropriate Byte Count
		 * increase once for each full cwnd acked
		 */
		if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
			tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
		}
	} else {
		/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
		} else
			tp->snd_cwnd_cnt++;
	}
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

/* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	return max(tp->snd_cwnd >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);

/* Lower bound on congestion window with halving. */
u32 tcp_reno_min_cwnd(const struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	return tp->snd_ssthresh/2;
}
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);

struct tcp_congestion_ops tcp_reno = {
	.name		= "reno",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};

/* Initial congestion control used (until SYN).
 * It is really Reno under another name so we can tell the difference
 * during tcp_set_default_congestion_control().
 */
struct tcp_congestion_ops tcp_init_congestion_ops = {
	.name		= "",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
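
/* Illustrative sketch (not compiled into this file): how an out-of-tree
 * congestion control module would typically use the registration API
 * above.  It registers its ops from module_init() and unregisters from
 * module_exit(); the ops must provide at least ssthresh and cong_avoid
 * or tcp_register_congestion_control() returns -EINVAL.  The names
 * tcp_example and tcp_example_* are hypothetical, and the Reno helpers
 * are reused here only to keep the sketch short.
 */
#if 0
static struct tcp_congestion_ops tcp_example = {
	.name		= "example",
	.owner		= THIS_MODULE,
	.ssthresh	= tcp_reno_ssthresh,	/* reuse Reno helpers */
	.cong_avoid	= tcp_reno_cong_avoid,
	.min_cwnd	= tcp_reno_min_cwnd,
};

static int __init tcp_example_register(void)
{
	return tcp_register_congestion_control(&tcp_example);
}

static void __exit tcp_example_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_example);
}

module_init(tcp_example_register);
module_exit(tcp_example_unregister);

/* GPL license is required because the registration symbols are
 * exported with EXPORT_SYMBOL_GPL. */
MODULE_LICENSE("GPL");
#endif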