/*
 * Pluggable TCP congestion control support and NewReno
 * congestion control.
 * Based on ideas from I/O scheduler support and Web100.
 *
 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org>
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/list.h>
#include <net/tcp.h>

static DEFINE_SPINLOCK(tcp_cong_list_lock);
static LIST_HEAD(tcp_cong_list);

/* Simple linear search, don't expect many entries! */
static struct tcp_congestion_ops *tcp_ca_find(const char *name)
{
        struct tcp_congestion_ops *e;

        list_for_each_entry_rcu(e, &tcp_cong_list, list) {
                if (strcmp(e->name, name) == 0)
                        return e;
        }

        return NULL;
}

/*
 * Attach new congestion control algorithm to the list
 * of available options.
 */
int tcp_register_congestion_control(struct tcp_congestion_ops *ca)
{
        int ret = 0;

        /* all algorithms must implement ssthresh and cong_avoid ops */
        if (!ca->ssthresh || !ca->cong_avoid) {
                printk(KERN_ERR "TCP %s does not implement required ops\n",
                       ca->name);
                return -EINVAL;
        }

        spin_lock(&tcp_cong_list_lock);
        if (tcp_ca_find(ca->name)) {
                printk(KERN_NOTICE "TCP %s already registered\n", ca->name);
                ret = -EEXIST;
        } else {
                list_add_rcu(&ca->list, &tcp_cong_list);
                printk(KERN_INFO "TCP %s registered\n", ca->name);
        }
        spin_unlock(&tcp_cong_list_lock);

        return ret;
}
EXPORT_SYMBOL_GPL(tcp_register_congestion_control);

/*
 * Remove congestion control algorithm, called from
 * the module's remove function.  Module ref counts are used
 * to ensure that this can't be done till all sockets using
 * that method are closed.
 */
void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
{
        spin_lock(&tcp_cong_list_lock);
        list_del_rcu(&ca->list);
        spin_unlock(&tcp_cong_list_lock);
}
EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
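/*
 * Illustrative sketch (not part of this file): a congestion control module
 * would normally pair the two calls above in its init and exit paths.  The
 * "example" name and the tcp_example_* symbols are hypothetical, and the
 * reno helpers merely stand in for the module's own callbacks.
 *
 *      static struct tcp_congestion_ops tcp_example = {
 *              .name           = "example",
 *              .owner          = THIS_MODULE,
 *              .ssthresh       = tcp_reno_ssthresh,
 *              .cong_avoid     = tcp_reno_cong_avoid,
 *      };
 *
 *      static int __init tcp_example_register(void)
 *      {
 *              return tcp_register_congestion_control(&tcp_example);
 *      }
 *
 *      static void __exit tcp_example_unregister(void)
 *      {
 *              tcp_unregister_congestion_control(&tcp_example);
 *      }
 *
 *      module_init(tcp_example_register);
 *      module_exit(tcp_example_unregister);
 *      MODULE_LICENSE("GPL");
 */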
/* Assign choice of congestion control. */
void tcp_init_congestion_control(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_congestion_ops *ca;

        /* Only sockets still on the placeholder ops need a real algorithm. */
        if (icsk->icsk_ca_ops != &tcp_init_congestion_ops)
                return;

        rcu_read_lock();
        list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
                /* The current default heads the list; take the first
                 * algorithm whose module we can pin.
                 */
                if (try_module_get(ca->owner)) {
                        icsk->icsk_ca_ops = ca;
                        break;
                }
        }
        rcu_read_unlock();

        if (icsk->icsk_ca_ops->init)
                icsk->icsk_ca_ops->init(sk);
}

/* Manage refcounts on socket close. */
void tcp_cleanup_congestion_control(struct sock *sk)
{
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
        module_put(icsk->icsk_ca_ops->owner);
}

/* Used by sysctl to change default congestion control */
int tcp_set_default_congestion_control(const char *name)
{
        struct tcp_congestion_ops *ca;
        int ret = -ENOENT;

        spin_lock(&tcp_cong_list_lock);
        ca = tcp_ca_find(name);

#ifdef CONFIG_KMOD
        /* Not found: try to autoload the module, then look again. */
        if (!ca) {
                spin_unlock(&tcp_cong_list_lock);

                request_module("tcp_%s", name);
                spin_lock(&tcp_cong_list_lock);
                ca = tcp_ca_find(name);
        }
#endif

        if (ca) {
                /* The default is whichever entry heads the list. */
                list_move(&ca->list, &tcp_cong_list);
                ret = 0;
        }
        spin_unlock(&tcp_cong_list_lock);

        return ret;
}

/* Get current default congestion control */
void tcp_get_default_congestion_control(char *name)
{
        struct tcp_congestion_ops *ca;

        /* We will always have reno... */
        BUG_ON(list_empty(&tcp_cong_list));

        rcu_read_lock();
        ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
        strncpy(name, ca->name, TCP_CA_NAME_MAX);
        rcu_read_unlock();
}

/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct tcp_congestion_ops *ca;
        int err = 0;

        rcu_read_lock();
        ca = tcp_ca_find(name);
        if (ca == icsk->icsk_ca_ops)
                goto out;

        if (!ca)
                err = -ENOENT;
        else if (!try_module_get(ca->owner))
                err = -EBUSY;
        else {
                tcp_cleanup_congestion_control(sk);
                icsk->icsk_ca_ops = ca;
                if (icsk->icsk_ca_ops->init)
                        icsk->icsk_ca_ops->init(sk);
        }
out:
        rcu_read_unlock();
        return err;
}

/*
 * Window growth during slow start: one segment per ACK (exponential
 * growth per round trip), byte counted when RFC 3465 ABC is enabled.
 */
void tcp_slow_start(struct tcp_sock *tp)
{
        if (sysctl_tcp_abc) {
                /* RFC3465: Slow Start
                 * TCP sender SHOULD increase cwnd by the number of
                 * previously unacknowledged bytes ACKed by each incoming
                 * acknowledgment, provided the increase is not more than L
                 */
                if (tp->bytes_acked < tp->mss_cache)
                        return;

                /* We MAY increase by 2 if discovered delayed ack */
                if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                                tp->snd_cwnd++;
                }
        }
        tp->bytes_acked = 0;

        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                tp->snd_cwnd++;
}
EXPORT_SYMBOL_GPL(tcp_slow_start);
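/*
 * Worked example (illustrative numbers, not from a trace): with
 * mss_cache = 1460 and appropriate byte counting disabled
 * (sysctl_tcp_abc == 0), each ACK that reaches tcp_slow_start() adds one
 * segment, so a window of 10 grows to 20 over one round trip of
 * per-segment ACKs.  With sysctl_tcp_abc == 2 and a delayed ACK covering
 * 2920 bytes (> 2 * mss_cache), the window may instead grow by two
 * segments for that single ACK, as RFC 3465 permits.
 */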
/*
 * TCP Reno congestion control
 * This is a special case that is also used as the fallback.
 */
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
                         int flag)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (!tcp_is_cwnd_limited(sk, in_flight))
                return;

        /* In "safe" area, increase. */
        if (tp->snd_cwnd <= tp->snd_ssthresh)
                tcp_slow_start(tp);

        /* In dangerous area, increase slowly. */
        else if (sysctl_tcp_abc) {
                /* RFC3465: Appropriate Byte Count
                 * increase once for each full cwnd acked
                 */
                if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
                        tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                                tp->snd_cwnd++;
                }
        } else {
                /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
                if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
                        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                                tp->snd_cwnd++;
                        tp->snd_cwnd_cnt = 0;
                } else
                        tp->snd_cwnd_cnt++;
        }
}
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);

/* Slow start threshold is half the congestion window (min 2) */
u32 tcp_reno_ssthresh(struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        return max(tp->snd_cwnd >> 1U, 2U);
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);

/* Lower bound on congestion window with halving. */
u32 tcp_reno_min_cwnd(const struct sock *sk)
{
        const struct tcp_sock *tp = tcp_sk(sk);
        return tp->snd_ssthresh/2;
}
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);

struct tcp_congestion_ops tcp_reno = {
        .name           = "reno",
        .owner          = THIS_MODULE,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
        .min_cwnd       = tcp_reno_min_cwnd,
};

/* Initial congestion control used (until SYN).
 * Really reno under another name so we can tell the difference
 * during tcp_set_default_congestion_control().
 */
struct tcp_congestion_ops tcp_init_congestion_ops = {
        .name           = "",
        .owner          = THIS_MODULE,
        .ssthresh       = tcp_reno_ssthresh,
        .cong_avoid     = tcp_reno_cong_avoid,
        .min_cwnd       = tcp_reno_min_cwnd,
};
EXPORT_SYMBOL_GPL(tcp_init_congestion_ops);
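/*
 * Usage note (illustrative, userspace side): the per-socket and default
 * selections implemented above are normally reached through the
 * TCP_CONGESTION socket option and the net.ipv4.tcp_congestion_control
 * sysctl rather than called directly.  A minimal sketch; use_reno() is a
 * hypothetical helper, and older C libraries may need <linux/tcp.h> for
 * the TCP_CONGESTION definition:
 *
 *      #include <string.h>
 *      #include <netinet/in.h>
 *      #include <netinet/tcp.h>
 *      #include <sys/socket.h>
 *
 *      static int use_reno(int fd)
 *      {
 *              const char name[] = "reno";
 *
 *              return setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION,
 *                                name, strlen(name));
 *      }
 */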