1 /* 2 * Plugable TCP congestion control support and newReno 3 * congestion control. 4 * Based on ideas from I/O scheduler suport and Web100. 5 * 6 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> 7 */ 8 9 #include <linux/config.h> 10 #include <linux/module.h> 11 #include <linux/mm.h> 12 #include <linux/types.h> 13 #include <linux/list.h> 14 #include <net/tcp.h> 15 16 static DEFINE_SPINLOCK(tcp_cong_list_lock); 17 static LIST_HEAD(tcp_cong_list); 18 19 /* Simple linear search, don't expect many entries! */ 20 static struct tcp_congestion_ops *tcp_ca_find(const char *name) 21 { 22 struct tcp_congestion_ops *e; 23 24 list_for_each_entry_rcu(e, &tcp_cong_list, list) { 25 if (strcmp(e->name, name) == 0) 26 return e; 27 } 28 29 return NULL; 30 } 31 32 /* 33 * Attach new congestion control algorthim to the list 34 * of available options. 35 */ 36 int tcp_register_congestion_control(struct tcp_congestion_ops *ca) 37 { 38 int ret = 0; 39 40 /* all algorithms must implement ssthresh and cong_avoid ops */ 41 if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) { 42 printk(KERN_ERR "TCP %s does not implement required ops\n", 43 ca->name); 44 return -EINVAL; 45 } 46 47 spin_lock(&tcp_cong_list_lock); 48 if (tcp_ca_find(ca->name)) { 49 printk(KERN_NOTICE "TCP %s already registered\n", ca->name); 50 ret = -EEXIST; 51 } else { 52 list_add_rcu(&ca->list, &tcp_cong_list); 53 printk(KERN_INFO "TCP %s registered\n", ca->name); 54 } 55 spin_unlock(&tcp_cong_list_lock); 56 57 return ret; 58 } 59 EXPORT_SYMBOL_GPL(tcp_register_congestion_control); 60 61 /* 62 * Remove congestion control algorithm, called from 63 * the module's remove function. Module ref counts are used 64 * to ensure that this can't be done till all sockets using 65 * that method are closed. 66 */ 67 void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) 68 { 69 spin_lock(&tcp_cong_list_lock); 70 list_del_rcu(&ca->list); 71 spin_unlock(&tcp_cong_list_lock); 72 } 73 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 74 75 /* Assign choice of congestion control. */ 76 void tcp_init_congestion_control(struct sock *sk) 77 { 78 struct inet_connection_sock *icsk = inet_csk(sk); 79 struct tcp_congestion_ops *ca; 80 81 if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) 82 return; 83 84 rcu_read_lock(); 85 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 86 if (try_module_get(ca->owner)) { 87 icsk->icsk_ca_ops = ca; 88 break; 89 } 90 91 } 92 rcu_read_unlock(); 93 94 if (icsk->icsk_ca_ops->init) 95 icsk->icsk_ca_ops->init(sk); 96 } 97 98 /* Manage refcounts on socket close. */ 99 void tcp_cleanup_congestion_control(struct sock *sk) 100 { 101 struct inet_connection_sock *icsk = inet_csk(sk); 102 103 if (icsk->icsk_ca_ops->release) 104 icsk->icsk_ca_ops->release(sk); 105 module_put(icsk->icsk_ca_ops->owner); 106 } 107 108 /* Used by sysctl to change default congestion control */ 109 int tcp_set_default_congestion_control(const char *name) 110 { 111 struct tcp_congestion_ops *ca; 112 int ret = -ENOENT; 113 114 spin_lock(&tcp_cong_list_lock); 115 ca = tcp_ca_find(name); 116 #ifdef CONFIG_KMOD 117 if (!ca) { 118 spin_unlock(&tcp_cong_list_lock); 119 120 request_module("tcp_%s", name); 121 spin_lock(&tcp_cong_list_lock); 122 ca = tcp_ca_find(name); 123 } 124 #endif 125 126 if (ca) { 127 list_move(&ca->list, &tcp_cong_list); 128 ret = 0; 129 } 130 spin_unlock(&tcp_cong_list_lock); 131 132 return ret; 133 } 134 135 /* Get current default congestion control */ 136 void tcp_get_default_congestion_control(char *name) 137 { 138 struct tcp_congestion_ops *ca; 139 /* We will always have reno... */ 140 BUG_ON(list_empty(&tcp_cong_list)); 141 142 rcu_read_lock(); 143 ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); 144 strncpy(name, ca->name, TCP_CA_NAME_MAX); 145 rcu_read_unlock(); 146 } 147 148 /* Change congestion control for socket */ 149 int tcp_set_congestion_control(struct sock *sk, const char *name) 150 { 151 struct inet_connection_sock *icsk = inet_csk(sk); 152 struct tcp_congestion_ops *ca; 153 int err = 0; 154 155 rcu_read_lock(); 156 ca = tcp_ca_find(name); 157 if (ca == icsk->icsk_ca_ops) 158 goto out; 159 160 if (!ca) 161 err = -ENOENT; 162 163 else if (!try_module_get(ca->owner)) 164 err = -EBUSY; 165 166 else { 167 tcp_cleanup_congestion_control(sk); 168 icsk->icsk_ca_ops = ca; 169 if (icsk->icsk_ca_ops->init) 170 icsk->icsk_ca_ops->init(sk); 171 } 172 out: 173 rcu_read_unlock(); 174 return err; 175 } 176 177 /* 178 * TCP Reno congestion control 179 * This is special case used for fallback as well. 180 */ 181 /* This is Jacobson's slow start and congestion avoidance. 182 * SIGCOMM '88, p. 328. 183 */ 184 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, 185 int flag) 186 { 187 struct tcp_sock *tp = tcp_sk(sk); 188 189 if (!tcp_is_cwnd_limited(sk, in_flight)) 190 return; 191 192 /* In "safe" area, increase. */ 193 if (tp->snd_cwnd <= tp->snd_ssthresh) 194 tcp_slow_start(tp); 195 196 /* In dangerous area, increase slowly. */ 197 else if (sysctl_tcp_abc) { 198 /* RFC3465: Apppriate Byte Count 199 * increase once for each full cwnd acked 200 */ 201 if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) { 202 tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache; 203 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 204 tp->snd_cwnd++; 205 } 206 } else { 207 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ 208 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 209 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 210 tp->snd_cwnd++; 211 tp->snd_cwnd_cnt = 0; 212 } else 213 tp->snd_cwnd_cnt++; 214 } 215 } 216 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 217 218 /* Slow start threshold is half the congestion window (min 2) */ 219 u32 tcp_reno_ssthresh(struct sock *sk) 220 { 221 const struct tcp_sock *tp = tcp_sk(sk); 222 return max(tp->snd_cwnd >> 1U, 2U); 223 } 224 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 225 226 /* Lower bound on congestion window. */ 227 u32 tcp_reno_min_cwnd(struct sock *sk) 228 { 229 const struct tcp_sock *tp = tcp_sk(sk); 230 return tp->snd_ssthresh/2; 231 } 232 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); 233 234 struct tcp_congestion_ops tcp_reno = { 235 .name = "reno", 236 .owner = THIS_MODULE, 237 .ssthresh = tcp_reno_ssthresh, 238 .cong_avoid = tcp_reno_cong_avoid, 239 .min_cwnd = tcp_reno_min_cwnd, 240 }; 241 242 /* Initial congestion control used (until SYN) 243 * really reno under another name so we can tell difference 244 * during tcp_set_default_congestion_control 245 */ 246 struct tcp_congestion_ops tcp_init_congestion_ops = { 247 .name = "", 248 .owner = THIS_MODULE, 249 .ssthresh = tcp_reno_ssthresh, 250 .cong_avoid = tcp_reno_cong_avoid, 251 .min_cwnd = tcp_reno_min_cwnd, 252 }; 253 EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); 254