1 /* 2 * Plugable TCP congestion control support and newReno 3 * congestion control. 4 * Based on ideas from I/O scheduler suport and Web100. 5 * 6 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> 7 */ 8 9 #include <linux/config.h> 10 #include <linux/module.h> 11 #include <linux/mm.h> 12 #include <linux/types.h> 13 #include <linux/list.h> 14 #include <net/tcp.h> 15 16 static DEFINE_SPINLOCK(tcp_cong_list_lock); 17 static LIST_HEAD(tcp_cong_list); 18 19 /* Simple linear search, don't expect many entries! */ 20 static struct tcp_congestion_ops *tcp_ca_find(const char *name) 21 { 22 struct tcp_congestion_ops *e; 23 24 list_for_each_entry_rcu(e, &tcp_cong_list, list) { 25 if (strcmp(e->name, name) == 0) 26 return e; 27 } 28 29 return NULL; 30 } 31 32 /* 33 * Attach new congestion control algorthim to the list 34 * of available options. 35 */ 36 int tcp_register_congestion_control(struct tcp_congestion_ops *ca) 37 { 38 int ret = 0; 39 40 /* all algorithms must implement ssthresh and cong_avoid ops */ 41 if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) { 42 printk(KERN_ERR "TCP %s does not implement required ops\n", 43 ca->name); 44 return -EINVAL; 45 } 46 47 spin_lock(&tcp_cong_list_lock); 48 if (tcp_ca_find(ca->name)) { 49 printk(KERN_NOTICE "TCP %s already registered\n", ca->name); 50 ret = -EEXIST; 51 } else { 52 list_add_rcu(&ca->list, &tcp_cong_list); 53 printk(KERN_INFO "TCP %s registered\n", ca->name); 54 } 55 spin_unlock(&tcp_cong_list_lock); 56 57 return ret; 58 } 59 EXPORT_SYMBOL_GPL(tcp_register_congestion_control); 60 61 /* 62 * Remove congestion control algorithm, called from 63 * the module's remove function. Module ref counts are used 64 * to ensure that this can't be done till all sockets using 65 * that method are closed. 66 */ 67 void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) 68 { 69 spin_lock(&tcp_cong_list_lock); 70 list_del_rcu(&ca->list); 71 spin_unlock(&tcp_cong_list_lock); 72 } 73 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 74 75 /* Assign choice of congestion control. */ 76 void tcp_init_congestion_control(struct sock *sk) 77 { 78 struct inet_connection_sock *icsk = inet_csk(sk); 79 struct tcp_congestion_ops *ca; 80 81 if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) 82 return; 83 84 rcu_read_lock(); 85 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 86 if (try_module_get(ca->owner)) { 87 icsk->icsk_ca_ops = ca; 88 break; 89 } 90 91 } 92 rcu_read_unlock(); 93 94 if (icsk->icsk_ca_ops->init) 95 icsk->icsk_ca_ops->init(sk); 96 } 97 98 /* Manage refcounts on socket close. */ 99 void tcp_cleanup_congestion_control(struct sock *sk) 100 { 101 struct inet_connection_sock *icsk = inet_csk(sk); 102 103 if (icsk->icsk_ca_ops->release) 104 icsk->icsk_ca_ops->release(sk); 105 module_put(icsk->icsk_ca_ops->owner); 106 } 107 108 /* Used by sysctl to change default congestion control */ 109 int tcp_set_default_congestion_control(const char *name) 110 { 111 struct tcp_congestion_ops *ca; 112 int ret = -ENOENT; 113 114 spin_lock(&tcp_cong_list_lock); 115 ca = tcp_ca_find(name); 116 #ifdef CONFIG_KMOD 117 if (!ca) { 118 spin_unlock(&tcp_cong_list_lock); 119 120 request_module("tcp_%s", name); 121 spin_lock(&tcp_cong_list_lock); 122 ca = tcp_ca_find(name); 123 } 124 #endif 125 126 if (ca) { 127 list_move(&ca->list, &tcp_cong_list); 128 ret = 0; 129 } 130 spin_unlock(&tcp_cong_list_lock); 131 132 return ret; 133 } 134 135 /* Get current default congestion control */ 136 void tcp_get_default_congestion_control(char *name) 137 { 138 struct tcp_congestion_ops *ca; 139 /* We will always have reno... */ 140 BUG_ON(list_empty(&tcp_cong_list)); 141 142 rcu_read_lock(); 143 ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); 144 strncpy(name, ca->name, TCP_CA_NAME_MAX); 145 rcu_read_unlock(); 146 } 147 148 /* Change congestion control for socket */ 149 int tcp_set_congestion_control(struct sock *sk, const char *name) 150 { 151 struct inet_connection_sock *icsk = inet_csk(sk); 152 struct tcp_congestion_ops *ca; 153 int err = 0; 154 155 rcu_read_lock(); 156 ca = tcp_ca_find(name); 157 if (ca == icsk->icsk_ca_ops) 158 goto out; 159 160 if (!ca) 161 err = -ENOENT; 162 163 else if (!try_module_get(ca->owner)) 164 err = -EBUSY; 165 166 else { 167 tcp_cleanup_congestion_control(sk); 168 icsk->icsk_ca_ops = ca; 169 if (icsk->icsk_ca_ops->init) 170 icsk->icsk_ca_ops->init(sk); 171 } 172 out: 173 rcu_read_unlock(); 174 return err; 175 } 176 177 /* 178 * TCP Reno congestion control 179 * This is special case used for fallback as well. 180 */ 181 /* This is Jacobson's slow start and congestion avoidance. 182 * SIGCOMM '88, p. 328. 183 */ 184 void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, 185 int flag) 186 { 187 struct tcp_sock *tp = tcp_sk(sk); 188 189 if (in_flight < tp->snd_cwnd) 190 return; 191 192 if (tp->snd_cwnd <= tp->snd_ssthresh) { 193 /* In "safe" area, increase. */ 194 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 195 tp->snd_cwnd++; 196 } else { 197 /* In dangerous area, increase slowly. 198 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 199 */ 200 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 201 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 202 tp->snd_cwnd++; 203 tp->snd_cwnd_cnt = 0; 204 } else 205 tp->snd_cwnd_cnt++; 206 } 207 } 208 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 209 210 /* Slow start threshold is half the congestion window (min 2) */ 211 u32 tcp_reno_ssthresh(struct sock *sk) 212 { 213 const struct tcp_sock *tp = tcp_sk(sk); 214 return max(tp->snd_cwnd >> 1U, 2U); 215 } 216 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 217 218 /* Lower bound on congestion window. */ 219 u32 tcp_reno_min_cwnd(struct sock *sk) 220 { 221 const struct tcp_sock *tp = tcp_sk(sk); 222 return tp->snd_ssthresh/2; 223 } 224 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); 225 226 struct tcp_congestion_ops tcp_reno = { 227 .name = "reno", 228 .owner = THIS_MODULE, 229 .ssthresh = tcp_reno_ssthresh, 230 .cong_avoid = tcp_reno_cong_avoid, 231 .min_cwnd = tcp_reno_min_cwnd, 232 }; 233 234 /* Initial congestion control used (until SYN) 235 * really reno under another name so we can tell difference 236 * during tcp_set_default_congestion_control 237 */ 238 struct tcp_congestion_ops tcp_init_congestion_ops = { 239 .name = "", 240 .owner = THIS_MODULE, 241 .ssthresh = tcp_reno_ssthresh, 242 .cong_avoid = tcp_reno_cong_avoid, 243 .min_cwnd = tcp_reno_min_cwnd, 244 }; 245 EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); 246