1 /* 2 * Plugable TCP congestion control support and newReno 3 * congestion control. 4 * Based on ideas from I/O scheduler suport and Web100. 5 * 6 * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> 7 */ 8 9 #include <linux/config.h> 10 #include <linux/module.h> 11 #include <linux/mm.h> 12 #include <linux/types.h> 13 #include <linux/list.h> 14 #include <net/tcp.h> 15 16 static DEFINE_SPINLOCK(tcp_cong_list_lock); 17 static LIST_HEAD(tcp_cong_list); 18 19 /* Simple linear search, don't expect many entries! */ 20 static struct tcp_congestion_ops *tcp_ca_find(const char *name) 21 { 22 struct tcp_congestion_ops *e; 23 24 list_for_each_entry_rcu(e, &tcp_cong_list, list) { 25 if (strcmp(e->name, name) == 0) 26 return e; 27 } 28 29 return NULL; 30 } 31 32 /* 33 * Attach new congestion control algorthim to the list 34 * of available options. 35 */ 36 int tcp_register_congestion_control(struct tcp_congestion_ops *ca) 37 { 38 int ret = 0; 39 40 /* all algorithms must implement ssthresh and cong_avoid ops */ 41 if (!ca->ssthresh || !ca->cong_avoid || !ca->min_cwnd) { 42 printk(KERN_ERR "TCP %s does not implement required ops\n", 43 ca->name); 44 return -EINVAL; 45 } 46 47 spin_lock(&tcp_cong_list_lock); 48 if (tcp_ca_find(ca->name)) { 49 printk(KERN_NOTICE "TCP %s already registered\n", ca->name); 50 ret = -EEXIST; 51 } else { 52 list_add_rcu(&ca->list, &tcp_cong_list); 53 printk(KERN_INFO "TCP %s registered\n", ca->name); 54 } 55 spin_unlock(&tcp_cong_list_lock); 56 57 return ret; 58 } 59 EXPORT_SYMBOL_GPL(tcp_register_congestion_control); 60 61 /* 62 * Remove congestion control algorithm, called from 63 * the module's remove function. Module ref counts are used 64 * to ensure that this can't be done till all sockets using 65 * that method are closed. 66 */ 67 void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) 68 { 69 spin_lock(&tcp_cong_list_lock); 70 list_del_rcu(&ca->list); 71 spin_unlock(&tcp_cong_list_lock); 72 } 73 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 74 75 /* Assign choice of congestion control. */ 76 void tcp_init_congestion_control(struct tcp_sock *tp) 77 { 78 struct tcp_congestion_ops *ca; 79 80 if (tp->ca_ops != &tcp_init_congestion_ops) 81 return; 82 83 rcu_read_lock(); 84 list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 85 if (try_module_get(ca->owner)) { 86 tp->ca_ops = ca; 87 break; 88 } 89 90 } 91 rcu_read_unlock(); 92 93 if (tp->ca_ops->init) 94 tp->ca_ops->init(tp); 95 } 96 97 /* Manage refcounts on socket close. */ 98 void tcp_cleanup_congestion_control(struct tcp_sock *tp) 99 { 100 if (tp->ca_ops->release) 101 tp->ca_ops->release(tp); 102 module_put(tp->ca_ops->owner); 103 } 104 105 /* Used by sysctl to change default congestion control */ 106 int tcp_set_default_congestion_control(const char *name) 107 { 108 struct tcp_congestion_ops *ca; 109 int ret = -ENOENT; 110 111 spin_lock(&tcp_cong_list_lock); 112 ca = tcp_ca_find(name); 113 #ifdef CONFIG_KMOD 114 if (!ca) { 115 spin_unlock(&tcp_cong_list_lock); 116 117 request_module("tcp_%s", name); 118 spin_lock(&tcp_cong_list_lock); 119 ca = tcp_ca_find(name); 120 } 121 #endif 122 123 if (ca) { 124 list_move(&ca->list, &tcp_cong_list); 125 ret = 0; 126 } 127 spin_unlock(&tcp_cong_list_lock); 128 129 return ret; 130 } 131 132 /* Get current default congestion control */ 133 void tcp_get_default_congestion_control(char *name) 134 { 135 struct tcp_congestion_ops *ca; 136 /* We will always have reno... */ 137 BUG_ON(list_empty(&tcp_cong_list)); 138 139 rcu_read_lock(); 140 ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); 141 strncpy(name, ca->name, TCP_CA_NAME_MAX); 142 rcu_read_unlock(); 143 } 144 145 /* Change congestion control for socket */ 146 int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) 147 { 148 struct tcp_congestion_ops *ca; 149 int err = 0; 150 151 rcu_read_lock(); 152 ca = tcp_ca_find(name); 153 if (ca == tp->ca_ops) 154 goto out; 155 156 if (!ca) 157 err = -ENOENT; 158 159 else if (!try_module_get(ca->owner)) 160 err = -EBUSY; 161 162 else { 163 tcp_cleanup_congestion_control(tp); 164 tp->ca_ops = ca; 165 if (tp->ca_ops->init) 166 tp->ca_ops->init(tp); 167 } 168 out: 169 rcu_read_unlock(); 170 return err; 171 } 172 173 /* 174 * TCP Reno congestion control 175 * This is special case used for fallback as well. 176 */ 177 /* This is Jacobson's slow start and congestion avoidance. 178 * SIGCOMM '88, p. 328. 179 */ 180 void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, 181 int flag) 182 { 183 if (in_flight < tp->snd_cwnd) 184 return; 185 186 if (tp->snd_cwnd <= tp->snd_ssthresh) { 187 /* In "safe" area, increase. */ 188 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 189 tp->snd_cwnd++; 190 } else { 191 /* In dangerous area, increase slowly. 192 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 193 */ 194 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 195 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 196 tp->snd_cwnd++; 197 tp->snd_cwnd_cnt = 0; 198 } else 199 tp->snd_cwnd_cnt++; 200 } 201 } 202 EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 203 204 /* Slow start threshold is half the congestion window (min 2) */ 205 u32 tcp_reno_ssthresh(struct tcp_sock *tp) 206 { 207 return max(tp->snd_cwnd >> 1U, 2U); 208 } 209 EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 210 211 /* Lower bound on congestion window. */ 212 u32 tcp_reno_min_cwnd(struct tcp_sock *tp) 213 { 214 return tp->snd_ssthresh/2; 215 } 216 EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); 217 218 struct tcp_congestion_ops tcp_reno = { 219 .name = "reno", 220 .owner = THIS_MODULE, 221 .ssthresh = tcp_reno_ssthresh, 222 .cong_avoid = tcp_reno_cong_avoid, 223 .min_cwnd = tcp_reno_min_cwnd, 224 }; 225 226 /* Initial congestion control used (until SYN) 227 * really reno under another name so we can tell difference 228 * during tcp_set_default_congestion_control 229 */ 230 struct tcp_congestion_ops tcp_init_congestion_ops = { 231 .name = "", 232 .owner = THIS_MODULE, 233 .ssthresh = tcp_reno_ssthresh, 234 .cong_avoid = tcp_reno_cong_avoid, 235 .min_cwnd = tcp_reno_min_cwnd, 236 }; 237 EXPORT_SYMBOL_GPL(tcp_init_congestion_ops); 238