1 /* 2 * TCP Low Priority (TCP-LP) 3 * 4 * TCP Low Priority is a distributed algorithm whose goal is to utilize only 5 * the excess network bandwidth as compared to the ``fair share`` of 6 * bandwidth as targeted by TCP. 7 * 8 * As of 2.6.13, Linux supports pluggable congestion control algorithms. 9 * Due to the limitation of the API, we take the following changes from 10 * the original TCP-LP implementation: 11 * o We use newReno in most core CA handling. Only add some checking 12 * within cong_avoid. 13 * o Error correcting in remote HZ, therefore remote HZ will be keeped 14 * on checking and updating. 15 * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, sicne 16 * OWD have a similar meaning as RTT. Also correct the buggy formular. 17 * o Handle reaction for Early Congestion Indication (ECI) within 18 * pkts_acked, as mentioned within pseudo code. 19 * o OWD is handled in relative format, where local time stamp will in 20 * tcp_time_stamp format. 21 * 22 * Original Author: 23 * Aleksandar Kuzmanovic <akuzma@northwestern.edu> 24 * Available from: 25 * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf 26 * Original implementation for 2.4.19: 27 * http://www-ece.rice.edu/networks/TCP-LP/ 28 * 29 * 2.6.x module Authors: 30 * Wong Hoi Sing, Edison <hswong3i@gmail.com> 31 * Hung Hing Lun, Mike <hlhung3i@gmail.com> 32 * SourceForge project page: 33 * http://tcp-lp-mod.sourceforge.net/ 34 * 35 * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ 36 */ 37 38 #include <linux/config.h> 39 #include <linux/module.h> 40 #include <net/tcp.h> 41 42 /* resolution of owd */ 43 #define LP_RESOL 1000 44 45 /** 46 * enum tcp_lp_state 47 * @LP_VALID_RHZ: is remote HZ valid? 48 * @LP_VALID_OWD: is OWD valid? 49 * @LP_WITHIN_THR: are we within threshold? 50 * @LP_WITHIN_INF: are we within inference? 51 * 52 * TCP-LP's state flags. 53 * We create this set of state flag mainly for debugging. 54 */ 55 enum tcp_lp_state { 56 LP_VALID_RHZ = (1 << 0), 57 LP_VALID_OWD = (1 << 1), 58 LP_WITHIN_THR = (1 << 3), 59 LP_WITHIN_INF = (1 << 4), 60 }; 61 62 /** 63 * struct lp 64 * @flag: TCP-LP state flag 65 * @sowd: smoothed OWD << 3 66 * @owd_min: min OWD 67 * @owd_max: max OWD 68 * @owd_max_rsv: resrved max owd 69 * @remote_hz: estimated remote HZ 70 * @remote_ref_time: remote reference time 71 * @local_ref_time: local reference time 72 * @last_drop: time for last active drop 73 * @inference: current inference 74 * 75 * TCP-LP's private struct. 76 * We get the idea from original TCP-LP implementation where only left those we 77 * found are really useful. 78 */ 79 struct lp { 80 u32 flag; 81 u32 sowd; 82 u32 owd_min; 83 u32 owd_max; 84 u32 owd_max_rsv; 85 u32 remote_hz; 86 u32 remote_ref_time; 87 u32 local_ref_time; 88 u32 last_drop; 89 u32 inference; 90 }; 91 92 /** 93 * tcp_lp_init 94 * 95 * Init all required variables. 96 * Clone the handling from Vegas module implementation. 97 */ 98 static void tcp_lp_init(struct sock *sk) 99 { 100 struct lp *lp = inet_csk_ca(sk); 101 102 lp->flag = 0; 103 lp->sowd = 0; 104 lp->owd_min = 0xffffffff; 105 lp->owd_max = 0; 106 lp->owd_max_rsv = 0; 107 lp->remote_hz = 0; 108 lp->remote_ref_time = 0; 109 lp->local_ref_time = 0; 110 lp->last_drop = 0; 111 lp->inference = 0; 112 } 113 114 /** 115 * tcp_lp_cong_avoid 116 * 117 * Implementation of cong_avoid. 118 * Will only call newReno CA when away from inference. 119 * From TCP-LP's paper, this will be handled in additive increasement. 120 */ 121 static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, 122 int flag) 123 { 124 struct lp *lp = inet_csk_ca(sk); 125 126 if (!(lp->flag & LP_WITHIN_INF)) 127 tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); 128 } 129 130 /** 131 * tcp_lp_remote_hz_estimator 132 * 133 * Estimate remote HZ. 134 * We keep on updating the estimated value, where original TCP-LP 135 * implementation only guest it for once and use forever. 136 */ 137 static u32 tcp_lp_remote_hz_estimator(struct sock *sk) 138 { 139 struct tcp_sock *tp = tcp_sk(sk); 140 struct lp *lp = inet_csk_ca(sk); 141 s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ 142 s64 m = 0; 143 144 /* not yet record reference time 145 * go away!! record it before come back!! */ 146 if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) 147 goto out; 148 149 /* we can't calc remote HZ with no different!! */ 150 if (tp->rx_opt.rcv_tsval == lp->remote_ref_time 151 || tp->rx_opt.rcv_tsecr == lp->local_ref_time) 152 goto out; 153 154 m = HZ * (tp->rx_opt.rcv_tsval - 155 lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - 156 lp->local_ref_time); 157 if (m < 0) 158 m = -m; 159 160 if (rhz > 0) { 161 m -= rhz >> 6; /* m is now error in remote HZ est */ 162 rhz += m; /* 63/64 old + 1/64 new */ 163 } else 164 rhz = m << 6; 165 166 out: 167 /* record time for successful remote HZ calc */ 168 if (rhz > 0) 169 lp->flag |= LP_VALID_RHZ; 170 else 171 lp->flag &= ~LP_VALID_RHZ; 172 173 /* record reference time stamp */ 174 lp->remote_ref_time = tp->rx_opt.rcv_tsval; 175 lp->local_ref_time = tp->rx_opt.rcv_tsecr; 176 177 return rhz >> 6; 178 } 179 180 /** 181 * tcp_lp_owd_calculator 182 * 183 * Calculate one way delay (in relative format). 184 * Original implement OWD as minus of remote time difference to local time 185 * difference directly. As this time difference just simply equal to RTT, when 186 * the network status is stable, remote RTT will equal to local RTT, and result 187 * OWD into zero. 188 * It seems to be a bug and so we fixed it. 189 */ 190 static u32 tcp_lp_owd_calculator(struct sock *sk) 191 { 192 struct tcp_sock *tp = tcp_sk(sk); 193 struct lp *lp = inet_csk_ca(sk); 194 s64 owd = 0; 195 196 lp->remote_hz = tcp_lp_remote_hz_estimator(sk); 197 198 if (lp->flag & LP_VALID_RHZ) { 199 owd = 200 tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - 201 tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); 202 if (owd < 0) 203 owd = -owd; 204 } 205 206 if (owd > 0) 207 lp->flag |= LP_VALID_OWD; 208 else 209 lp->flag &= ~LP_VALID_OWD; 210 211 return owd; 212 } 213 214 /** 215 * tcp_lp_rtt_sample 216 * 217 * Implementation or rtt_sample. 218 * Will take the following action, 219 * 1. calc OWD, 220 * 2. record the min/max OWD, 221 * 3. calc smoothed OWD (SOWD). 222 * Most ideas come from the original TCP-LP implementation. 223 */ 224 static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt) 225 { 226 struct lp *lp = inet_csk_ca(sk); 227 s64 mowd = tcp_lp_owd_calculator(sk); 228 229 /* sorry that we don't have valid data */ 230 if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) 231 return; 232 233 /* record the next min owd */ 234 if (mowd < lp->owd_min) 235 lp->owd_min = mowd; 236 237 /* always forget the max of the max 238 * we just set owd_max as one below it */ 239 if (mowd > lp->owd_max) { 240 if (mowd > lp->owd_max_rsv) { 241 if (lp->owd_max_rsv == 0) 242 lp->owd_max = mowd; 243 else 244 lp->owd_max = lp->owd_max_rsv; 245 lp->owd_max_rsv = mowd; 246 } else 247 lp->owd_max = mowd; 248 } 249 250 /* calc for smoothed owd */ 251 if (lp->sowd != 0) { 252 mowd -= lp->sowd >> 3; /* m is now error in owd est */ 253 lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ 254 } else 255 lp->sowd = mowd << 3; /* take the measured time be owd */ 256 } 257 258 /** 259 * tcp_lp_pkts_acked 260 * 261 * Implementation of pkts_acked. 262 * Deal with active drop under Early Congestion Indication. 263 * Only drop to half and 1 will be handle, because we hope to use back 264 * newReno in increase case. 265 * We work it out by following the idea from TCP-LP's paper directly 266 */ 267 static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked) 268 { 269 struct tcp_sock *tp = tcp_sk(sk); 270 struct lp *lp = inet_csk_ca(sk); 271 272 /* calc inference */ 273 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) 274 lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); 275 276 /* test if within inference */ 277 if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) 278 lp->flag |= LP_WITHIN_INF; 279 else 280 lp->flag &= ~LP_WITHIN_INF; 281 282 /* test if within threshold */ 283 if (lp->sowd >> 3 < 284 lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) 285 lp->flag |= LP_WITHIN_THR; 286 else 287 lp->flag &= ~LP_WITHIN_THR; 288 289 pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, 290 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, 291 lp->sowd >> 3); 292 293 if (lp->flag & LP_WITHIN_THR) 294 return; 295 296 /* FIXME: try to reset owd_min and owd_max here 297 * so decrease the chance the min/max is no longer suitable 298 * and will usually within threshold when whithin inference */ 299 lp->owd_min = lp->sowd >> 3; 300 lp->owd_max = lp->sowd >> 2; 301 lp->owd_max_rsv = lp->sowd >> 2; 302 303 /* happened within inference 304 * drop snd_cwnd into 1 */ 305 if (lp->flag & LP_WITHIN_INF) 306 tp->snd_cwnd = 1U; 307 308 /* happened after inference 309 * cut snd_cwnd into half */ 310 else 311 tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); 312 313 /* record this drop time */ 314 lp->last_drop = tcp_time_stamp; 315 } 316 317 static struct tcp_congestion_ops tcp_lp = { 318 .init = tcp_lp_init, 319 .ssthresh = tcp_reno_ssthresh, 320 .cong_avoid = tcp_lp_cong_avoid, 321 .min_cwnd = tcp_reno_min_cwnd, 322 .rtt_sample = tcp_lp_rtt_sample, 323 .pkts_acked = tcp_lp_pkts_acked, 324 325 .owner = THIS_MODULE, 326 .name = "lp" 327 }; 328 329 static int __init tcp_lp_register(void) 330 { 331 BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); 332 return tcp_register_congestion_control(&tcp_lp); 333 } 334 335 static void __exit tcp_lp_unregister(void) 336 { 337 tcp_unregister_congestion_control(&tcp_lp); 338 } 339 340 module_init(tcp_lp_register); 341 module_exit(tcp_lp_unregister); 342 343 MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); 344 MODULE_LICENSE("GPL"); 345 MODULE_DESCRIPTION("TCP Low Priority"); 346