/*
 * TCP Low Priority (TCP-LP)
 *
 * TCP Low Priority is a distributed algorithm whose goal is to utilize only
 * the excess network bandwidth as compared to the ``fair share`` of
 * bandwidth as targeted by TCP. Available from:
 * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf
 *
 * Original Author:
 *	Aleksandar Kuzmanovic <akuzma@northwestern.edu>
 *
 * See http://www-ece.rice.edu/networks/TCP-LP/ for their implementation.
 * As of 2.6.13, Linux supports pluggable congestion control algorithms.
 * Due to the limitations of that API, we depart from the original TCP-LP
 * implementation as follows:
 *   o We use NewReno for most of the core CA handling and only add a
 *     check within cong_avoid.
 *   o The remote HZ estimate is error-corrected, so it keeps being
 *     checked and updated.
 *   o One-Way Delay (OWD) is calculated within rtt_sample, since OWD has
 *     a meaning similar to RTT. The buggy formula is also corrected.
 *   o The reaction to Early Congestion Indication (ECI) is handled within
 *     pkts_acked, as described in the paper's pseudo code.
 *   o OWD is handled in relative format, with the local time stamp kept
 *     in tcp_time_stamp units.
 *
 * Port from 2.4.19 to 2.6.16 as module by:
 *	Wong Hoi Sing Edison <hswong3i@gmail.com>
 *	Hung Hing Lun <hlhung3i@gmail.com>
 *
 * Version: $Id: tcp_lp.c,v 1.22 2006-05-02 18:18:19 hswong3i Exp $
 */

#include <linux/config.h>
#include <linux/module.h>
#include <net/tcp.h>

/* resolution of OWD */
#define LP_RESOL 1000

/**
 * enum tcp_lp_state
 * @LP_VALID_RHZ: is remote HZ valid?
 * @LP_VALID_OWD: is OWD valid?
 * @LP_WITHIN_THR: are we within threshold?
 * @LP_WITHIN_INF: are we within inference?
 *
 * TCP-LP's state flags.
 * We create this set of state flags mainly for debugging.
 */
enum tcp_lp_state {
	LP_VALID_RHZ = (1 << 0),
	LP_VALID_OWD = (1 << 1),
	LP_WITHIN_THR = (1 << 3),
	LP_WITHIN_INF = (1 << 4),
};

/**
 * struct lp
 * @flag: TCP-LP state flag
 * @sowd: smoothed OWD << 3
 * @owd_min: min OWD
 * @owd_max: max OWD
 * @owd_max_rsv: reserved max OWD
 * @remote_hz: estimated remote HZ
 * @remote_ref_time: remote reference time
 * @local_ref_time: local reference time
 * @last_drop: time of the last active drop
 * @inference: current inference period
 *
 * TCP-LP's private struct.
 * The layout follows the original TCP-LP implementation; we keep only the
 * fields we found really useful.
 */
struct lp {
	u32 flag;
	u32 sowd;
	u32 owd_min;
	u32 owd_max;
	u32 owd_max_rsv;
	u32 remote_hz;
	u32 remote_ref_time;
	u32 local_ref_time;
	u32 last_drop;
	u32 inference;
};
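
/*
 * A note on the fixed-point representation used below. The numbers here
 * are purely illustrative (not taken from the paper or the original
 * code): sowd is stored left-shifted by 3 so that the 7/8 old + 1/8 new
 * smoothing in tcp_lp_rtt_sample() reduces to an add and a shift, and the
 * remote HZ estimate is likewise kept left-shifted by 6 inside its
 * estimator. For example, with sowd >> 3 == 40 and a new OWD sample of
 * 80, the update gives sowd = 320 + (80 - 40) = 360, i.e. sowd >> 3 == 45,
 * which is exactly 7/8 * 40 + 1/8 * 80.
 */
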
/**
 * tcp_lp_init
 *
 * Init all required variables.
 * The handling is cloned from the TCP Vegas module implementation.
 */
static void tcp_lp_init(struct sock *sk)
{
	struct lp *lp = inet_csk_ca(sk);

	lp->flag = 0;
	lp->sowd = 0;
	lp->owd_min = 0xffffffff;
	lp->owd_max = 0;
	lp->owd_max_rsv = 0;
	lp->remote_hz = 0;
	lp->remote_ref_time = 0;
	lp->local_ref_time = 0;
	lp->last_drop = 0;
	lp->inference = 0;
}

/**
 * tcp_lp_cong_avoid
 *
 * Implementation of cong_avoid.
 * Only calls the NewReno CA when outside the inference period.
 * According to TCP-LP's paper, this is handled as additive increase.
 */
static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
			      int flag)
{
	struct lp *lp = inet_csk_ca(sk);

	if (!(lp->flag & LP_WITHIN_INF))
		tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
}

/**
 * tcp_lp_remote_hz_estimator
 *
 * Estimate remote HZ.
 * We keep updating the estimate, whereas the original TCP-LP
 * implementation guesses it once and uses it forever.
 */
static u32 tcp_lp_remote_hz_estimator(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct lp *lp = inet_csk_ca(sk);
	s64 rhz = lp->remote_hz << 6;	/* remote HZ << 6 */
	s64 m = 0;

	/* no reference time recorded yet,
	 * record it before we can estimate */
	if (lp->remote_ref_time == 0 || lp->local_ref_time == 0)
		goto out;

	/* we can't calc remote HZ with no difference */
	if (tp->rx_opt.rcv_tsval == lp->remote_ref_time
	    || tp->rx_opt.rcv_tsecr == lp->local_ref_time)
		goto out;

	m = HZ * (tp->rx_opt.rcv_tsval - lp->remote_ref_time) /
	    (tp->rx_opt.rcv_tsecr - lp->local_ref_time);
	if (m < 0)
		m = -m;

	if (rhz != 0) {
		m -= rhz >> 6;	/* m is now error in remote HZ est */
		rhz += m;	/* 63/64 old + 1/64 new */
	} else
		rhz = m << 6;

 out:
	/* record whether the estimate is usable; a zero estimate must not
	 * be marked valid, or tcp_lp_owd_calculator() would divide by zero */
	if ((rhz >> 6) > 0)
		lp->flag |= LP_VALID_RHZ;
	else
		lp->flag &= ~LP_VALID_RHZ;

	/* record reference time stamps */
	lp->remote_ref_time = tp->rx_opt.rcv_tsval;
	lp->local_ref_time = tp->rx_opt.rcv_tsecr;

	return rhz >> 6;
}

/**
 * tcp_lp_owd_calculator
 *
 * Calculate one way delay (in relative format).
 * The original implementation computed OWD directly as the remote time
 * difference minus the local time difference. As that difference simply
 * equals the RTT, the remote and local RTT are equal once the network is
 * stable, which collapses the OWD to zero.
 * That looks like a bug, so we fixed it.
 */
static u32 tcp_lp_owd_calculator(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct lp *lp = inet_csk_ca(sk);
	s64 owd = 0;

	lp->remote_hz = tcp_lp_remote_hz_estimator(sk);

	if (lp->flag & LP_VALID_RHZ) {
		owd = tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) -
		      tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ);
		if (owd < 0)
			owd = -owd;
	}

	if (owd > 0)
		lp->flag |= LP_VALID_OWD;
	else
		lp->flag &= ~LP_VALID_OWD;

	return owd;
}
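
/*
 * Worked example of the OWD formula above, with made-up numbers: if the
 * estimated remote HZ is 100 and the local HZ is 1000, a received tsval
 * of 500 maps to 500 * (LP_RESOL / 100) = 5000 and a tsecr of 4600 maps
 * to 4600 * (LP_RESOL / 1000) = 4600, giving a relative OWD of 400 in
 * units of 1/LP_RESOL second. Only differences of this value matter,
 * since the two clocks are not synchronized.
 */
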
/**
 * tcp_lp_rtt_sample
 *
 * Implementation of rtt_sample.
 * Takes the following actions:
 *   1. calc OWD,
 *   2. record the min/max OWD,
 *   3. calc smoothed OWD (SOWD).
 * Most ideas come from the original TCP-LP implementation.
 */
static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
{
	struct lp *lp = inet_csk_ca(sk);
	s64 mowd = tcp_lp_owd_calculator(sk);

	/* sorry, we don't have valid data */
	if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD))
		return;

	/* record the next min OWD */
	if (mowd < lp->owd_min)
		lp->owd_min = mowd;

	/* always forget the largest max seen;
	 * owd_max is kept one step below it */
	if (mowd > lp->owd_max) {
		if (mowd > lp->owd_max_rsv) {
			if (lp->owd_max_rsv == 0)
				lp->owd_max = mowd;
			else
				lp->owd_max = lp->owd_max_rsv;
			lp->owd_max_rsv = mowd;
		} else
			lp->owd_max = mowd;
	}

	/* calc smoothed OWD */
	if (lp->sowd != 0) {
		mowd -= lp->sowd >> 3;	/* m is now error in OWD est */
		lp->sowd += mowd;	/* owd = 7/8 owd + 1/8 new */
	} else
		lp->sowd = mowd << 3;	/* take the measured time as the OWD */
}

/**
 * tcp_lp_pkts_acked
 *
 * Implementation of pkts_acked.
 * Deals with the active drop under Early Congestion Indication.
 * Only the drops to half and to 1 are handled here, because we want to
 * fall back to NewReno for the increase case.
 * We work it out by following the idea from TCP-LP's paper directly.
 */
static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct lp *lp = inet_csk_ca(sk);

	/* calc inference */
	if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
		lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);

	/* test if within inference */
	if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference))
		lp->flag |= LP_WITHIN_INF;
	else
		lp->flag &= ~LP_WITHIN_INF;

	/* test if within threshold */
	if (lp->sowd >> 3 <
	    lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100)
		lp->flag |= LP_WITHIN_THR;
	else
		lp->flag &= ~LP_WITHIN_THR;

	pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag,
		 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max,
		 lp->sowd >> 3);

	if (lp->flag & LP_WITHIN_THR)
		return;

	/* FIXME: try to reset owd_min and owd_max here,
	 * to reduce the chance that the min/max become stale
	 * and we then usually sit within threshold while within inference */
	lp->owd_min = lp->sowd >> 3;
	lp->owd_max = lp->sowd >> 2;
	lp->owd_max_rsv = lp->sowd >> 2;

	/* happened within inference
	 * drop snd_cwnd to 1 */
	if (lp->flag & LP_WITHIN_INF)
		tp->snd_cwnd = 1U;

	/* happened after inference
	 * cut snd_cwnd in half */
	else
		tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U);

	/* record this drop time */
	lp->last_drop = tcp_time_stamp;
}

static struct tcp_congestion_ops tcp_lp = {
	.init = tcp_lp_init,
	.ssthresh = tcp_reno_ssthresh,
	.cong_avoid = tcp_lp_cong_avoid,
	.min_cwnd = tcp_reno_min_cwnd,
	.rtt_sample = tcp_lp_rtt_sample,
	.pkts_acked = tcp_lp_pkts_acked,

	.owner = THIS_MODULE,
	.name = "lp"
};

static int __init tcp_lp_register(void)
{
	BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE);
	return tcp_register_congestion_control(&tcp_lp);
}

static void __exit tcp_lp_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_lp);
}

module_init(tcp_lp_register);
module_exit(tcp_lp_unregister);

MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Low Priority");
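
/*
 * Usage sketch (userspace, not part of this module): once the module is
 * loaded, an application can opt a socket into TCP-LP via the
 * TCP_CONGESTION socket option, e.g.
 *
 *	int fd = socket(AF_INET, SOCK_STREAM, 0);
 *	setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, "lp", strlen("lp"));
 *
 * or "lp" can be made the system-wide default by writing it to
 * /proc/sys/net/ipv4/tcp_congestion_control.
 */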