1 /* 2 * TCP Low Priority (TCP-LP) 3 * 4 * TCP Low Priority is a distributed algorithm whose goal is to utilize only 5 * the excess network bandwidth as compared to the ``fair share`` of 6 * bandwidth as targeted by TCP. 7 * 8 * As of 2.6.13, Linux supports pluggable congestion control algorithms. 9 * Due to the limitation of the API, we take the following changes from 10 * the original TCP-LP implementation: 11 * o We use newReno in most core CA handling. Only add some checking 12 * within cong_avoid. 13 * o Error correcting in remote HZ, therefore remote HZ will be keeped 14 * on checking and updating. 15 * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since 16 * OWD have a similar meaning as RTT. Also correct the buggy formular. 17 * o Handle reaction for Early Congestion Indication (ECI) within 18 * pkts_acked, as mentioned within pseudo code. 19 * o OWD is handled in relative format, where local time stamp will in 20 * tcp_time_stamp format. 21 * 22 * Original Author: 23 * Aleksandar Kuzmanovic <akuzma@northwestern.edu> 24 * Available from: 25 * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf 26 * Original implementation for 2.4.19: 27 * http://www-ece.rice.edu/networks/TCP-LP/ 28 * 29 * 2.6.x module Authors: 30 * Wong Hoi Sing, Edison <hswong3i@gmail.com> 31 * Hung Hing Lun, Mike <hlhung3i@gmail.com> 32 * SourceForge project page: 33 * http://tcp-lp-mod.sourceforge.net/ 34 */ 35 36 #include <linux/module.h> 37 #include <net/tcp.h> 38 39 /* resolution of owd */ 40 #define LP_RESOL 1000 41 42 /** 43 * enum tcp_lp_state 44 * @LP_VALID_RHZ: is remote HZ valid? 45 * @LP_VALID_OWD: is OWD valid? 46 * @LP_WITHIN_THR: are we within threshold? 47 * @LP_WITHIN_INF: are we within inference? 48 * 49 * TCP-LP's state flags. 50 * We create this set of state flag mainly for debugging. 51 */ 52 enum tcp_lp_state { 53 LP_VALID_RHZ = (1 << 0), 54 LP_VALID_OWD = (1 << 1), 55 LP_WITHIN_THR = (1 << 3), 56 LP_WITHIN_INF = (1 << 4), 57 }; 58 59 /** 60 * struct lp 61 * @flag: TCP-LP state flag 62 * @sowd: smoothed OWD << 3 63 * @owd_min: min OWD 64 * @owd_max: max OWD 65 * @owd_max_rsv: resrved max owd 66 * @remote_hz: estimated remote HZ 67 * @remote_ref_time: remote reference time 68 * @local_ref_time: local reference time 69 * @last_drop: time for last active drop 70 * @inference: current inference 71 * 72 * TCP-LP's private struct. 73 * We get the idea from original TCP-LP implementation where only left those we 74 * found are really useful. 75 */ 76 struct lp { 77 u32 flag; 78 u32 sowd; 79 u32 owd_min; 80 u32 owd_max; 81 u32 owd_max_rsv; 82 u32 remote_hz; 83 u32 remote_ref_time; 84 u32 local_ref_time; 85 u32 last_drop; 86 u32 inference; 87 }; 88 89 /** 90 * tcp_lp_init 91 * 92 * Init all required variables. 93 * Clone the handling from Vegas module implementation. 94 */ 95 static void tcp_lp_init(struct sock *sk) 96 { 97 struct lp *lp = inet_csk_ca(sk); 98 99 lp->flag = 0; 100 lp->sowd = 0; 101 lp->owd_min = 0xffffffff; 102 lp->owd_max = 0; 103 lp->owd_max_rsv = 0; 104 lp->remote_hz = 0; 105 lp->remote_ref_time = 0; 106 lp->local_ref_time = 0; 107 lp->last_drop = 0; 108 lp->inference = 0; 109 } 110 111 /** 112 * tcp_lp_cong_avoid 113 * 114 * Implementation of cong_avoid. 115 * Will only call newReno CA when away from inference. 116 * From TCP-LP's paper, this will be handled in additive increasement. 117 */ 118 static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked, 119 u32 in_flight) 120 { 121 struct lp *lp = inet_csk_ca(sk); 122 123 if (!(lp->flag & LP_WITHIN_INF)) 124 tcp_reno_cong_avoid(sk, ack, acked, in_flight); 125 } 126 127 /** 128 * tcp_lp_remote_hz_estimator 129 * 130 * Estimate remote HZ. 131 * We keep on updating the estimated value, where original TCP-LP 132 * implementation only guest it for once and use forever. 133 */ 134 static u32 tcp_lp_remote_hz_estimator(struct sock *sk) 135 { 136 struct tcp_sock *tp = tcp_sk(sk); 137 struct lp *lp = inet_csk_ca(sk); 138 s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ 139 s64 m = 0; 140 141 /* not yet record reference time 142 * go away!! record it before come back!! */ 143 if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) 144 goto out; 145 146 /* we can't calc remote HZ with no different!! */ 147 if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || 148 tp->rx_opt.rcv_tsecr == lp->local_ref_time) 149 goto out; 150 151 m = HZ * (tp->rx_opt.rcv_tsval - 152 lp->remote_ref_time) / (tp->rx_opt.rcv_tsecr - 153 lp->local_ref_time); 154 if (m < 0) 155 m = -m; 156 157 if (rhz > 0) { 158 m -= rhz >> 6; /* m is now error in remote HZ est */ 159 rhz += m; /* 63/64 old + 1/64 new */ 160 } else 161 rhz = m << 6; 162 163 out: 164 /* record time for successful remote HZ calc */ 165 if ((rhz >> 6) > 0) 166 lp->flag |= LP_VALID_RHZ; 167 else 168 lp->flag &= ~LP_VALID_RHZ; 169 170 /* record reference time stamp */ 171 lp->remote_ref_time = tp->rx_opt.rcv_tsval; 172 lp->local_ref_time = tp->rx_opt.rcv_tsecr; 173 174 return rhz >> 6; 175 } 176 177 /** 178 * tcp_lp_owd_calculator 179 * 180 * Calculate one way delay (in relative format). 181 * Original implement OWD as minus of remote time difference to local time 182 * difference directly. As this time difference just simply equal to RTT, when 183 * the network status is stable, remote RTT will equal to local RTT, and result 184 * OWD into zero. 185 * It seems to be a bug and so we fixed it. 186 */ 187 static u32 tcp_lp_owd_calculator(struct sock *sk) 188 { 189 struct tcp_sock *tp = tcp_sk(sk); 190 struct lp *lp = inet_csk_ca(sk); 191 s64 owd = 0; 192 193 lp->remote_hz = tcp_lp_remote_hz_estimator(sk); 194 195 if (lp->flag & LP_VALID_RHZ) { 196 owd = 197 tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - 198 tp->rx_opt.rcv_tsecr * (LP_RESOL / HZ); 199 if (owd < 0) 200 owd = -owd; 201 } 202 203 if (owd > 0) 204 lp->flag |= LP_VALID_OWD; 205 else 206 lp->flag &= ~LP_VALID_OWD; 207 208 return owd; 209 } 210 211 /** 212 * tcp_lp_rtt_sample 213 * 214 * Implementation or rtt_sample. 215 * Will take the following action, 216 * 1. calc OWD, 217 * 2. record the min/max OWD, 218 * 3. calc smoothed OWD (SOWD). 219 * Most ideas come from the original TCP-LP implementation. 220 */ 221 static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) 222 { 223 struct lp *lp = inet_csk_ca(sk); 224 s64 mowd = tcp_lp_owd_calculator(sk); 225 226 /* sorry that we don't have valid data */ 227 if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) 228 return; 229 230 /* record the next min owd */ 231 if (mowd < lp->owd_min) 232 lp->owd_min = mowd; 233 234 /* always forget the max of the max 235 * we just set owd_max as one below it */ 236 if (mowd > lp->owd_max) { 237 if (mowd > lp->owd_max_rsv) { 238 if (lp->owd_max_rsv == 0) 239 lp->owd_max = mowd; 240 else 241 lp->owd_max = lp->owd_max_rsv; 242 lp->owd_max_rsv = mowd; 243 } else 244 lp->owd_max = mowd; 245 } 246 247 /* calc for smoothed owd */ 248 if (lp->sowd != 0) { 249 mowd -= lp->sowd >> 3; /* m is now error in owd est */ 250 lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ 251 } else 252 lp->sowd = mowd << 3; /* take the measured time be owd */ 253 } 254 255 /** 256 * tcp_lp_pkts_acked 257 * 258 * Implementation of pkts_acked. 259 * Deal with active drop under Early Congestion Indication. 260 * Only drop to half and 1 will be handle, because we hope to use back 261 * newReno in increase case. 262 * We work it out by following the idea from TCP-LP's paper directly 263 */ 264 static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, s32 rtt_us) 265 { 266 struct tcp_sock *tp = tcp_sk(sk); 267 struct lp *lp = inet_csk_ca(sk); 268 269 if (rtt_us > 0) 270 tcp_lp_rtt_sample(sk, rtt_us); 271 272 /* calc inference */ 273 if (tcp_time_stamp > tp->rx_opt.rcv_tsecr) 274 lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr); 275 276 /* test if within inference */ 277 if (lp->last_drop && (tcp_time_stamp - lp->last_drop < lp->inference)) 278 lp->flag |= LP_WITHIN_INF; 279 else 280 lp->flag &= ~LP_WITHIN_INF; 281 282 /* test if within threshold */ 283 if (lp->sowd >> 3 < 284 lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) 285 lp->flag |= LP_WITHIN_THR; 286 else 287 lp->flag &= ~LP_WITHIN_THR; 288 289 pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, 290 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, 291 lp->sowd >> 3); 292 293 if (lp->flag & LP_WITHIN_THR) 294 return; 295 296 /* FIXME: try to reset owd_min and owd_max here 297 * so decrease the chance the min/max is no longer suitable 298 * and will usually within threshold when whithin inference */ 299 lp->owd_min = lp->sowd >> 3; 300 lp->owd_max = lp->sowd >> 2; 301 lp->owd_max_rsv = lp->sowd >> 2; 302 303 /* happened within inference 304 * drop snd_cwnd into 1 */ 305 if (lp->flag & LP_WITHIN_INF) 306 tp->snd_cwnd = 1U; 307 308 /* happened after inference 309 * cut snd_cwnd into half */ 310 else 311 tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); 312 313 /* record this drop time */ 314 lp->last_drop = tcp_time_stamp; 315 } 316 317 static struct tcp_congestion_ops tcp_lp __read_mostly = { 318 .flags = TCP_CONG_RTT_STAMP, 319 .init = tcp_lp_init, 320 .ssthresh = tcp_reno_ssthresh, 321 .cong_avoid = tcp_lp_cong_avoid, 322 .min_cwnd = tcp_reno_min_cwnd, 323 .pkts_acked = tcp_lp_pkts_acked, 324 325 .owner = THIS_MODULE, 326 .name = "lp" 327 }; 328 329 static int __init tcp_lp_register(void) 330 { 331 BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); 332 return tcp_register_congestion_control(&tcp_lp); 333 } 334 335 static void __exit tcp_lp_unregister(void) 336 { 337 tcp_unregister_congestion_control(&tcp_lp); 338 } 339 340 module_init(tcp_lp_register); 341 module_exit(tcp_lp_unregister); 342 343 MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); 344 MODULE_LICENSE("GPL"); 345 MODULE_DESCRIPTION("TCP Low Priority"); 346