1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * TCP Low Priority (TCP-LP) 4 * 5 * TCP Low Priority is a distributed algorithm whose goal is to utilize only 6 * the excess network bandwidth as compared to the ``fair share`` of 7 * bandwidth as targeted by TCP. 8 * 9 * As of 2.6.13, Linux supports pluggable congestion control algorithms. 10 * Due to the limitation of the API, we take the following changes from 11 * the original TCP-LP implementation: 12 * o We use newReno in most core CA handling. Only add some checking 13 * within cong_avoid. 14 * o Error correcting in remote HZ, therefore remote HZ will be keeped 15 * on checking and updating. 16 * o Handling calculation of One-Way-Delay (OWD) within rtt_sample, since 17 * OWD have a similar meaning as RTT. Also correct the buggy formular. 18 * o Handle reaction for Early Congestion Indication (ECI) within 19 * pkts_acked, as mentioned within pseudo code. 20 * o OWD is handled in relative format, where local time stamp will in 21 * tcp_time_stamp format. 22 * 23 * Original Author: 24 * Aleksandar Kuzmanovic <akuzma@northwestern.edu> 25 * Available from: 26 * http://www.ece.rice.edu/~akuzma/Doc/akuzma/TCP-LP.pdf 27 * Original implementation for 2.4.19: 28 * http://www-ece.rice.edu/networks/TCP-LP/ 29 * 30 * 2.6.x module Authors: 31 * Wong Hoi Sing, Edison <hswong3i@gmail.com> 32 * Hung Hing Lun, Mike <hlhung3i@gmail.com> 33 * SourceForge project page: 34 * http://tcp-lp-mod.sourceforge.net/ 35 */ 36 37 #include <linux/module.h> 38 #include <net/tcp.h> 39 40 /* resolution of owd */ 41 #define LP_RESOL TCP_TS_HZ 42 43 /** 44 * enum tcp_lp_state 45 * @LP_VALID_RHZ: is remote HZ valid? 46 * @LP_VALID_OWD: is OWD valid? 47 * @LP_WITHIN_THR: are we within threshold? 48 * @LP_WITHIN_INF: are we within inference? 49 * 50 * TCP-LP's state flags. 51 * We create this set of state flag mainly for debugging. 52 */ 53 enum tcp_lp_state { 54 LP_VALID_RHZ = (1 << 0), 55 LP_VALID_OWD = (1 << 1), 56 LP_WITHIN_THR = (1 << 3), 57 LP_WITHIN_INF = (1 << 4), 58 }; 59 60 /** 61 * struct lp 62 * @flag: TCP-LP state flag 63 * @sowd: smoothed OWD << 3 64 * @owd_min: min OWD 65 * @owd_max: max OWD 66 * @owd_max_rsv: resrved max owd 67 * @remote_hz: estimated remote HZ 68 * @remote_ref_time: remote reference time 69 * @local_ref_time: local reference time 70 * @last_drop: time for last active drop 71 * @inference: current inference 72 * 73 * TCP-LP's private struct. 74 * We get the idea from original TCP-LP implementation where only left those we 75 * found are really useful. 76 */ 77 struct lp { 78 u32 flag; 79 u32 sowd; 80 u32 owd_min; 81 u32 owd_max; 82 u32 owd_max_rsv; 83 u32 remote_hz; 84 u32 remote_ref_time; 85 u32 local_ref_time; 86 u32 last_drop; 87 u32 inference; 88 }; 89 90 /** 91 * tcp_lp_init 92 * 93 * Init all required variables. 94 * Clone the handling from Vegas module implementation. 95 */ 96 static void tcp_lp_init(struct sock *sk) 97 { 98 struct lp *lp = inet_csk_ca(sk); 99 100 lp->flag = 0; 101 lp->sowd = 0; 102 lp->owd_min = 0xffffffff; 103 lp->owd_max = 0; 104 lp->owd_max_rsv = 0; 105 lp->remote_hz = 0; 106 lp->remote_ref_time = 0; 107 lp->local_ref_time = 0; 108 lp->last_drop = 0; 109 lp->inference = 0; 110 } 111 112 /** 113 * tcp_lp_cong_avoid 114 * 115 * Implementation of cong_avoid. 116 * Will only call newReno CA when away from inference. 117 * From TCP-LP's paper, this will be handled in additive increasement. 118 */ 119 static void tcp_lp_cong_avoid(struct sock *sk, u32 ack, u32 acked) 120 { 121 struct lp *lp = inet_csk_ca(sk); 122 123 if (!(lp->flag & LP_WITHIN_INF)) 124 tcp_reno_cong_avoid(sk, ack, acked); 125 } 126 127 /** 128 * tcp_lp_remote_hz_estimator 129 * 130 * Estimate remote HZ. 131 * We keep on updating the estimated value, where original TCP-LP 132 * implementation only guest it for once and use forever. 133 */ 134 static u32 tcp_lp_remote_hz_estimator(struct sock *sk) 135 { 136 struct tcp_sock *tp = tcp_sk(sk); 137 struct lp *lp = inet_csk_ca(sk); 138 s64 rhz = lp->remote_hz << 6; /* remote HZ << 6 */ 139 s64 m = 0; 140 141 /* not yet record reference time 142 * go away!! record it before come back!! */ 143 if (lp->remote_ref_time == 0 || lp->local_ref_time == 0) 144 goto out; 145 146 /* we can't calc remote HZ with no different!! */ 147 if (tp->rx_opt.rcv_tsval == lp->remote_ref_time || 148 tp->rx_opt.rcv_tsecr == lp->local_ref_time) 149 goto out; 150 151 m = TCP_TS_HZ * 152 (tp->rx_opt.rcv_tsval - lp->remote_ref_time) / 153 (tp->rx_opt.rcv_tsecr - lp->local_ref_time); 154 if (m < 0) 155 m = -m; 156 157 if (rhz > 0) { 158 m -= rhz >> 6; /* m is now error in remote HZ est */ 159 rhz += m; /* 63/64 old + 1/64 new */ 160 } else 161 rhz = m << 6; 162 163 out: 164 /* record time for successful remote HZ calc */ 165 if ((rhz >> 6) > 0) 166 lp->flag |= LP_VALID_RHZ; 167 else 168 lp->flag &= ~LP_VALID_RHZ; 169 170 /* record reference time stamp */ 171 lp->remote_ref_time = tp->rx_opt.rcv_tsval; 172 lp->local_ref_time = tp->rx_opt.rcv_tsecr; 173 174 return rhz >> 6; 175 } 176 177 /** 178 * tcp_lp_owd_calculator 179 * 180 * Calculate one way delay (in relative format). 181 * Original implement OWD as minus of remote time difference to local time 182 * difference directly. As this time difference just simply equal to RTT, when 183 * the network status is stable, remote RTT will equal to local RTT, and result 184 * OWD into zero. 185 * It seems to be a bug and so we fixed it. 186 */ 187 static u32 tcp_lp_owd_calculator(struct sock *sk) 188 { 189 struct tcp_sock *tp = tcp_sk(sk); 190 struct lp *lp = inet_csk_ca(sk); 191 s64 owd = 0; 192 193 lp->remote_hz = tcp_lp_remote_hz_estimator(sk); 194 195 if (lp->flag & LP_VALID_RHZ) { 196 owd = 197 tp->rx_opt.rcv_tsval * (LP_RESOL / lp->remote_hz) - 198 tp->rx_opt.rcv_tsecr * (LP_RESOL / TCP_TS_HZ); 199 if (owd < 0) 200 owd = -owd; 201 } 202 203 if (owd > 0) 204 lp->flag |= LP_VALID_OWD; 205 else 206 lp->flag &= ~LP_VALID_OWD; 207 208 return owd; 209 } 210 211 /** 212 * tcp_lp_rtt_sample 213 * 214 * Implementation or rtt_sample. 215 * Will take the following action, 216 * 1. calc OWD, 217 * 2. record the min/max OWD, 218 * 3. calc smoothed OWD (SOWD). 219 * Most ideas come from the original TCP-LP implementation. 220 */ 221 static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt) 222 { 223 struct lp *lp = inet_csk_ca(sk); 224 s64 mowd = tcp_lp_owd_calculator(sk); 225 226 /* sorry that we don't have valid data */ 227 if (!(lp->flag & LP_VALID_RHZ) || !(lp->flag & LP_VALID_OWD)) 228 return; 229 230 /* record the next min owd */ 231 if (mowd < lp->owd_min) 232 lp->owd_min = mowd; 233 234 /* always forget the max of the max 235 * we just set owd_max as one below it */ 236 if (mowd > lp->owd_max) { 237 if (mowd > lp->owd_max_rsv) { 238 if (lp->owd_max_rsv == 0) 239 lp->owd_max = mowd; 240 else 241 lp->owd_max = lp->owd_max_rsv; 242 lp->owd_max_rsv = mowd; 243 } else 244 lp->owd_max = mowd; 245 } 246 247 /* calc for smoothed owd */ 248 if (lp->sowd != 0) { 249 mowd -= lp->sowd >> 3; /* m is now error in owd est */ 250 lp->sowd += mowd; /* owd = 7/8 owd + 1/8 new */ 251 } else 252 lp->sowd = mowd << 3; /* take the measured time be owd */ 253 } 254 255 /** 256 * tcp_lp_pkts_acked 257 * 258 * Implementation of pkts_acked. 259 * Deal with active drop under Early Congestion Indication. 260 * Only drop to half and 1 will be handle, because we hope to use back 261 * newReno in increase case. 262 * We work it out by following the idea from TCP-LP's paper directly 263 */ 264 static void tcp_lp_pkts_acked(struct sock *sk, const struct ack_sample *sample) 265 { 266 struct tcp_sock *tp = tcp_sk(sk); 267 struct lp *lp = inet_csk_ca(sk); 268 u32 now = tcp_time_stamp(tp); 269 u32 delta; 270 271 if (sample->rtt_us > 0) 272 tcp_lp_rtt_sample(sk, sample->rtt_us); 273 274 /* calc inference */ 275 delta = now - tp->rx_opt.rcv_tsecr; 276 if ((s32)delta > 0) 277 lp->inference = 3 * delta; 278 279 /* test if within inference */ 280 if (lp->last_drop && (now - lp->last_drop < lp->inference)) 281 lp->flag |= LP_WITHIN_INF; 282 else 283 lp->flag &= ~LP_WITHIN_INF; 284 285 /* test if within threshold */ 286 if (lp->sowd >> 3 < 287 lp->owd_min + 15 * (lp->owd_max - lp->owd_min) / 100) 288 lp->flag |= LP_WITHIN_THR; 289 else 290 lp->flag &= ~LP_WITHIN_THR; 291 292 pr_debug("TCP-LP: %05o|%5u|%5u|%15u|%15u|%15u\n", lp->flag, 293 tp->snd_cwnd, lp->remote_hz, lp->owd_min, lp->owd_max, 294 lp->sowd >> 3); 295 296 if (lp->flag & LP_WITHIN_THR) 297 return; 298 299 /* FIXME: try to reset owd_min and owd_max here 300 * so decrease the chance the min/max is no longer suitable 301 * and will usually within threshold when whithin inference */ 302 lp->owd_min = lp->sowd >> 3; 303 lp->owd_max = lp->sowd >> 2; 304 lp->owd_max_rsv = lp->sowd >> 2; 305 306 /* happened within inference 307 * drop snd_cwnd into 1 */ 308 if (lp->flag & LP_WITHIN_INF) 309 tp->snd_cwnd = 1U; 310 311 /* happened after inference 312 * cut snd_cwnd into half */ 313 else 314 tp->snd_cwnd = max(tp->snd_cwnd >> 1U, 1U); 315 316 /* record this drop time */ 317 lp->last_drop = now; 318 } 319 320 static struct tcp_congestion_ops tcp_lp __read_mostly = { 321 .init = tcp_lp_init, 322 .ssthresh = tcp_reno_ssthresh, 323 .undo_cwnd = tcp_reno_undo_cwnd, 324 .cong_avoid = tcp_lp_cong_avoid, 325 .pkts_acked = tcp_lp_pkts_acked, 326 327 .owner = THIS_MODULE, 328 .name = "lp" 329 }; 330 331 static int __init tcp_lp_register(void) 332 { 333 BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); 334 return tcp_register_congestion_control(&tcp_lp); 335 } 336 337 static void __exit tcp_lp_unregister(void) 338 { 339 tcp_unregister_congestion_control(&tcp_lp); 340 } 341 342 module_init(tcp_lp_register); 343 module_exit(tcp_lp_unregister); 344 345 MODULE_AUTHOR("Wong Hoi Sing Edison, Hung Hing Lun Mike"); 346 MODULE_LICENSE("GPL"); 347 MODULE_DESCRIPTION("TCP Low Priority"); 348