1 /* 2 * CoDel - The Controlled-Delay Active Queue Management algorithm 3 * 4 * Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org> 5 * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> 6 * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> 7 * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> 8 * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions, and the following disclaimer, 15 * without modification. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. The names of the authors may not be used to endorse or promote products 20 * derived from this software without specific prior written permission. 21 * 22 * Alternatively, provided that this notice is retained in full, this 23 * software may be distributed under the terms of the GNU General 24 * Public License ("GPL") version 2, in which case the provisions of the 25 * GPL apply INSTEAD OF those given above. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 38 * DAMAGE. 39 * 40 * $FreeBSD$ 41 */ 42 #include "opt_altq.h" 43 #include "opt_inet.h" 44 #include "opt_inet6.h" 45 46 #ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ 47 48 #include <sys/param.h> 49 #include <sys/malloc.h> 50 #include <sys/mbuf.h> 51 #include <sys/socket.h> 52 #include <sys/systm.h> 53 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/if_private.h> 57 #include <netinet/in.h> 58 59 #include <netpfil/pf/pf.h> 60 #include <netpfil/pf/pf_altq.h> 61 #include <net/altq/if_altq.h> 62 #include <net/altq/altq.h> 63 #include <net/altq/altq_codel.h> 64 65 static int codel_should_drop(struct codel *, class_queue_t *, 66 struct mbuf *, u_int64_t); 67 static void codel_Newton_step(struct codel_vars *); 68 static u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); 69 70 #define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) 71 #define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) 72 #define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) 73 #define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) 74 75 static int codel_request(struct ifaltq *, int, void *); 76 77 static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 78 static struct mbuf *codel_dequeue(struct ifaltq *, int); 79 80 int 81 codel_pfattach(struct pf_altq *a) 82 { 83 struct ifnet *ifp; 84 85 if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) 86 return (EINVAL); 87 88 return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, 89 codel_enqueue, codel_dequeue, codel_request)); 90 } 91 92 int 93 codel_add_altq(struct ifnet *ifp, struct pf_altq *a) 94 { 95 struct codel_if *cif; 96 struct codel_opts *opts; 97 98 if (ifp == NULL) 99 return (EINVAL); 100 if (!ALTQ_IS_READY(&ifp->if_snd)) 101 return (ENODEV); 102 103 opts = &a->pq_u.codel_opts; 104 105 cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); 106 if (cif == NULL) 107 return (ENOMEM); 108 cif->cif_bandwidth = a->ifbandwidth; 109 cif->cif_ifq = &ifp->if_snd; 110 111 cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); 112 if (cif->cl_q == NULL) { 113 free(cif, M_DEVBUF); 114 return (ENOMEM); 115 } 116 117 if (a->qlimit == 0) 118 a->qlimit = 50; /* use default. */ 119 qlimit(cif->cl_q) = a->qlimit; 120 qtype(cif->cl_q) = Q_CODEL; 121 qlen(cif->cl_q) = 0; 122 qsize(cif->cl_q) = 0; 123 124 if (opts->target == 0) 125 opts->target = 5; 126 if (opts->interval == 0) 127 opts->interval = 100; 128 cif->codel.params.target = machclk_freq * opts->target / 1000; 129 cif->codel.params.interval = machclk_freq * opts->interval / 1000; 130 cif->codel.params.ecn = opts->ecn; 131 cif->codel.stats.maxpacket = 256; 132 133 cif->cl_stats.qlength = qlen(cif->cl_q); 134 cif->cl_stats.qlimit = qlimit(cif->cl_q); 135 136 /* keep the state in pf_altq */ 137 a->altq_disc = cif; 138 139 return (0); 140 } 141 142 int 143 codel_remove_altq(struct pf_altq *a) 144 { 145 struct codel_if *cif; 146 147 if ((cif = a->altq_disc) == NULL) 148 return (EINVAL); 149 a->altq_disc = NULL; 150 151 if (cif->cl_q) 152 free(cif->cl_q, M_DEVBUF); 153 free(cif, M_DEVBUF); 154 155 return (0); 156 } 157 158 int 159 codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) 160 { 161 struct codel_if *cif; 162 struct codel_ifstats stats; 163 int error = 0; 164 165 if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) 166 return (EBADF); 167 168 if (*nbytes < sizeof(stats)) 169 return (EINVAL); 170 171 stats = cif->cl_stats; 172 stats.stats = cif->codel.stats; 173 174 if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) 175 return (error); 176 *nbytes = sizeof(stats); 177 178 return (0); 179 } 180 181 static int 182 codel_request(struct ifaltq *ifq, int req, void *arg) 183 { 184 struct codel_if *cif = (struct codel_if *)ifq->altq_disc; 185 struct mbuf *m; 186 187 IFQ_LOCK_ASSERT(ifq); 188 189 switch (req) { 190 case ALTRQ_PURGE: 191 if (!ALTQ_IS_ENABLED(cif->cif_ifq)) 192 break; 193 194 if (qempty(cif->cl_q)) 195 break; 196 197 while ((m = _getq(cif->cl_q)) != NULL) { 198 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 199 m_freem(m); 200 IFQ_DEC_LEN(cif->cif_ifq); 201 } 202 cif->cif_ifq->ifq_len = 0; 203 break; 204 } 205 206 return (0); 207 } 208 209 static int 210 codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) 211 { 212 213 struct codel_if *cif = (struct codel_if *) ifq->altq_disc; 214 215 IFQ_LOCK_ASSERT(ifq); 216 217 /* grab class set by classifier */ 218 if ((m->m_flags & M_PKTHDR) == 0) { 219 /* should not happen */ 220 printf("altq: packet for %s does not have pkthdr\n", 221 ifq->altq_ifp->if_xname); 222 m_freem(m); 223 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 224 return (ENOBUFS); 225 } 226 227 if (codel_addq(&cif->codel, cif->cl_q, m)) { 228 PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 229 return (ENOBUFS); 230 } 231 IFQ_INC_LEN(ifq); 232 233 return (0); 234 } 235 236 static struct mbuf * 237 codel_dequeue(struct ifaltq *ifq, int op) 238 { 239 struct codel_if *cif = (struct codel_if *)ifq->altq_disc; 240 struct mbuf *m; 241 242 IFQ_LOCK_ASSERT(ifq); 243 244 if (IFQ_IS_EMPTY(ifq)) 245 return (NULL); 246 247 if (op == ALTDQ_POLL) 248 return (qhead(cif->cl_q)); 249 250 m = codel_getq(&cif->codel, cif->cl_q); 251 if (m != NULL) { 252 IFQ_DEC_LEN(ifq); 253 PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); 254 return (m); 255 } 256 257 return (NULL); 258 } 259 260 struct codel * 261 codel_alloc(int target, int interval, int ecn) 262 { 263 struct codel *c; 264 265 c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); 266 if (c != NULL) { 267 c->params.target = machclk_freq * target / 1000; 268 c->params.interval = machclk_freq * interval / 1000; 269 c->params.ecn = ecn; 270 c->stats.maxpacket = 256; 271 } 272 273 return (c); 274 } 275 276 void 277 codel_destroy(struct codel *c) 278 { 279 280 free(c, M_DEVBUF); 281 } 282 283 #define MTAG_CODEL 1438031249 284 int 285 codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) 286 { 287 struct m_tag *mtag; 288 uint64_t *enqueue_time; 289 290 if (qlen(q) < qlimit(q)) { 291 mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); 292 if (mtag == NULL) { 293 mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), 294 M_NOWAIT); 295 if (mtag != NULL) 296 m_tag_prepend(m, mtag); 297 } 298 if (mtag == NULL) { 299 m_freem(m); 300 return (-1); 301 } 302 enqueue_time = (uint64_t *)(mtag + 1); 303 *enqueue_time = read_machclk(); 304 _addq(q, m); 305 return (0); 306 } 307 c->drop_overlimit++; 308 m_freem(m); 309 310 return (-1); 311 } 312 313 static int 314 codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, 315 u_int64_t now) 316 { 317 struct m_tag *mtag; 318 uint64_t *enqueue_time; 319 320 if (m == NULL) { 321 c->vars.first_above_time = 0; 322 return (0); 323 } 324 325 mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); 326 if (mtag == NULL) { 327 /* Only one warning per second. */ 328 if (ppsratecheck(&c->last_log, &c->last_pps, 1)) 329 printf("%s: could not found the packet mtag!\n", 330 __func__); 331 c->vars.first_above_time = 0; 332 return (0); 333 } 334 enqueue_time = (uint64_t *)(mtag + 1); 335 c->vars.ldelay = now - *enqueue_time; 336 c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); 337 338 if (codel_time_before(c->vars.ldelay, c->params.target) || 339 qsize(q) <= c->stats.maxpacket) { 340 /* went below - stay below for at least interval */ 341 c->vars.first_above_time = 0; 342 return (0); 343 } 344 if (c->vars.first_above_time == 0) { 345 /* just went above from below. If we stay above 346 * for at least interval we'll say it's ok to drop 347 */ 348 c->vars.first_above_time = now + c->params.interval; 349 return (0); 350 } 351 if (codel_time_after(now, c->vars.first_above_time)) 352 return (1); 353 354 return (0); 355 } 356 357 /* 358 * Run a Newton method step: 359 * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) 360 * 361 * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 362 */ 363 static void 364 codel_Newton_step(struct codel_vars *vars) 365 { 366 uint32_t invsqrt, invsqrt2; 367 uint64_t val; 368 369 /* sizeof_in_bits(rec_inv_sqrt) */ 370 #define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) 371 /* needed shift to get a Q0.32 number from rec_inv_sqrt */ 372 #define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) 373 374 invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; 375 invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; 376 val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); 377 val >>= 2; /* avoid overflow in following multiply */ 378 val = (val * invsqrt) >> (32 - 2 + 1); 379 380 vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; 381 } 382 383 static u_int64_t 384 codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) 385 { 386 387 return (t + (u_int32_t)(((u_int64_t)interval * 388 (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); 389 } 390 391 struct mbuf * 392 codel_getq(struct codel *c, class_queue_t *q) 393 { 394 struct mbuf *m; 395 u_int64_t now; 396 int drop; 397 398 if ((m = _getq(q)) == NULL) { 399 c->vars.dropping = 0; 400 return (m); 401 } 402 403 now = read_machclk(); 404 drop = codel_should_drop(c, q, m, now); 405 if (c->vars.dropping) { 406 if (!drop) { 407 /* sojourn time below target - leave dropping state */ 408 c->vars.dropping = 0; 409 } else if (codel_time_after_eq(now, c->vars.drop_next)) { 410 /* It's time for the next drop. Drop the current 411 * packet and dequeue the next. The dequeue might 412 * take us out of dropping state. 413 * If not, schedule the next drop. 414 * A large backlog might result in drop rates so high 415 * that the next drop should happen now, 416 * hence the while loop. 417 */ 418 while (c->vars.dropping && 419 codel_time_after_eq(now, c->vars.drop_next)) { 420 c->vars.count++; /* don't care of possible wrap 421 * since there is no more 422 * divide */ 423 codel_Newton_step(&c->vars); 424 /* TODO ECN */ 425 PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); 426 m_freem(m); 427 m = _getq(q); 428 if (!codel_should_drop(c, q, m, now)) 429 /* leave dropping state */ 430 c->vars.dropping = 0; 431 else 432 /* and schedule the next drop */ 433 c->vars.drop_next = 434 codel_control_law(c->vars.drop_next, 435 c->params.interval, 436 c->vars.rec_inv_sqrt); 437 } 438 } 439 } else if (drop) { 440 /* TODO ECN */ 441 PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); 442 m_freem(m); 443 444 m = _getq(q); 445 drop = codel_should_drop(c, q, m, now); 446 447 c->vars.dropping = 1; 448 /* if min went above target close to when we last went below it 449 * assume that the drop rate that controlled the queue on the 450 * last cycle is a good starting point to control it now. 451 */ 452 if (codel_time_before(now - c->vars.drop_next, 453 16 * c->params.interval)) { 454 c->vars.count = (c->vars.count - c->vars.lastcount) | 1; 455 /* we dont care if rec_inv_sqrt approximation 456 * is not very precise : 457 * Next Newton steps will correct it quadratically. 458 */ 459 codel_Newton_step(&c->vars); 460 } else { 461 c->vars.count = 1; 462 c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; 463 } 464 c->vars.lastcount = c->vars.count; 465 c->vars.drop_next = codel_control_law(now, c->params.interval, 466 c->vars.rec_inv_sqrt); 467 } 468 469 return (m); 470 } 471 472 void 473 codel_getstats(struct codel *c, struct codel_stats *s) 474 { 475 *s = c->stats; 476 } 477 478 #endif /* ALTQ_CODEL */ 479