xref: /freebsd/sys/net/altq/altq_codel.c (revision b306c604df541dede4d0f3cc96188bbf5b6719fe)
1 /*
2  * CoDel - The Controlled-Delay Active Queue Management algorithm
3  *
4  *  Copyright (C) 2013 Ermal Luçi <eri@FreeBSD.org>
5  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
6  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
7  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
8  *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions, and the following disclaimer,
15  *    without modification.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. The names of the authors may not be used to endorse or promote products
20  *    derived from this software without specific prior written permission.
21  *
22  * Alternatively, provided that this notice is retained in full, this
23  * software may be distributed under the terms of the GNU General
24  * Public License ("GPL") version 2, in which case the provisions of the
25  * GPL apply INSTEAD OF those given above.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  *
40  * $FreeBSD$
41  */
42 #include "opt_altq.h"
43 #include "opt_inet.h"
44 #include "opt_inet6.h"
45 
46 #ifdef ALTQ_CODEL  /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */
47 
48 #include <sys/param.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/socket.h>
52 #include <sys/systm.h>
53 
54 #include <net/if.h>
55 #include <net/if_var.h>
56 #include <net/if_private.h>
57 #include <netinet/in.h>
58 
59 #include <netpfil/pf/pf.h>
60 #include <netpfil/pf/pf_altq.h>
61 #include <net/altq/if_altq.h>
62 #include <net/altq/altq.h>
63 #include <net/altq/altq_codel.h>
64 
65 static int		 codel_should_drop(struct codel *, class_queue_t *,
66 			    struct mbuf *, u_int64_t);
67 static void		 codel_Newton_step(struct codel_vars *);
68 static u_int64_t	 codel_control_law(u_int64_t t, u_int64_t, u_int32_t);
69 
70 #define	codel_time_after(a, b)		((int64_t)(a) - (int64_t)(b) > 0)
71 #define	codel_time_after_eq(a, b)	((int64_t)(a) - (int64_t)(b) >= 0)
72 #define	codel_time_before(a, b)		((int64_t)(a) - (int64_t)(b) < 0)
73 #define	codel_time_before_eq(a, b)	((int64_t)(a) - (int64_t)(b) <= 0)
74 
75 static int codel_request(struct ifaltq *, int, void *);
76 
77 static int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
78 static struct mbuf *codel_dequeue(struct ifaltq *, int);
79 
80 int
81 codel_pfattach(struct pf_altq *a)
82 {
83 	struct ifnet *ifp;
84 
85 	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
86 		return (EINVAL);
87 
88 	return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc,
89 	    codel_enqueue, codel_dequeue, codel_request));
90 }
91 
92 int
93 codel_add_altq(struct ifnet *ifp, struct pf_altq *a)
94 {
95 	struct codel_if	*cif;
96 	struct codel_opts	*opts;
97 
98 	if (ifp == NULL)
99 		return (EINVAL);
100 	if (!ALTQ_IS_READY(&ifp->if_snd))
101 		return (ENODEV);
102 
103 	opts = &a->pq_u.codel_opts;
104 
105 	cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO);
106 	if (cif == NULL)
107 		return (ENOMEM);
108 	cif->cif_bandwidth = a->ifbandwidth;
109 	cif->cif_ifq = &ifp->if_snd;
110 
111 	cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
112 	if (cif->cl_q == NULL) {
113 		free(cif, M_DEVBUF);
114 		return (ENOMEM);
115 	}
116 
117 	if (a->qlimit == 0)
118 		a->qlimit = 50;	/* use default. */
119 	qlimit(cif->cl_q) = a->qlimit;
120 	qtype(cif->cl_q) = Q_CODEL;
121 	qlen(cif->cl_q) = 0;
122 	qsize(cif->cl_q) = 0;
123 
124 	if (opts->target == 0)
125 		opts->target = 5;
126 	if (opts->interval == 0)
127 		opts->interval = 100;
128 	cif->codel.params.target = machclk_freq * opts->target / 1000;
129 	cif->codel.params.interval = machclk_freq * opts->interval / 1000;
130 	cif->codel.params.ecn = opts->ecn;
131 	cif->codel.stats.maxpacket = 256;
132 
133 	cif->cl_stats.qlength = qlen(cif->cl_q);
134 	cif->cl_stats.qlimit = qlimit(cif->cl_q);
135 
136 	/* keep the state in pf_altq */
137 	a->altq_disc = cif;
138 
139 	return (0);
140 }
141 
142 int
143 codel_remove_altq(struct pf_altq *a)
144 {
145 	struct codel_if *cif;
146 
147 	if ((cif = a->altq_disc) == NULL)
148 		return (EINVAL);
149 	a->altq_disc = NULL;
150 
151 	if (cif->cl_q)
152 		free(cif->cl_q, M_DEVBUF);
153 	free(cif, M_DEVBUF);
154 
155 	return (0);
156 }
157 
158 int
159 codel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
160 {
161 	struct codel_if *cif;
162 	struct codel_ifstats stats;
163 	int error = 0;
164 
165 	if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL)
166 		return (EBADF);
167 
168 	if (*nbytes < sizeof(stats))
169 		return (EINVAL);
170 
171 	stats = cif->cl_stats;
172 	stats.stats = cif->codel.stats;
173 
174 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
175 		return (error);
176 	*nbytes = sizeof(stats);
177 
178 	return (0);
179 }
180 
181 static int
182 codel_request(struct ifaltq *ifq, int req, void *arg)
183 {
184 	struct codel_if	*cif = (struct codel_if *)ifq->altq_disc;
185 	struct mbuf *m;
186 
187 	IFQ_LOCK_ASSERT(ifq);
188 
189 	switch (req) {
190 	case ALTRQ_PURGE:
191 		if (!ALTQ_IS_ENABLED(cif->cif_ifq))
192 			break;
193 
194 		if (qempty(cif->cl_q))
195 			break;
196 
197 		while ((m = _getq(cif->cl_q)) != NULL) {
198 			PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
199 			m_freem(m);
200 			IFQ_DEC_LEN(cif->cif_ifq);
201 		}
202 		cif->cif_ifq->ifq_len = 0;
203 		break;
204 	}
205 
206 	return (0);
207 }
208 
209 static int
210 codel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
211 {
212 
213 	struct codel_if *cif = (struct codel_if *) ifq->altq_disc;
214 
215 	IFQ_LOCK_ASSERT(ifq);
216 
217 	/* grab class set by classifier */
218 	if ((m->m_flags & M_PKTHDR) == 0) {
219 		/* should not happen */
220 		printf("altq: packet for %s does not have pkthdr\n",
221 		   ifq->altq_ifp->if_xname);
222 		m_freem(m);
223 		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
224 		return (ENOBUFS);
225 	}
226 
227 	if (codel_addq(&cif->codel, cif->cl_q, m)) {
228 		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
229 		return (ENOBUFS);
230 	}
231 	IFQ_INC_LEN(ifq);
232 
233 	return (0);
234 }
235 
236 static struct mbuf *
237 codel_dequeue(struct ifaltq *ifq, int op)
238 {
239 	struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
240 	struct mbuf *m;
241 
242 	IFQ_LOCK_ASSERT(ifq);
243 
244 	if (IFQ_IS_EMPTY(ifq))
245 		return (NULL);
246 
247 	if (op == ALTDQ_POLL)
248 		return (qhead(cif->cl_q));
249 
250 	m = codel_getq(&cif->codel, cif->cl_q);
251 	if (m != NULL) {
252 		IFQ_DEC_LEN(ifq);
253 		PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m));
254 		return (m);
255 	}
256 
257 	return (NULL);
258 }
259 
260 struct codel *
261 codel_alloc(int target, int interval, int ecn)
262 {
263 	struct codel *c;
264 
265 	c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO);
266 	if (c != NULL) {
267 		c->params.target = machclk_freq * target / 1000;
268 		c->params.interval = machclk_freq * interval / 1000;
269 		c->params.ecn = ecn;
270 		c->stats.maxpacket = 256;
271 	}
272 
273 	return (c);
274 }
275 
276 void
277 codel_destroy(struct codel *c)
278 {
279 
280 	free(c, M_DEVBUF);
281 }
282 
283 #define	MTAG_CODEL	1438031249
284 int
285 codel_addq(struct codel *c, class_queue_t *q, struct mbuf *m)
286 {
287 	struct m_tag *mtag;
288 	uint64_t *enqueue_time;
289 
290 	if (qlen(q) < qlimit(q)) {
291 		mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
292 		if (mtag == NULL) {
293 			mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t),
294 			    M_NOWAIT);
295 			if (mtag != NULL)
296 				m_tag_prepend(m, mtag);
297 		}
298 		if (mtag == NULL) {
299 			m_freem(m);
300 			return (-1);
301 		}
302 		enqueue_time = (uint64_t *)(mtag + 1);
303 		*enqueue_time = read_machclk();
304 		_addq(q, m);
305 		return (0);
306 	}
307 	c->drop_overlimit++;
308 	m_freem(m);
309 
310 	return (-1);
311 }
312 
313 static int
314 codel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
315     u_int64_t now)
316 {
317 	struct m_tag *mtag;
318 	uint64_t *enqueue_time;
319 
320 	if (m == NULL) {
321 		c->vars.first_above_time = 0;
322 		return (0);
323 	}
324 
325 	mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
326 	if (mtag == NULL) {
327 		/* Only one warning per second. */
328 		if (ppsratecheck(&c->last_log, &c->last_pps, 1))
329 			printf("%s: could not found the packet mtag!\n",
330 			    __func__);
331 		c->vars.first_above_time = 0;
332 		return (0);
333 	}
334 	enqueue_time = (uint64_t *)(mtag + 1);
335 	c->vars.ldelay = now - *enqueue_time;
336 	c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));
337 
338 	if (codel_time_before(c->vars.ldelay, c->params.target) ||
339 	    qsize(q) <= c->stats.maxpacket) {
340 		/* went below - stay below for at least interval */
341 		c->vars.first_above_time = 0;
342 		return (0);
343 	}
344 	if (c->vars.first_above_time == 0) {
345 		/* just went above from below. If we stay above
346 		 * for at least interval we'll say it's ok to drop
347 		 */
348 		c->vars.first_above_time = now + c->params.interval;
349 		return (0);
350 	}
351 	if (codel_time_after(now, c->vars.first_above_time))
352 		return (1);
353 
354 	return (0);
355 }
356 
357 /*
358  * Run a Newton method step:
359  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
360  *
361  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
362  */
363 static void
364 codel_Newton_step(struct codel_vars *vars)
365 {
366 	uint32_t invsqrt, invsqrt2;
367 	uint64_t val;
368 
369 /* sizeof_in_bits(rec_inv_sqrt) */
370 #define	REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
371 /* needed shift to get a Q0.32 number from rec_inv_sqrt */
372 #define	REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
373 
374 	invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
375 	invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
376 	val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
377 	val >>= 2; /* avoid overflow in following multiply */
378 	val = (val * invsqrt) >> (32 - 2 + 1);
379 
380 	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
381 }
382 
383 static u_int64_t
384 codel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
385 {
386 
387 	return (t + (u_int32_t)(((u_int64_t)interval *
388 	    (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
389 }
390 
391 struct mbuf *
392 codel_getq(struct codel *c, class_queue_t *q)
393 {
394 	struct mbuf	*m;
395 	u_int64_t	 now;
396 	int		 drop;
397 
398 	if ((m = _getq(q)) == NULL) {
399 		c->vars.dropping = 0;
400 		return (m);
401 	}
402 
403 	now = read_machclk();
404 	drop = codel_should_drop(c, q, m, now);
405 	if (c->vars.dropping) {
406 		if (!drop) {
407 			/* sojourn time below target - leave dropping state */
408 			c->vars.dropping = 0;
409 		} else if (codel_time_after_eq(now, c->vars.drop_next)) {
410 			/* It's time for the next drop. Drop the current
411 			 * packet and dequeue the next. The dequeue might
412 			 * take us out of dropping state.
413 			 * If not, schedule the next drop.
414 			 * A large backlog might result in drop rates so high
415 			 * that the next drop should happen now,
416 			 * hence the while loop.
417 			 */
418 			while (c->vars.dropping &&
419 			    codel_time_after_eq(now, c->vars.drop_next)) {
420 				c->vars.count++; /* don't care of possible wrap
421 						  * since there is no more
422 						  * divide */
423 				codel_Newton_step(&c->vars);
424 				/* TODO ECN */
425 				PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
426 				m_freem(m);
427 				m = _getq(q);
428 				if (!codel_should_drop(c, q, m, now))
429 					/* leave dropping state */
430 					c->vars.dropping = 0;
431 				else
432 					/* and schedule the next drop */
433 					c->vars.drop_next =
434 					    codel_control_law(c->vars.drop_next,
435 						c->params.interval,
436 						c->vars.rec_inv_sqrt);
437 			}
438 		}
439 	} else if (drop) {
440 		/* TODO ECN */
441 		PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
442 		m_freem(m);
443 
444 		m = _getq(q);
445 		drop = codel_should_drop(c, q, m, now);
446 
447 		c->vars.dropping = 1;
448 		/* if min went above target close to when we last went below it
449 		 * assume that the drop rate that controlled the queue on the
450 		 * last cycle is a good starting point to control it now.
451 		 */
452 		if (codel_time_before(now - c->vars.drop_next,
453 		    16 * c->params.interval)) {
454 			c->vars.count = (c->vars.count - c->vars.lastcount) | 1;
455 			/* we dont care if rec_inv_sqrt approximation
456 			 * is not very precise :
457 			 * Next Newton steps will correct it quadratically.
458 			 */
459 			codel_Newton_step(&c->vars);
460 		} else {
461 			c->vars.count = 1;
462 			c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
463 		}
464 		c->vars.lastcount = c->vars.count;
465 		c->vars.drop_next = codel_control_law(now, c->params.interval,
466 		    c->vars.rec_inv_sqrt);
467 	}
468 
469 	return (m);
470 }
471 
472 void
473 codel_getstats(struct codel *c, struct codel_stats *s)
474 {
475 	*s = c->stats;
476 }
477 
478 #endif /* ALTQ_CODEL */
479