xref: /freebsd/sys/net/altq/altq_fairq.c (revision ca2e4ecd7395ba655ab4bebe7262a06e634216ce)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/net/altq/altq_fairq.c,v 1.1 2008/04/06 18:58:15 dillon Exp $
35  * $FreeBSD$
36  */
37 /*
38  * Matt: I gutted altq_priq.c and used it as a skeleton on which to build
39  * fairq.  The fairq algorithm is completely different than priq, of course,
40  * but because I used priq's skeleton I believe I should include priq's
41  * copyright.
42  *
43  * Copyright (C) 2000-2003
44  *	Sony Computer Science Laboratories Inc.  All rights reserved.
45  *
46  * Redistribution and use in source and binary forms, with or without
47  * modification, are permitted provided that the following conditions
48  * are met:
49  * 1. Redistributions of source code must retain the above copyright
50  *    notice, this list of conditions and the following disclaimer.
51  * 2. Redistributions in binary form must reproduce the above copyright
52  *    notice, this list of conditions and the following disclaimer in the
53  *    documentation and/or other materials provided with the distribution.
54  *
55  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
56  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
57  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
58  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
59  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
60  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
61  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
62  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
63  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
65  * SUCH DAMAGE.
66  */
67 
68 /*
69  * FAIRQ - take traffic classified by keep state (hashed into
70  * mbuf->m_pkthdr.altq_state_hash) and bucketize it.  Fairly extract
71  * the first packet from each bucket in a round-robin fashion.
72  *
73  * TODO - better overall qlimit support (right now it is per-bucket).
74  *	- NOTE: red etc is per bucket, not overall.
75  *	- better service curve support.
76  *
77  * EXAMPLE:
78  *
79  *  altq on em0 fairq bandwidth 650Kb queue { std, bulk }
80  *  queue std  priority 3 bandwidth 400Kb \
81  *	fairq (buckets 64, default, hogs 1Kb) qlimit 50
82  *  queue bulk priority 2 bandwidth 100Kb \
83  *	fairq (buckets 64, hogs 1Kb) qlimit 50
84  *
85  *  pass out on em0 from any to any keep state queue std
86  *  pass out on em0 inet proto tcp ..... port ... keep state queue bulk
87  */
88 #include "opt_altq.h"
89 #include "opt_inet.h"
90 #include "opt_inet6.h"
91 
92 #ifdef ALTQ_FAIRQ  /* fairq is enabled in the kernel conf */
93 
94 #include <sys/param.h>
95 #include <sys/malloc.h>
96 #include <sys/mbuf.h>
97 #include <sys/socket.h>
98 #include <sys/sockio.h>
99 #include <sys/systm.h>
100 #include <sys/proc.h>
101 #include <sys/errno.h>
102 #include <sys/kernel.h>
103 #include <sys/queue.h>
104 
105 #include <net/if.h>
106 #include <net/if_var.h>
107 #include <netinet/in.h>
108 
109 #include <netpfil/pf/pf.h>
110 #include <netpfil/pf/pf_altq.h>
111 #include <netpfil/pf/pf_mtag.h>
112 #include <net/altq/altq.h>
113 #include <net/altq/altq_fairq.h>
114 
115 /*
116  * function prototypes
117  */
118 static int	fairq_clear_interface(struct fairq_if *);
119 static int	fairq_request(struct ifaltq *, int, void *);
120 static void	fairq_purge(struct fairq_if *);
121 static struct fairq_class *fairq_class_create(struct fairq_if *, int, int, u_int, struct fairq_opts *, int);
122 static int	fairq_class_destroy(struct fairq_class *);
123 static int	fairq_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
124 static struct mbuf *fairq_dequeue(struct ifaltq *, int);
125 
126 static int	fairq_addq(struct fairq_class *, struct mbuf *, u_int32_t);
127 static struct mbuf *fairq_getq(struct fairq_class *, uint64_t);
128 static struct mbuf *fairq_pollq(struct fairq_class *, uint64_t, int *);
129 static fairq_bucket_t *fairq_selectq(struct fairq_class *, int);
130 static void	fairq_purgeq(struct fairq_class *);
131 
132 static void	get_class_stats(struct fairq_classstats *, struct fairq_class *);
133 static struct fairq_class *clh_to_clp(struct fairq_if *, uint32_t);
134 
135 int
136 fairq_pfattach(struct pf_altq *a)
137 {
138 	struct ifnet *ifp;
139 	int error;
140 
141 	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
142 		return (EINVAL);
143 
144 	error = altq_attach(&ifp->if_snd, ALTQT_FAIRQ, a->altq_disc,
145 	    fairq_enqueue, fairq_dequeue, fairq_request, NULL, NULL);
146 
147 	return (error);
148 }
149 
150 int
151 fairq_add_altq(struct pf_altq *a)
152 {
153 	struct fairq_if *pif;
154 	struct ifnet *ifp;
155 
156 	if ((ifp = ifunit(a->ifname)) == NULL)
157 		return (EINVAL);
158 	if (!ALTQ_IS_READY(&ifp->if_snd))
159 		return (ENODEV);
160 
161 
162 	pif = malloc(sizeof(struct fairq_if),
163 			M_DEVBUF, M_WAITOK | M_ZERO);
164 	pif->pif_bandwidth = a->ifbandwidth;
165 	pif->pif_maxpri = -1;
166 	pif->pif_ifq = &ifp->if_snd;
167 
168 	/* keep the state in pf_altq */
169 	a->altq_disc = pif;
170 
171 	return (0);
172 }
173 
174 int
175 fairq_remove_altq(struct pf_altq *a)
176 {
177 	struct fairq_if *pif;
178 
179 	if ((pif = a->altq_disc) == NULL)
180 		return (EINVAL);
181 	a->altq_disc = NULL;
182 
183 	fairq_clear_interface(pif);
184 
185 	free(pif, M_DEVBUF);
186 	return (0);
187 }
188 
189 int
190 fairq_add_queue(struct pf_altq *a)
191 {
192 	struct fairq_if *pif;
193 	struct fairq_class *cl;
194 
195 	if ((pif = a->altq_disc) == NULL)
196 		return (EINVAL);
197 
198 	/* check parameters */
199 	if (a->priority >= FAIRQ_MAXPRI)
200 		return (EINVAL);
201 	if (a->qid == 0)
202 		return (EINVAL);
203 	if (pif->pif_classes[a->priority] != NULL)
204 		return (EBUSY);
205 	if (clh_to_clp(pif, a->qid) != NULL)
206 		return (EBUSY);
207 
208 	cl = fairq_class_create(pif, a->priority, a->qlimit, a->bandwidth,
209 			       &a->pq_u.fairq_opts, a->qid);
210 	if (cl == NULL)
211 		return (ENOMEM);
212 
213 	return (0);
214 }
215 
216 int
217 fairq_remove_queue(struct pf_altq *a)
218 {
219 	struct fairq_if *pif;
220 	struct fairq_class *cl;
221 
222 	if ((pif = a->altq_disc) == NULL)
223 		return (EINVAL);
224 
225 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
226 		return (EINVAL);
227 
228 	return (fairq_class_destroy(cl));
229 }
230 
231 int
232 fairq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
233 {
234 	struct fairq_if *pif;
235 	struct fairq_class *cl;
236 	struct fairq_classstats stats;
237 	int error = 0;
238 
239 	if ((pif = altq_lookup(a->ifname, ALTQT_FAIRQ)) == NULL)
240 		return (EBADF);
241 
242 	if ((cl = clh_to_clp(pif, a->qid)) == NULL)
243 		return (EINVAL);
244 
245 	if (*nbytes < sizeof(stats))
246 		return (EINVAL);
247 
248 	get_class_stats(&stats, cl);
249 
250 	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
251 		return (error);
252 	*nbytes = sizeof(stats);
253 	return (0);
254 }
255 
256 /*
257  * bring the interface back to the initial state by discarding
258  * all the filters and classes.
259  */
260 static int
261 fairq_clear_interface(struct fairq_if *pif)
262 {
263 	struct fairq_class *cl;
264 	int pri;
265 
266 	/* clear out the classes */
267 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
268 		if ((cl = pif->pif_classes[pri]) != NULL)
269 			fairq_class_destroy(cl);
270 	}
271 
272 	return (0);
273 }
274 
275 static int
276 fairq_request(struct ifaltq *ifq, int req, void *arg)
277 {
278 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
279 
280 	IFQ_LOCK_ASSERT(ifq);
281 
282 	switch (req) {
283 	case ALTRQ_PURGE:
284 		fairq_purge(pif);
285 		break;
286 	}
287 	return (0);
288 }
289 
290 /* discard all the queued packets on the interface */
291 static void
292 fairq_purge(struct fairq_if *pif)
293 {
294 	struct fairq_class *cl;
295 	int pri;
296 
297 	for (pri = 0; pri <= pif->pif_maxpri; pri++) {
298 		if ((cl = pif->pif_classes[pri]) != NULL && cl->cl_head)
299 			fairq_purgeq(cl);
300 	}
301 	if (ALTQ_IS_ENABLED(pif->pif_ifq))
302 		pif->pif_ifq->ifq_len = 0;
303 }
304 
305 static struct fairq_class *
306 fairq_class_create(struct fairq_if *pif, int pri, int qlimit,
307 		   u_int bandwidth, struct fairq_opts *opts, int qid)
308 {
309 	struct fairq_class *cl;
310 	int flags = opts->flags;
311 	u_int nbuckets = opts->nbuckets;
312 	int i;
313 
314 #ifndef ALTQ_RED
315 	if (flags & FARF_RED) {
316 #ifdef ALTQ_DEBUG
317 		printf("fairq_class_create: RED not configured for FAIRQ!\n");
318 #endif
319 		return (NULL);
320 	}
321 #endif
322 	if (nbuckets == 0)
323 		nbuckets = 256;
324 	if (nbuckets > FAIRQ_MAX_BUCKETS)
325 		nbuckets = FAIRQ_MAX_BUCKETS;
326 	/* enforce power-of-2 size */
327 	while ((nbuckets ^ (nbuckets - 1)) != ((nbuckets << 1) - 1))
328 		++nbuckets;
329 
330 	if ((cl = pif->pif_classes[pri]) != NULL) {
331 		/* modify the class instead of creating a new one */
332 		IFQ_LOCK(cl->cl_pif->pif_ifq);
333 		if (cl->cl_head)
334 			fairq_purgeq(cl);
335 		IFQ_UNLOCK(cl->cl_pif->pif_ifq);
336 #ifdef ALTQ_RIO
337 		if (cl->cl_qtype == Q_RIO)
338 			rio_destroy((rio_t *)cl->cl_red);
339 #endif
340 #ifdef ALTQ_RED
341 		if (cl->cl_qtype == Q_RED)
342 			red_destroy(cl->cl_red);
343 #endif
344 	} else {
345 		cl = malloc(sizeof(struct fairq_class),
346 				M_DEVBUF, M_WAITOK | M_ZERO);
347 		cl->cl_nbuckets = nbuckets;
348 		cl->cl_nbucket_mask = nbuckets - 1;
349 
350 		cl->cl_buckets = malloc(
351 			sizeof(struct fairq_bucket) * cl->cl_nbuckets,
352 			M_DEVBUF, M_WAITOK | M_ZERO);
353 		cl->cl_head = NULL;
354 	}
355 
356 	pif->pif_classes[pri] = cl;
357 	if (flags & FARF_DEFAULTCLASS)
358 		pif->pif_default = cl;
359 	if (qlimit == 0)
360 		qlimit = 50;  /* use default */
361 	cl->cl_qlimit = qlimit;
362 	for (i = 0; i < cl->cl_nbuckets; ++i) {
363 		qlimit(&cl->cl_buckets[i].queue) = qlimit;
364 	}
365 	cl->cl_bandwidth = bandwidth / 8;
366 	cl->cl_qtype = Q_DROPTAIL;
367 	cl->cl_flags = flags & FARF_USERFLAGS;
368 	cl->cl_pri = pri;
369 	if (pri > pif->pif_maxpri)
370 		pif->pif_maxpri = pri;
371 	cl->cl_pif = pif;
372 	cl->cl_handle = qid;
373 	cl->cl_hogs_m1 = opts->hogs_m1 / 8;
374 	cl->cl_lssc_m1 = opts->lssc_m1 / 8;	/* NOT YET USED */
375 
376 #ifdef ALTQ_RED
377 	if (flags & (FARF_RED|FARF_RIO)) {
378 		int red_flags, red_pkttime;
379 
380 		red_flags = 0;
381 		if (flags & FARF_ECN)
382 			red_flags |= REDF_ECN;
383 #ifdef ALTQ_RIO
384 		if (flags & FARF_CLEARDSCP)
385 			red_flags |= RIOF_CLEARDSCP;
386 #endif
387 		if (pif->pif_bandwidth < 8)
388 			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
389 		else
390 			red_pkttime = (int64_t)pif->pif_ifq->altq_ifp->if_mtu
391 			  * 1000 * 1000 * 1000 / (pif->pif_bandwidth / 8);
392 #ifdef ALTQ_RIO
393 		if (flags & FARF_RIO) {
394 			cl->cl_red = (red_t *)rio_alloc(0, NULL,
395 						red_flags, red_pkttime);
396 			if (cl->cl_red != NULL)
397 				cl->cl_qtype = Q_RIO;
398 		} else
399 #endif
400 		if (flags & FARF_RED) {
401 			cl->cl_red = red_alloc(0, 0,
402 			    cl->cl_qlimit * 10/100,
403 			    cl->cl_qlimit * 30/100,
404 			    red_flags, red_pkttime);
405 			if (cl->cl_red != NULL)
406 				cl->cl_qtype = Q_RED;
407 		}
408 	}
409 #endif /* ALTQ_RED */
410 
411 	return (cl);
412 }
413 
414 static int
415 fairq_class_destroy(struct fairq_class *cl)
416 {
417 	struct fairq_if *pif;
418 	int pri;
419 
420 	IFQ_LOCK(cl->cl_pif->pif_ifq);
421 
422 	if (cl->cl_head)
423 		fairq_purgeq(cl);
424 
425 	pif = cl->cl_pif;
426 	pif->pif_classes[cl->cl_pri] = NULL;
427 	if (pif->pif_poll_cache == cl)
428 		pif->pif_poll_cache = NULL;
429 	if (pif->pif_maxpri == cl->cl_pri) {
430 		for (pri = cl->cl_pri; pri >= 0; pri--)
431 			if (pif->pif_classes[pri] != NULL) {
432 				pif->pif_maxpri = pri;
433 				break;
434 			}
435 		if (pri < 0)
436 			pif->pif_maxpri = -1;
437 	}
438 	IFQ_UNLOCK(cl->cl_pif->pif_ifq);
439 
440 	if (cl->cl_red != NULL) {
441 #ifdef ALTQ_RIO
442 		if (cl->cl_qtype == Q_RIO)
443 			rio_destroy((rio_t *)cl->cl_red);
444 #endif
445 #ifdef ALTQ_RED
446 		if (cl->cl_qtype == Q_RED)
447 			red_destroy(cl->cl_red);
448 #endif
449 	}
450 	free(cl->cl_buckets, M_DEVBUF);
451 	free(cl, M_DEVBUF);
452 
453 	return (0);
454 }
455 
456 /*
457  * fairq_enqueue is an enqueue function to be registered to
458  * (*altq_enqueue) in struct ifaltq.
459  */
460 static int
461 fairq_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
462 {
463 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
464 	struct fairq_class *cl = NULL; /* Make compiler happy */
465 	struct pf_mtag *t;
466 	u_int32_t qid_hash = 0;
467 	int len;
468 
469 	IFQ_LOCK_ASSERT(ifq);
470 
471 	/* grab class set by classifier */
472 	if ((m->m_flags & M_PKTHDR) == 0) {
473 		/* should not happen */
474 		printf("altq: packet for %s does not have pkthdr\n",
475 			ifq->altq_ifp->if_xname);
476 		m_freem(m);
477 		return (ENOBUFS);
478 	}
479 
480 	if ((t = pf_find_mtag(m)) != NULL) {
481 		cl = clh_to_clp(pif, t->qid);
482 		qid_hash = t->qid_hash;
483 	}
484 	if (cl == NULL) {
485 		cl = pif->pif_default;
486 		if (cl == NULL) {
487 			m_freem(m);
488 			return (ENOBUFS);
489 		}
490 	}
491 	cl->cl_flags |= FARF_HAS_PACKETS;
492 	cl->cl_pktattr = NULL;
493 	len = m_pktlen(m);
494 	if (fairq_addq(cl, m, qid_hash) != 0) {
495 		/* drop occurred.  mbuf was freed in fairq_addq. */
496 		PKTCNTR_ADD(&cl->cl_dropcnt, len);
497 		return (ENOBUFS);
498 	}
499 	IFQ_INC_LEN(ifq);
500 
501 	return (0);
502 }
503 
504 /*
505  * fairq_dequeue is a dequeue function to be registered to
506  * (*altq_dequeue) in struct ifaltq.
507  *
508  * note: ALTDQ_POLL returns the next packet without removing the packet
509  *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
510  *	ALTDQ_REMOVE must return the same packet if called immediately
511  *	after ALTDQ_POLL.
512  */
513 static struct mbuf *
514 fairq_dequeue(struct ifaltq *ifq, int op)
515 {
516 	struct fairq_if *pif = (struct fairq_if *)ifq->altq_disc;
517 	struct fairq_class *cl;
518 	struct fairq_class *best_cl;
519 	struct mbuf *best_m;
520 	struct mbuf *m = NULL;
521 	uint64_t cur_time = read_machclk();
522 	int pri;
523 	int hit_limit;
524 
525 	IFQ_LOCK_ASSERT(ifq);
526 
527 	if (IFQ_IS_EMPTY(ifq)) {
528 		return (NULL);
529 	}
530 
531 	if (pif->pif_poll_cache && op == ALTDQ_REMOVE) {
532 		best_cl = pif->pif_poll_cache;
533 		m = fairq_getq(best_cl, cur_time);
534 		pif->pif_poll_cache = NULL;
535 		if (m) {
536 			IFQ_DEC_LEN(ifq);
537 			PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
538 			return (m);
539 		}
540 	} else {
541 		best_cl = NULL;
542 		best_m = NULL;
543 
544 		for (pri = pif->pif_maxpri;  pri >= 0; pri--) {
545 			if ((cl = pif->pif_classes[pri]) == NULL)
546 				continue;
547 			if ((cl->cl_flags & FARF_HAS_PACKETS) == 0)
548 				continue;
549 			m = fairq_pollq(cl, cur_time, &hit_limit);
550 			if (m == NULL) {
551 				cl->cl_flags &= ~FARF_HAS_PACKETS;
552 				continue;
553 			}
554 
555 			/*
556 			 * Only override the best choice if we are under
557 			 * the BW limit.
558 			 */
559 			if (hit_limit == 0 || best_cl == NULL) {
560 				best_cl = cl;
561 				best_m = m;
562 			}
563 
564 			/*
565 			 * Remember the highest priority mbuf in case we
566 			 * do not find any lower priority mbufs.
567 			 */
568 			if (hit_limit)
569 				continue;
570 			break;
571 		}
572 		if (op == ALTDQ_POLL) {
573 			pif->pif_poll_cache = best_cl;
574 			m = best_m;
575 		} else if (best_cl) {
576 			m = fairq_getq(best_cl, cur_time);
577 			if (m != NULL) {
578 				IFQ_DEC_LEN(ifq);
579 				PKTCNTR_ADD(&best_cl->cl_xmitcnt, m_pktlen(m));
580 			}
581 		}
582 		return (m);
583 	}
584 	return (NULL);
585 }
586 
587 static int
588 fairq_addq(struct fairq_class *cl, struct mbuf *m, u_int32_t bucketid)
589 {
590 	fairq_bucket_t *b;
591 	u_int hindex;
592 	uint64_t bw;
593 
594 	/*
595 	 * If the packet doesn't have any keep state put it on the end of
596 	 * our queue.  XXX this can result in out of order delivery.
597 	 */
598 	if (bucketid == 0) {
599 		if (cl->cl_head)
600 			b = cl->cl_head->prev;
601 		else
602 			b = &cl->cl_buckets[0];
603 	} else {
604 		hindex = bucketid & cl->cl_nbucket_mask;
605 		b = &cl->cl_buckets[hindex];
606 	}
607 
608 	/*
609 	 * Add the bucket to the end of the circular list of active buckets.
610 	 *
611 	 * As a special case we add the bucket to the beginning of the list
612 	 * instead of the end if it was not previously on the list and if
613 	 * its traffic is less then the hog level.
614 	 */
615 	if (b->in_use == 0) {
616 		b->in_use = 1;
617 		if (cl->cl_head == NULL) {
618 			cl->cl_head = b;
619 			b->next = b;
620 			b->prev = b;
621 		} else {
622 			b->next = cl->cl_head;
623 			b->prev = cl->cl_head->prev;
624 			b->prev->next = b;
625 			b->next->prev = b;
626 
627 			if (b->bw_delta && cl->cl_hogs_m1) {
628 				bw = b->bw_bytes * machclk_freq / b->bw_delta;
629 				if (bw < cl->cl_hogs_m1)
630 					cl->cl_head = b;
631 			}
632 		}
633 	}
634 
635 #ifdef ALTQ_RIO
636 	if (cl->cl_qtype == Q_RIO)
637 		return rio_addq((rio_t *)cl->cl_red, &b->queue, m, cl->cl_pktattr);
638 #endif
639 #ifdef ALTQ_RED
640 	if (cl->cl_qtype == Q_RED)
641 		return red_addq(cl->cl_red, &b->queue, m, cl->cl_pktattr);
642 #endif
643 	if (qlen(&b->queue) >= qlimit(&b->queue)) {
644 		m_freem(m);
645 		return (-1);
646 	}
647 
648 	if (cl->cl_flags & FARF_CLEARDSCP)
649 		write_dsfield(m, cl->cl_pktattr, 0);
650 
651 	_addq(&b->queue, m);
652 
653 	return (0);
654 }
655 
656 static struct mbuf *
657 fairq_getq(struct fairq_class *cl, uint64_t cur_time)
658 {
659 	fairq_bucket_t *b;
660 	struct mbuf *m;
661 
662 	b = fairq_selectq(cl, 0);
663 	if (b == NULL)
664 		m = NULL;
665 #ifdef ALTQ_RIO
666 	else if (cl->cl_qtype == Q_RIO)
667 		m = rio_getq((rio_t *)cl->cl_red, &b->queue);
668 #endif
669 #ifdef ALTQ_RED
670 	else if (cl->cl_qtype == Q_RED)
671 		m = red_getq(cl->cl_red, &b->queue);
672 #endif
673 	else
674 		m = _getq(&b->queue);
675 
676 	/*
677 	 * Calculate the BW change
678 	 */
679 	if (m != NULL) {
680 		uint64_t delta;
681 
682 		/*
683 		 * Per-class bandwidth calculation
684 		 */
685 		delta = (cur_time - cl->cl_last_time);
686 		if (delta > machclk_freq * 8)
687 			delta = machclk_freq * 8;
688 		cl->cl_bw_delta += delta;
689 		cl->cl_bw_bytes += m->m_pkthdr.len;
690 		cl->cl_last_time = cur_time;
691 		cl->cl_bw_delta -= cl->cl_bw_delta >> 3;
692 		cl->cl_bw_bytes -= cl->cl_bw_bytes >> 3;
693 
694 		/*
695 		 * Per-bucket bandwidth calculation
696 		 */
697 		delta = (cur_time - b->last_time);
698 		if (delta > machclk_freq * 8)
699 			delta = machclk_freq * 8;
700 		b->bw_delta += delta;
701 		b->bw_bytes += m->m_pkthdr.len;
702 		b->last_time = cur_time;
703 		b->bw_delta -= b->bw_delta >> 3;
704 		b->bw_bytes -= b->bw_bytes >> 3;
705 	}
706 	return(m);
707 }
708 
709 /*
710  * Figure out what the next packet would be if there were no limits.  If
711  * this class hits its bandwidth limit *hit_limit is set to no-zero, otherwise
712  * it is set to 0.  A non-NULL mbuf is returned either way.
713  */
714 static struct mbuf *
715 fairq_pollq(struct fairq_class *cl, uint64_t cur_time, int *hit_limit)
716 {
717 	fairq_bucket_t *b;
718 	struct mbuf *m;
719 	uint64_t delta;
720 	uint64_t bw;
721 
722 	*hit_limit = 0;
723 	b = fairq_selectq(cl, 1);
724 	if (b == NULL)
725 		return(NULL);
726 	m = qhead(&b->queue);
727 
728 	/*
729 	 * Did this packet exceed the class bandwidth?  Calculate the
730 	 * bandwidth component of the packet.
731 	 *
732 	 * - Calculate bytes per second
733 	 */
734 	delta = cur_time - cl->cl_last_time;
735 	if (delta > machclk_freq * 8)
736 		delta = machclk_freq * 8;
737 	cl->cl_bw_delta += delta;
738 	cl->cl_last_time = cur_time;
739 	if (cl->cl_bw_delta) {
740 		bw = cl->cl_bw_bytes * machclk_freq / cl->cl_bw_delta;
741 
742 		if (bw > cl->cl_bandwidth)
743 			*hit_limit = 1;
744 #ifdef ALTQ_DEBUG
745 		printf("BW %6ju relative to %6u %d queue %p\n",
746 			(uintmax_t)bw, cl->cl_bandwidth, *hit_limit, b);
747 #endif
748 	}
749 	return(m);
750 }
751 
752 /*
753  * Locate the next queue we want to pull a packet out of.  This code
754  * is also responsible for removing empty buckets from the circular list.
755  */
756 static
757 fairq_bucket_t *
758 fairq_selectq(struct fairq_class *cl, int ispoll)
759 {
760 	fairq_bucket_t *b;
761 	uint64_t bw;
762 
763 	if (ispoll == 0 && cl->cl_polled) {
764 		b = cl->cl_polled;
765 		cl->cl_polled = NULL;
766 		return(b);
767 	}
768 
769 	while ((b = cl->cl_head) != NULL) {
770 		/*
771 		 * Remove empty queues from consideration
772 		 */
773 		if (qempty(&b->queue)) {
774 			b->in_use = 0;
775 			cl->cl_head = b->next;
776 			if (cl->cl_head == b) {
777 				cl->cl_head = NULL;
778 			} else {
779 				b->next->prev = b->prev;
780 				b->prev->next = b->next;
781 			}
782 			continue;
783 		}
784 
785 		/*
786 		 * Advance the round robin.  Queues with bandwidths less
787 		 * then the hog bandwidth are allowed to burst.
788 		 */
789 		if (cl->cl_hogs_m1 == 0) {
790 			cl->cl_head = b->next;
791 		} else if (b->bw_delta) {
792 			bw = b->bw_bytes * machclk_freq / b->bw_delta;
793 			if (bw >= cl->cl_hogs_m1) {
794 				cl->cl_head = b->next;
795 			}
796 			/*
797 			 * XXX TODO -
798 			 */
799 		}
800 
801 		/*
802 		 * Return bucket b.
803 		 */
804 		break;
805 	}
806 	if (ispoll)
807 		cl->cl_polled = b;
808 	return(b);
809 }
810 
811 static void
812 fairq_purgeq(struct fairq_class *cl)
813 {
814 	fairq_bucket_t *b;
815 	struct mbuf *m;
816 
817 	while ((b = fairq_selectq(cl, 0)) != NULL) {
818 		while ((m = _getq(&b->queue)) != NULL) {
819 			PKTCNTR_ADD(&cl->cl_dropcnt, m_pktlen(m));
820 			m_freem(m);
821 		}
822 		ASSERT(qlen(&b->queue) == 0);
823 	}
824 }
825 
826 static void
827 get_class_stats(struct fairq_classstats *sp, struct fairq_class *cl)
828 {
829 	fairq_bucket_t *b;
830 
831 	sp->class_handle = cl->cl_handle;
832 	sp->qlimit = cl->cl_qlimit;
833 	sp->xmit_cnt = cl->cl_xmitcnt;
834 	sp->drop_cnt = cl->cl_dropcnt;
835 	sp->qtype = cl->cl_qtype;
836 	sp->qlength = 0;
837 
838 	if (cl->cl_head) {
839 		b = cl->cl_head;
840 		do {
841 			sp->qlength += qlen(&b->queue);
842 			b = b->next;
843 		} while (b != cl->cl_head);
844 	}
845 
846 #ifdef ALTQ_RED
847 	if (cl->cl_qtype == Q_RED)
848 		red_getstats(cl->cl_red, &sp->red[0]);
849 #endif
850 #ifdef ALTQ_RIO
851 	if (cl->cl_qtype == Q_RIO)
852 		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
853 #endif
854 }
855 
856 /* convert a class handle to the corresponding class pointer */
857 static struct fairq_class *
858 clh_to_clp(struct fairq_if *pif, uint32_t chandle)
859 {
860 	struct fairq_class *cl;
861 	int idx;
862 
863 	if (chandle == 0)
864 		return (NULL);
865 
866 	for (idx = pif->pif_maxpri; idx >= 0; idx--)
867 		if ((cl = pif->pif_classes[idx]) != NULL &&
868 		    cl->cl_handle == chandle)
869 			return (cl);
870 
871 	return (NULL);
872 }
873 
874 #endif /* ALTQ_FAIRQ */
875