/*-
 * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation is hereby granted (including for commercial or
 * for-profit use), provided that both the copyright notice and this
 * permission notice appear in all copies of the software, derivative
 * works, or modified versions, and any portions thereof.
 *
 * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
 * WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON PROVIDES THIS
 * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * Carnegie Mellon encourages (but does not require) users of this
 * software to return any improvements or extensions that they make,
 * and to grant Carnegie Mellon the rights to redistribute these
 * changes without encumbrance.
 *
 * $KAME: altq_hfsc.c,v 1.24 2003/12/05 05:40:46 kjc Exp $
 * $FreeBSD$
 */
/*
 * H-FSC is described in Proceedings of SIGCOMM'97,
 * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
 * Real-Time and Priority Service"
 * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
 *
 * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
 * when a class has an upperlimit, the fit-time is computed from the
 * upperlimit service curve.  the link-sharing scheduler does not schedule
 * a class whose fit-time exceeds the current time.
 */
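/*
 * each class may carry up to three two-piece linear service curves
 * (struct service_curve: slope m1 for the first d msec, slope m2
 * afterwards): a real-time curve (rsc) that guarantees service, a
 * link-sharing curve (fsc) that distributes excess bandwidth, and an
 * upper-limit curve (usc) that caps link-sharing service.
 */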

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef ALTQ_HFSC  /* hfsc is enabled by ALTQ_HFSC option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/queue.h>
#if 1 /* ALTQ3_COMPAT */
#include <sys/sockio.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#endif /* ALTQ3_COMPAT */

#include <net/if.h>
#include <net/if_var.h>
#include <netinet/in.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <netpfil/pf/pf_mtag.h>
#include <net/altq/altq.h>
#include <net/altq/altq_hfsc.h>

/*
 * function prototypes
 */
static int			 hfsc_clear_interface(struct hfsc_if *);
static int			 hfsc_request(struct ifaltq *, int, void *);
static void			 hfsc_purge(struct hfsc_if *);
static struct hfsc_class	*hfsc_class_create(struct hfsc_if *,
    struct service_curve *, struct service_curve *, struct service_curve *,
    struct hfsc_class *, int, int, int);
static int			 hfsc_class_destroy(struct hfsc_class *);
static struct hfsc_class	*hfsc_nextclass(struct hfsc_class *);
static int			 hfsc_enqueue(struct ifaltq *, struct mbuf *,
				    struct altq_pktattr *);
static struct mbuf		*hfsc_dequeue(struct ifaltq *, int);

static int		 hfsc_addq(struct hfsc_class *, struct mbuf *);
static struct mbuf	*hfsc_getq(struct hfsc_class *);
static struct mbuf	*hfsc_pollq(struct hfsc_class *);
static void		 hfsc_purgeq(struct hfsc_class *);

static void		 update_cfmin(struct hfsc_class *);
static void		 set_active(struct hfsc_class *, int);
static void		 set_passive(struct hfsc_class *);

static void		 init_ed(struct hfsc_class *, int);
static void		 update_ed(struct hfsc_class *, int);
static void		 update_d(struct hfsc_class *, int);
static void		 init_vf(struct hfsc_class *, int);
static void		 update_vf(struct hfsc_class *, int, u_int64_t);
static void		 ellist_insert(struct hfsc_class *);
static void		 ellist_remove(struct hfsc_class *);
static void		 ellist_update(struct hfsc_class *);
struct hfsc_class	*hfsc_get_mindl(struct hfsc_if *, u_int64_t);
static void		 actlist_insert(struct hfsc_class *);
static void		 actlist_remove(struct hfsc_class *);
static void		 actlist_update(struct hfsc_class *);

static struct hfsc_class	*actlist_firstfit(struct hfsc_class *,
				    u_int64_t);

static __inline u_int64_t	seg_x2y(u_int64_t, u_int64_t);
static __inline u_int64_t	seg_y2x(u_int64_t, u_int64_t);
static __inline u_int64_t	m2sm(u_int64_t);
static __inline u_int64_t	m2ism(u_int64_t);
static __inline u_int64_t	d2dx(u_int);
static u_int64_t		sm2m(u_int64_t);
static u_int			dx2d(u_int64_t);

static void		sc2isc(struct service_curve *, struct internal_sc *);
static void		rtsc_init(struct runtime_sc *, struct internal_sc *,
			    u_int64_t, u_int64_t);
static u_int64_t	rtsc_y2x(struct runtime_sc *, u_int64_t);
static u_int64_t	rtsc_x2y(struct runtime_sc *, u_int64_t);
static void		rtsc_min(struct runtime_sc *, struct internal_sc *,
			    u_int64_t, u_int64_t);

static void			 get_class_stats_v0(struct hfsc_classstats_v0 *,
				    struct hfsc_class *);
static void			 get_class_stats_v1(struct hfsc_classstats_v1 *,
				    struct hfsc_class *);
static struct hfsc_class	*clh_to_clp(struct hfsc_if *, u_int32_t);

/*
 * macros
 */
#define	is_a_parent_class(cl)	((cl)->cl_children != NULL)

#define	HT_INFINITY	0xffffffffffffffffULL	/* infinite time value */

int
hfsc_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error;

	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
		return (EINVAL);
	s = splnet();
	error = altq_attach(&ifp->if_snd, ALTQT_HFSC, a->altq_disc,
	    hfsc_enqueue, hfsc_dequeue, hfsc_request, NULL, NULL);
	splx(s);
	return (error);
}

int
hfsc_add_altq(struct ifnet *ifp, struct pf_altq *a)
{
	struct hfsc_if *hif;

	if (ifp == NULL)
		return (EINVAL);
	if (!ALTQ_IS_READY(&ifp->if_snd))
		return (ENODEV);

	hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (hif == NULL)
		return (ENOMEM);

	TAILQ_INIT(&hif->hif_eligible);
	hif->hif_ifq = &ifp->if_snd;

	/* keep the state in pf_altq */
	a->altq_disc = hif;

	return (0);
}

int
hfsc_remove_altq(struct pf_altq *a)
{
	struct hfsc_if *hif;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);
	a->altq_disc = NULL;

	(void)hfsc_clear_interface(hif);
	(void)hfsc_class_destroy(hif->hif_rootclass);

	free(hif, M_DEVBUF);

	return (0);
}

int
hfsc_add_queue(struct pf_altq *a)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl, *parent;
	struct hfsc_opts_v1 *opts;
	struct service_curve rtsc, lssc, ulsc;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);

	opts = &a->pq_u.hfsc_opts;

	if (a->parent_qid == HFSC_NULLCLASS_HANDLE &&
	    hif->hif_rootclass == NULL)
		parent = NULL;
	else if ((parent = clh_to_clp(hif, a->parent_qid)) == NULL)
		return (EINVAL);

	if (a->qid == 0)
		return (EINVAL);

	if (clh_to_clp(hif, a->qid) != NULL)
		return (EBUSY);

	rtsc.m1 = opts->rtsc_m1;
	rtsc.d  = opts->rtsc_d;
	rtsc.m2 = opts->rtsc_m2;
	lssc.m1 = opts->lssc_m1;
	lssc.d  = opts->lssc_d;
	lssc.m2 = opts->lssc_m2;
	ulsc.m1 = opts->ulsc_m1;
	ulsc.d  = opts->ulsc_d;
	ulsc.m2 = opts->ulsc_m2;

	cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
	    parent, a->qlimit, opts->flags, a->qid);
	if (cl == NULL)
		return (ENOMEM);

	return (0);
}

int
hfsc_remove_queue(struct pf_altq *a)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;

	if ((hif = a->altq_disc) == NULL)
		return (EINVAL);

	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
		return (EINVAL);

	return (hfsc_class_destroy(cl));
}

int
hfsc_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	struct hfsc_if *hif;
	struct hfsc_class *cl;
	union {
		struct hfsc_classstats_v0 v0;
		struct hfsc_classstats_v1 v1;
	} stats;
	size_t stats_size;
	int error = 0;

	if ((hif = altq_lookup(a->ifname, ALTQT_HFSC)) == NULL)
		return (EBADF);

	if ((cl = clh_to_clp(hif, a->qid)) == NULL)
		return (EINVAL);

	if (version > HFSC_STATS_VERSION)
		return (EINVAL);

	memset(&stats, 0, sizeof(stats));
	switch (version) {
	case 0:
		get_class_stats_v0(&stats.v0, cl);
		stats_size = sizeof(struct hfsc_classstats_v0);
		break;
	case 1:
		get_class_stats_v1(&stats.v1, cl);
		stats_size = sizeof(struct hfsc_classstats_v1);
		break;
	default:
		/* unknown version; also keeps stats_size initialized */
		return (EINVAL);
	}

	if (*nbytes < stats_size)
		return (EINVAL);

	if ((error = copyout((caddr_t)&stats, ubuf, stats_size)) != 0)
		return (error);
	*nbytes = stats_size;
	return (0);
}

/*
 * bring the interface back to the initial state by discarding
 * all the classes except the root class.
 */
static int
hfsc_clear_interface(struct hfsc_if *hif)
{
	struct hfsc_class	*cl;

	/* clear out the classes */
	while (hif->hif_rootclass != NULL &&
	    (cl = hif->hif_rootclass->cl_children) != NULL) {
		/*
		 * remove the first leaf class found in the hierarchy
		 * then start over
		 */
		for (; cl != NULL; cl = hfsc_nextclass(cl)) {
			if (!is_a_parent_class(cl)) {
				(void)hfsc_class_destroy(cl);
				break;
			}
		}
	}

	return (0);
}

static int
hfsc_request(struct ifaltq *ifq, int req, void *arg)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;

	IFQ_LOCK_ASSERT(ifq);

	switch (req) {
	case ALTRQ_PURGE:
		hfsc_purge(hif);
		break;
	}
	return (0);
}

/* discard all the queued packets on the interface */
static void
hfsc_purge(struct hfsc_if *hif)
{
	struct hfsc_class *cl;

	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
		if (!qempty(cl->cl_q))
			hfsc_purgeq(cl);
	if (ALTQ_IS_ENABLED(hif->hif_ifq))
		hif->hif_ifq->ifq_len = 0;
}

static struct hfsc_class *
hfsc_class_create(struct hfsc_if *hif, struct service_curve *rsc,
    struct service_curve *fsc, struct service_curve *usc,
    struct hfsc_class *parent, int qlimit, int flags, int qid)
{
	struct hfsc_class *cl, *p;
	int i, s;

	if (hif->hif_classes >= HFSC_MAX_CLASSES)
		return (NULL);

#ifndef ALTQ_RED
	if (flags & HFCF_RED) {
#ifdef ALTQ_DEBUG
		printf("hfsc_class_create: RED not configured for HFSC!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_CODEL
	if (flags & HFCF_CODEL) {
#ifdef ALTQ_DEBUG
		printf("hfsc_class_create: CODEL not configured for HFSC!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct hfsc_class), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl == NULL)
		return (NULL);

	cl->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl->cl_q == NULL)
		goto err_ret;

	TAILQ_INIT(&cl->cl_actc);

	if (qlimit == 0)
		qlimit = 50;  /* use default */
	qlimit(cl->cl_q) = qlimit;
	qtype(cl->cl_q) = Q_DROPTAIL;
	qlen(cl->cl_q) = 0;
	qsize(cl->cl_q) = 0;
	cl->cl_flags = flags;
#ifdef ALTQ_RED
	if (flags & (HFCF_RED|HFCF_RIO)) {
		int red_flags, red_pkttime;
		u_int m2;

		m2 = 0;
		if (rsc != NULL && rsc->m2 > m2)
			m2 = rsc->m2;
		if (fsc != NULL && fsc->m2 > m2)
			m2 = fsc->m2;
		if (usc != NULL && usc->m2 > m2)
			m2 = usc->m2;

		red_flags = 0;
		if (flags & HFCF_ECN)
			red_flags |= REDF_ECN;
#ifdef ALTQ_RIO
		if (flags & HFCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		if (m2 < 8)
			red_pkttime = 1000 * 1000 * 1000; /* 1 sec */
		else
			red_pkttime = (int64_t)hif->hif_ifq->altq_ifp->if_mtu
				* 1000 * 1000 * 1000 / (m2 / 8);
		if (flags & HFCF_RED) {
			cl->cl_red = red_alloc(0, 0,
			    qlimit(cl->cl_q) * 10/100,
			    qlimit(cl->cl_q) * 30/100,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				qtype(cl->cl_q) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->cl_red = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->cl_red != NULL)
				qtype(cl->cl_q) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */
#ifdef ALTQ_CODEL
	if (flags & HFCF_CODEL) {
		cl->cl_codel = codel_alloc(5, 100, 0);
		if (cl->cl_codel != NULL)
			qtype(cl->cl_q) = Q_CODEL;
	}
#endif

	if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
		cl->cl_rsc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_rsc == NULL)
			goto err_ret;
		sc2isc(rsc, cl->cl_rsc);
		rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
		rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
	}
	if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
		cl->cl_fsc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_fsc == NULL)
			goto err_ret;
		sc2isc(fsc, cl->cl_fsc);
		rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
	}
	if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
		cl->cl_usc = malloc(sizeof(struct internal_sc),
		    M_DEVBUF, M_NOWAIT);
		if (cl->cl_usc == NULL)
			goto err_ret;
		sc2isc(usc, cl->cl_usc);
		rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
	}

	cl->cl_id = hif->hif_classid++;
	cl->cl_handle = qid;
	cl->cl_hif = hif;
	cl->cl_parent = parent;

	s = splnet();
	IFQ_LOCK(hif->hif_ifq);
	hif->hif_classes++;

	/*
	 * find a free slot in the class table.  if the slot matching
	 * the lower bits of qid is free, use this slot.  otherwise,
	 * use the first free slot.
	 */
	i = qid % HFSC_MAX_CLASSES;
	if (hif->hif_class_tbl[i] == NULL)
		hif->hif_class_tbl[i] = cl;
	else {
		for (i = 0; i < HFSC_MAX_CLASSES; i++)
			if (hif->hif_class_tbl[i] == NULL) {
				hif->hif_class_tbl[i] = cl;
				break;
			}
		if (i == HFSC_MAX_CLASSES) {
			IFQ_UNLOCK(hif->hif_ifq);
			splx(s);
			goto err_ret;
		}
	}
	cl->cl_slot = i;

	if (flags & HFCF_DEFAULTCLASS)
		hif->hif_defaultclass = cl;

	if (parent == NULL) {
		/* this is root class */
		hif->hif_rootclass = cl;
	} else {
		/* add this class to the children list of the parent */
		if ((p = parent->cl_children) == NULL)
			parent->cl_children = cl;
		else {
			while (p->cl_siblings != NULL)
				p = p->cl_siblings;
			p->cl_siblings = cl;
		}
	}
	IFQ_UNLOCK(hif->hif_ifq);
	splx(s);

	return (cl);

 err_ret:
	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->cl_q))
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->cl_q))
			red_destroy(cl->cl_red);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->cl_q))
			codel_destroy(cl->cl_codel);
#endif
	}
	if (cl->cl_fsc != NULL)
		free(cl->cl_fsc, M_DEVBUF);
	if (cl->cl_rsc != NULL)
		free(cl->cl_rsc, M_DEVBUF);
	if (cl->cl_usc != NULL)
		free(cl->cl_usc, M_DEVBUF);
	if (cl->cl_q != NULL)
		free(cl->cl_q, M_DEVBUF);
	free(cl, M_DEVBUF);
	return (NULL);
}

static int
hfsc_class_destroy(struct hfsc_class *cl)
{
	int s;

	if (cl == NULL)
		return (0);

	if (is_a_parent_class(cl))
		return (EBUSY);

	s = splnet();
	IFQ_LOCK(cl->cl_hif->hif_ifq);

	if (!qempty(cl->cl_q))
		hfsc_purgeq(cl);

	if (cl->cl_parent == NULL) {
		/* this is root class */
	} else {
		struct hfsc_class *p = cl->cl_parent->cl_children;

		if (p == cl)
			cl->cl_parent->cl_children = cl->cl_siblings;
		else do {
			if (p->cl_siblings == cl) {
				p->cl_siblings = cl->cl_siblings;
				break;
			}
		} while ((p = p->cl_siblings) != NULL);
		ASSERT(p != NULL);
	}

	cl->cl_hif->hif_class_tbl[cl->cl_slot] = NULL;
	cl->cl_hif->hif_classes--;
	IFQ_UNLOCK(cl->cl_hif->hif_ifq);
	splx(s);

	if (cl->cl_red != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->cl_q))
			rio_destroy((rio_t *)cl->cl_red);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->cl_q))
			red_destroy(cl->cl_red);
#endif
#ifdef ALTQ_CODEL
		if (q_is_codel(cl->cl_q))
			codel_destroy(cl->cl_codel);
#endif
	}

	IFQ_LOCK(cl->cl_hif->hif_ifq);
	if (cl == cl->cl_hif->hif_rootclass)
		cl->cl_hif->hif_rootclass = NULL;
	if (cl == cl->cl_hif->hif_defaultclass)
		cl->cl_hif->hif_defaultclass = NULL;
	IFQ_UNLOCK(cl->cl_hif->hif_ifq);

	if (cl->cl_usc != NULL)
		free(cl->cl_usc, M_DEVBUF);
	if (cl->cl_fsc != NULL)
		free(cl->cl_fsc, M_DEVBUF);
	if (cl->cl_rsc != NULL)
		free(cl->cl_rsc, M_DEVBUF);
	free(cl->cl_q, M_DEVBUF);
	free(cl, M_DEVBUF);

	return (0);
}

/*
 * hfsc_nextclass returns the next class in the tree.
 *   usage:
 *	for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
 *		do_something;
 */
static struct hfsc_class *
hfsc_nextclass(struct hfsc_class *cl)
{
	if (cl->cl_children != NULL)
		cl = cl->cl_children;
	else if (cl->cl_siblings != NULL)
		cl = cl->cl_siblings;
	else {
		while ((cl = cl->cl_parent) != NULL)
			if (cl->cl_siblings) {
				cl = cl->cl_siblings;
				break;
			}
	}

	return (cl);
}

/*
 * hfsc_enqueue is an enqueue function to be registered to
 * (*altq_enqueue) in struct ifaltq.
 */
static int
hfsc_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
	struct hfsc_class *cl;
	struct pf_mtag *t;
	int len;

	IFQ_LOCK_ASSERT(ifq);

	/* grab class set by classifier */
	if ((m->m_flags & M_PKTHDR) == 0) {
		/* should not happen */
		printf("altq: packet for %s does not have pkthdr\n",
		    ifq->altq_ifp->if_xname);
		m_freem(m);
		return (ENOBUFS);
	}
	cl = NULL;
	if ((t = pf_find_mtag(m)) != NULL)
		cl = clh_to_clp(hif, t->qid);
	if (cl == NULL || is_a_parent_class(cl)) {
		cl = hif->hif_defaultclass;
		if (cl == NULL) {
			m_freem(m);
			return (ENOBUFS);
		}
	}
	cl->cl_pktattr = NULL;
	len = m_pktlen(m);
	if (hfsc_addq(cl, m) != 0) {
		/* drop occurred.  mbuf was freed in hfsc_addq. */
		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, len);
		return (ENOBUFS);
	}
	IFQ_INC_LEN(ifq);
	cl->cl_hif->hif_packets++;

	/* successfully queued. */
	if (qlen(cl->cl_q) == 1)
		set_active(cl, m_pktlen(m));

	return (0);
}

/*
 * hfsc_dequeue is a dequeue function to be registered to
 * (*altq_dequeue) in struct ifaltq.
 *
 * note: ALTDQ_POLL returns the next packet without removing the packet
 *	from the queue.  ALTDQ_REMOVE is a normal dequeue operation.
 *	ALTDQ_REMOVE must return the same packet if called immediately
 *	after ALTDQ_POLL.
 */
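/*
 * the poll/remove contract is implemented with hif_pollcache: a poll
 * selects a class and caches it, and the remove that follows dequeues
 * from the cached class instead of redoing the class selection.
 */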
static struct mbuf *
hfsc_dequeue(struct ifaltq *ifq, int op)
{
	struct hfsc_if	*hif = (struct hfsc_if *)ifq->altq_disc;
	struct hfsc_class *cl;
	struct mbuf *m;
	int len, next_len;
	int realtime = 0;
	u_int64_t cur_time;

	IFQ_LOCK_ASSERT(ifq);

	if (hif->hif_packets == 0)
		/* no packet in the tree */
		return (NULL);

	cur_time = read_machclk();

	if (op == ALTDQ_REMOVE && hif->hif_pollcache != NULL) {
		cl = hif->hif_pollcache;
		hif->hif_pollcache = NULL;
		/* check if the class was scheduled by real-time criteria */
		if (cl->cl_rsc != NULL)
			realtime = (cl->cl_e <= cur_time);
	} else {
		/*
		 * if there are eligible classes, use real-time criteria.
		 * find the class with the minimum deadline among
		 * the eligible classes.
		 */
		if ((cl = hfsc_get_mindl(hif, cur_time)) != NULL) {
			realtime = 1;
		} else {
#ifdef ALTQ_DEBUG
			int fits = 0;
#endif
			/*
			 * use link-sharing criteria
			 * get the class with the minimum vt in the hierarchy
			 */
			cl = hif->hif_rootclass;
			while (is_a_parent_class(cl)) {
				cl = actlist_firstfit(cl, cur_time);
				if (cl == NULL) {
#ifdef ALTQ_DEBUG
					if (fits > 0)
						printf("%d fit but none found\n", fits);
#endif
					return (NULL);
				}
				/*
				 * update parent's cl_cvtmin.
				 * don't update if the new vt is smaller.
				 */
				if (cl->cl_parent->cl_cvtmin < cl->cl_vt)
					cl->cl_parent->cl_cvtmin = cl->cl_vt;
#ifdef ALTQ_DEBUG
				fits++;
#endif
			}
		}

		if (op == ALTDQ_POLL) {
			hif->hif_pollcache = cl;
			m = hfsc_pollq(cl);
			return (m);
		}
	}

	m = hfsc_getq(cl);
	if (m == NULL)
		panic("hfsc_dequeue:");
	len = m_pktlen(m);
	cl->cl_hif->hif_packets--;
	IFQ_DEC_LEN(ifq);
	PKTCNTR_ADD(&cl->cl_stats.xmit_cnt, len);

	update_vf(cl, len, cur_time);
	if (realtime)
		cl->cl_cumul += len;

	if (!qempty(cl->cl_q)) {
		if (cl->cl_rsc != NULL) {
			/* update ed */
			next_len = m_pktlen(qhead(cl->cl_q));

			if (realtime)
				update_ed(cl, next_len);
			else
				update_d(cl, next_len);
		}
	} else {
		/* the class becomes passive */
		set_passive(cl);
	}

	return (m);
}

static int
hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
{

#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_addq((rio_t *)cl->cl_red, cl->cl_q,
				m, cl->cl_pktattr);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_addq(cl->cl_red, cl->cl_q, m, cl->cl_pktattr);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		return codel_addq(cl->cl_codel, cl->cl_q, m);
#endif
	if (qlen(cl->cl_q) >= qlimit(cl->cl_q)) {
		m_freem(m);
		return (-1);
	}

	if (cl->cl_flags & HFCF_CLEARDSCP)
		write_dsfield(m, cl->cl_pktattr, 0);

	_addq(cl->cl_q, m);

	return (0);
}

static struct mbuf *
hfsc_getq(struct hfsc_class *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		return rio_getq((rio_t *)cl->cl_red, cl->cl_q);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		return red_getq(cl->cl_red, cl->cl_q);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		return codel_getq(cl->cl_codel, cl->cl_q);
#endif
	return _getq(cl->cl_q);
}

static struct mbuf *
hfsc_pollq(struct hfsc_class *cl)
{
	return qhead(cl->cl_q);
}

static void
hfsc_purgeq(struct hfsc_class *cl)
{
	struct mbuf *m;

	if (qempty(cl->cl_q))
		return;

	while ((m = _getq(cl->cl_q)) != NULL) {
		PKTCNTR_ADD(&cl->cl_stats.drop_cnt, m_pktlen(m));
		m_freem(m);
		cl->cl_hif->hif_packets--;
		IFQ_DEC_LEN(cl->cl_hif->hif_ifq);
	}
	ASSERT(qlen(cl->cl_q) == 0);

	update_vf(cl, 0, 0);	/* remove cl from the actlist */
	set_passive(cl);
}

static void
set_active(struct hfsc_class *cl, int len)
{
	if (cl->cl_rsc != NULL)
		init_ed(cl, len);
	if (cl->cl_fsc != NULL)
		init_vf(cl, len);

	cl->cl_stats.period++;
}

static void
set_passive(struct hfsc_class *cl)
{
	if (cl->cl_rsc != NULL)
		ellist_remove(cl);

	/*
	 * actlist is now handled in update_vf() so that update_vf(cl, 0, 0)
	 * needs to be called explicitly to remove a class from actlist
	 */
}

static void
init_ed(struct hfsc_class *cl, int next_len)
{
	u_int64_t cur_time;

	cur_time = read_machclk();

	/* update the deadline curve */
	rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);

	/*
	 * update the eligible curve.
	 * for concave, it is equal to the deadline curve.
	 * for convex, it is a linear curve with slope m2.
	 */
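	/*
	 * (zeroing dx and dy below collapses the first segment, leaving
	 * a single line of slope sm2.)
	 */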
	cl->cl_eligible = cl->cl_deadline;
	if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
		cl->cl_eligible.dx = 0;
		cl->cl_eligible.dy = 0;
	}

	/* compute e and d */
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_insert(cl);
}

static void
update_ed(struct hfsc_class *cl, int next_len)
{
	cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);

	ellist_update(cl);
}

static void
update_d(struct hfsc_class *cl, int next_len)
{
	cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
}

static void
init_vf(struct hfsc_class *cl, int len)
{
	struct hfsc_class *max_cl, *p;
	u_int64_t vt, f, cur_time;
	int go_active;

	cur_time = 0;
	go_active = 1;
	for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
		if (go_active && cl->cl_nactive++ == 0)
			go_active = 1;
		else
			go_active = 0;

		if (go_active) {
			max_cl = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
			if (max_cl != NULL) {
				/*
				 * set vt to the average of the min and max
				 * classes.  if the parent's period didn't
				 * change, don't decrease vt of the class.
				 */
				vt = max_cl->cl_vt;
				if (cl->cl_parent->cl_cvtmin != 0)
					vt = (cl->cl_parent->cl_cvtmin + vt)/2;

				if (cl->cl_parent->cl_vtperiod !=
				    cl->cl_parentperiod || vt > cl->cl_vt)
					cl->cl_vt = vt;
			} else {
				/*
				 * first child for a new parent backlog period.
				 * add parent's cvtmax to vtoff of children
				 * to make a new vt (vtoff + vt) larger than
				 * the vt in the last period for all children.
				 */
				vt = cl->cl_parent->cl_cvtmax;
				for (p = cl->cl_parent->cl_children; p != NULL;
				     p = p->cl_siblings)
					p->cl_vtoff += vt;
				cl->cl_vt = 0;
				cl->cl_parent->cl_cvtmax = 0;
				cl->cl_parent->cl_cvtmin = 0;
			}
			cl->cl_initvt = cl->cl_vt;

			/* update the virtual curve */
			vt = cl->cl_vt + cl->cl_vtoff;
			rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt, cl->cl_total);
			if (cl->cl_virtual.x == vt) {
				cl->cl_virtual.x -= cl->cl_vtoff;
				cl->cl_vtoff = 0;
			}
			cl->cl_vtadj = 0;

			cl->cl_vtperiod++;  /* increment vt period */
			cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
			if (cl->cl_parent->cl_nactive == 0)
				cl->cl_parentperiod++;
			cl->cl_f = 0;

			actlist_insert(cl);

			if (cl->cl_usc != NULL) {
				/* class has upper limit curve */
				if (cur_time == 0)
					cur_time = read_machclk();

				/* update the ulimit curve */
				rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
				    cl->cl_total);
				/* compute myf */
				cl->cl_myf = rtsc_y2x(&cl->cl_ulimit,
				    cl->cl_total);
				cl->cl_myfadj = 0;
			}
		}

		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

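/*
 * update_vf() walks from cl up toward the root, charging len bytes to
 * each class on the path and recomputing vt and f.  when the class has
 * gone passive (queue empty, or a purge with len == 0), it removes the
 * class from its parent's actlist instead.
 */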
static void
update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
{
	u_int64_t f, myf_bound, delta;
	int go_passive;

	go_passive = qempty(cl->cl_q);

	for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
		cl->cl_total += len;

		if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
			continue;

		if (go_passive && --cl->cl_nactive == 0)
			go_passive = 1;
		else
			go_passive = 0;

		if (go_passive) {
			/* no more active child, going passive */

			/* update cvtmax of the parent class */
			if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
				cl->cl_parent->cl_cvtmax = cl->cl_vt;

			/* remove this class from the vt list */
			actlist_remove(cl);

			update_cfmin(cl->cl_parent);

			continue;
		}

		/*
		 * update vt and f
		 */
		cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total)
		    - cl->cl_vtoff + cl->cl_vtadj;

		/*
		 * if vt of the class is smaller than cvtmin,
		 * the class was skipped in the past due to non-fit.
		 * if so, we need to adjust vtadj.
		 */
		if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
			cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
			cl->cl_vt = cl->cl_parent->cl_cvtmin;
		}

		/* update the vt list */
		actlist_update(cl);

		if (cl->cl_usc != NULL) {
			cl->cl_myf = cl->cl_myfadj
			    + rtsc_y2x(&cl->cl_ulimit, cl->cl_total);

			/*
			 * if myf lags behind by more than one clock tick
			 * from the current time, adjust myfadj to prevent
			 * a rate-limited class from going greedy.
			 * in a steady state under rate-limiting, myf
			 * fluctuates within one clock tick.
			 */
			myf_bound = cur_time - machclk_per_tick;
			if (cl->cl_myf < myf_bound) {
				delta = cur_time - cl->cl_myf;
				cl->cl_myfadj += delta;
				cl->cl_myf += delta;
			}
		}

		/* cl_f is max(cl_myf, cl_cfmin) */
		if (cl->cl_myf > cl->cl_cfmin)
			f = cl->cl_myf;
		else
			f = cl->cl_cfmin;
		if (f != cl->cl_f) {
			cl->cl_f = f;
			update_cfmin(cl->cl_parent);
		}
	}
}

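/*
 * recompute cl_cfmin, the minimum fit-time (cl_f) among the active
 * children of cl; this is the earliest time at which one of the
 * children can be scheduled by link-sharing (see actlist_firstfit()).
 */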
static void
update_cfmin(struct hfsc_class *cl)
{
	struct hfsc_class *p;
	u_int64_t cfmin;

	if (TAILQ_EMPTY(&cl->cl_actc)) {
		cl->cl_cfmin = 0;
		return;
	}
	cfmin = HT_INFINITY;
	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f == 0) {
			cl->cl_cfmin = 0;
			return;
		}
		if (p->cl_f < cfmin)
			cfmin = p->cl_f;
	}
	cl->cl_cfmin = cfmin;
}

/*
 * TAILQ based ellist and actlist implementation
 * (ion wanted to make a calendar queue based implementation)
 */
/*
 * eligible list holds backlogged classes being sorted by their eligible times.
 * there is one eligible list per interface.
 */

static void
ellist_insert(struct hfsc_class *cl)
{
	struct hfsc_if	*hif = cl->cl_hif;
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&hif->hif_eligible, elighead)) == NULL ||
	    p->cl_e <= cl->cl_e) {
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
	ASSERT(0); /* should not reach here */
}

static void
ellist_remove(struct hfsc_class *cl)
{
	struct hfsc_if	*hif = cl->cl_hif;

	TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
}

static void
ellist_update(struct hfsc_class *cl)
{
	struct hfsc_if	*hif = cl->cl_hif;
	struct hfsc_class *p, *last;

	/*
	 * the eligible time of a class increases monotonically.
	 * if the next entry has a larger eligible time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_ellist);
	if (p == NULL || cl->cl_e <= p->cl_e)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&hif->hif_eligible, elighead);
	ASSERT(last != NULL);
	if (last->cl_e <= cl->cl_e) {
		TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
		TAILQ_INSERT_TAIL(&hif->hif_eligible, cl, cl_ellist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
		if (cl->cl_e < p->cl_e) {
			TAILQ_REMOVE(&hif->hif_eligible, cl, cl_ellist);
			TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
			return;
		}
	}
	ASSERT(0); /* should not reach here */
}

/* find the class with the minimum deadline among the eligible classes */
struct hfsc_class *
hfsc_get_mindl(struct hfsc_if *hif, u_int64_t cur_time)
{
	struct hfsc_class *p, *cl = NULL;

	TAILQ_FOREACH(p, &hif->hif_eligible, cl_ellist) {
		if (p->cl_e > cur_time)
			break;
		if (cl == NULL || p->cl_d < cl->cl_d)
			cl = p;
	}
	return (cl);
}

/*
 * active children list holds backlogged child classes being sorted
 * by their virtual time.
 * each intermediate class has one active children list.
 */

static void
actlist_insert(struct hfsc_class *cl)
{
	struct hfsc_class *p;

	/* check the last entry first */
	if ((p = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead)) == NULL
	    || p->cl_vt <= cl->cl_vt) {
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	TAILQ_FOREACH(p, &cl->cl_parent->cl_actc, cl_actlist) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
	ASSERT(0); /* should not reach here */
}

static void
actlist_remove(struct hfsc_class *cl)
{
	TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
}

static void
actlist_update(struct hfsc_class *cl)
{
	struct hfsc_class *p, *last;

	/*
	 * the virtual time of a class increases monotonically during its
	 * backlogged period.
	 * if the next entry has a larger virtual time, nothing to do.
	 */
	p = TAILQ_NEXT(cl, cl_actlist);
	if (p == NULL || cl->cl_vt < p->cl_vt)
		return;

	/* check the last entry */
	last = TAILQ_LAST(&cl->cl_parent->cl_actc, acthead);
	ASSERT(last != NULL);
	if (last->cl_vt <= cl->cl_vt) {
		TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
		TAILQ_INSERT_TAIL(&cl->cl_parent->cl_actc, cl, cl_actlist);
		return;
	}

	/*
	 * the new position must be between the next entry
	 * and the last entry
	 */
	while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
		if (cl->cl_vt < p->cl_vt) {
			TAILQ_REMOVE(&cl->cl_parent->cl_actc, cl, cl_actlist);
			TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
			return;
		}
	}
	ASSERT(0); /* should not reach here */
}

static struct hfsc_class *
actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
{
	struct hfsc_class *p;

	TAILQ_FOREACH(p, &cl->cl_actc, cl_actlist) {
		if (p->cl_f <= cur_time)
			return (p);
	}
	return (NULL);
}

/*
 * service curve support functions
 *
 *  external service curve parameters
 *	m: bits/sec
 *	d: msec
 *  internal service curve parameters
 *	sm: (bytes/machclk tick) << SM_SHIFT
 *	ism: (machclk ticks/byte) << ISM_SHIFT
 *	dx: machclk ticks
 *
 * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.  we
 * should be able to handle 100K-100Gbps linkspeed with 256 MHz machclk
 * frequency and at least 3 effective digits in decimal.
 *
 */
#define	SM_SHIFT	24
#define	ISM_SHIFT	14

#define	SM_MASK		((1LL << SM_SHIFT) - 1)
#define	ISM_MASK	((1LL << ISM_SHIFT) - 1)
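
/*
 * worked example of the scaling (illustrative; assumes machclk_freq =
 * 256 MHz): m = 100 Mbps is 12,500,000 bytes/sec, i.e. about 0.0488
 * bytes per machclk tick.  m2sm() keeps that fraction by shifting
 * before dividing: sm = (100000000ULL << 24) / 8 / 256000000 = 819200.
 */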

static __inline u_int64_t
seg_x2y(u_int64_t x, u_int64_t sm)
{
	u_int64_t y;

	/*
	 * compute
	 *	y = x * sm >> SM_SHIFT
	 * but divide it for the upper and lower bits to avoid overflow
	 */
	y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT);
	return (y);
}

static __inline u_int64_t
seg_y2x(u_int64_t y, u_int64_t ism)
{
	u_int64_t x;

	if (y == 0)
		x = 0;
	else if (ism == HT_INFINITY)
		x = HT_INFINITY;
	else {
		x = (y >> ISM_SHIFT) * ism
		    + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
	}
	return (x);
}

static __inline u_int64_t
m2sm(u_int64_t m)
{
	u_int64_t sm;

	sm = (m << SM_SHIFT) / 8 / machclk_freq;
	return (sm);
}

static __inline u_int64_t
m2ism(u_int64_t m)
{
	u_int64_t ism;

	if (m == 0)
		ism = HT_INFINITY;
	else
		ism = ((u_int64_t)machclk_freq << ISM_SHIFT) * 8 / m;
	return (ism);
}

static __inline u_int64_t
d2dx(u_int d)
{
	u_int64_t dx;

	dx = ((u_int64_t)d * machclk_freq) / 1000;
	return (dx);
}

static u_int64_t
sm2m(u_int64_t sm)
{
	u_int64_t m;

	m = (sm * 8 * machclk_freq) >> SM_SHIFT;
	return (m);
}

static u_int
dx2d(u_int64_t dx)
{
	u_int64_t d;

	d = dx * 1000 / machclk_freq;
	return ((u_int)d);
}

static void
sc2isc(struct service_curve *sc, struct internal_sc *isc)
{
	isc->sm1 = m2sm(sc->m1);
	isc->ism1 = m2ism(sc->m1);
	isc->dx = d2dx(sc->d);
	isc->dy = seg_x2y(isc->dx, isc->sm1);
	isc->sm2 = m2sm(sc->m2);
	isc->ism2 = m2ism(sc->m2);
}

/*
 * initialize the runtime service curve with the given internal
 * service curve starting at (x, y).
 */
static void
rtsc_init(struct runtime_sc *rtsc, struct internal_sc * isc, u_int64_t x,
    u_int64_t y)
{
	rtsc->x =	x;
	rtsc->y =	y;
	rtsc->sm1 =	isc->sm1;
	rtsc->ism1 =	isc->ism1;
	rtsc->dx =	isc->dx;
	rtsc->dy =	isc->dy;
	rtsc->sm2 =	isc->sm2;
	rtsc->ism2 =	isc->ism2;
}

/*
 * calculate the x-projection of the runtime service curve for the
 * given y-projection value
 */
static u_int64_t
rtsc_y2x(struct runtime_sc *rtsc, u_int64_t y)
{
	u_int64_t	x;

	if (y < rtsc->y)
		x = rtsc->x;
	else if (y <= rtsc->y + rtsc->dy) {
		/* x belongs to the 1st segment */
		if (rtsc->dy == 0)
			x = rtsc->x + rtsc->dx;
		else
			x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
	} else {
		/* x belongs to the 2nd segment */
		x = rtsc->x + rtsc->dx
		    + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
	}
	return (x);
}

static u_int64_t
rtsc_x2y(struct runtime_sc *rtsc, u_int64_t x)
{
	u_int64_t	y;

	if (x <= rtsc->x)
		y = rtsc->y;
	else if (x <= rtsc->x + rtsc->dx)
		/* y belongs to the 1st segment */
		y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
	else
		/* y belongs to the 2nd segment */
		y = rtsc->y + rtsc->dy
		    + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
	return (y);
}

/*
 * update the runtime service curve by taking the minimum of the current
 * runtime service curve and the service curve starting at (x, y).
 */
static void
rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u_int64_t x,
    u_int64_t y)
{
	u_int64_t	y1, y2, dx, dy;

	if (isc->sm1 <= isc->sm2) {
		/* service curve is convex */
		y1 = rtsc_x2y(rtsc, x);
		if (y1 < y)
			/* the current rtsc is smaller */
			return;
		rtsc->x = x;
		rtsc->y = y;
		return;
	}

	/*
	 * service curve is concave
	 * compute the two y values of the current rtsc
	 *	y1: at x
	 *	y2: at (x + dx)
	 */
	y1 = rtsc_x2y(rtsc, x);
	if (y1 <= y) {
		/* rtsc is below isc, no change to rtsc */
		return;
	}

	y2 = rtsc_x2y(rtsc, x + isc->dx);
	if (y2 >= y + isc->dy) {
		/* rtsc is above isc, replace rtsc by isc */
		rtsc->x = x;
		rtsc->y = y;
		rtsc->dx = isc->dx;
		rtsc->dy = isc->dy;
		return;
	}

	/*
	 * the two curves intersect
	 * compute the offsets (dx, dy) using the reverse
	 * function of seg_x2y()
	 *	seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
	 */
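	/*
	 * solving that equality for dx:
	 *	(dx * sm1) >> SM_SHIFT == ((dx * sm2) >> SM_SHIFT) + (y1 - y)
	 *	dx * (sm1 - sm2) == (y1 - y) << SM_SHIFT
	 */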
	dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
	/*
	 * check if (x, y1) belongs to the 1st segment of rtsc.
	 * if so, add the offset.
	 */
	if (rtsc->x + rtsc->dx > x)
		dx += rtsc->x + rtsc->dx - x;
	dy = seg_x2y(dx, isc->sm1);

	rtsc->x = x;
	rtsc->y = y;
	rtsc->dx = dx;
	rtsc->dy = dy;
	return;
}

static void
get_class_stats_v0(struct hfsc_classstats_v0 *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

#define SATU32(x)	(u_int32_t)uqmin((x), UINT_MAX)

	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = SATU32(sm2m(cl->cl_rsc->sm1));
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = SATU32(sm2m(cl->cl_rsc->sm2));
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = SATU32(sm2m(cl->cl_fsc->sm1));
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = SATU32(sm2m(cl->cl_fsc->sm2));
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = SATU32(sm2m(cl->cl_usc->sm1));
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = SATU32(sm2m(cl->cl_usc->sm2));
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

#undef SATU32

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = read_machclk();
	sp->machclk_freq = machclk_freq;

	sp->qlength = qlen(cl->cl_q);
	sp->qlimit = qlimit(cl->cl_q);
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = qtype(cl->cl_q);
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		codel_getstats(cl->cl_codel, &sp->codel);
#endif
}

static void
get_class_stats_v1(struct hfsc_classstats_v1 *sp, struct hfsc_class *cl)
{
	sp->class_id = cl->cl_id;
	sp->class_handle = cl->cl_handle;

	if (cl->cl_rsc != NULL) {
		sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
		sp->rsc.d = dx2d(cl->cl_rsc->dx);
		sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
	} else {
		sp->rsc.m1 = 0;
		sp->rsc.d = 0;
		sp->rsc.m2 = 0;
	}
	if (cl->cl_fsc != NULL) {
		sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
		sp->fsc.d = dx2d(cl->cl_fsc->dx);
		sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
	} else {
		sp->fsc.m1 = 0;
		sp->fsc.d = 0;
		sp->fsc.m2 = 0;
	}
	if (cl->cl_usc != NULL) {
		sp->usc.m1 = sm2m(cl->cl_usc->sm1);
		sp->usc.d = dx2d(cl->cl_usc->dx);
		sp->usc.m2 = sm2m(cl->cl_usc->sm2);
	} else {
		sp->usc.m1 = 0;
		sp->usc.d = 0;
		sp->usc.m2 = 0;
	}

	sp->total = cl->cl_total;
	sp->cumul = cl->cl_cumul;

	sp->d = cl->cl_d;
	sp->e = cl->cl_e;
	sp->vt = cl->cl_vt;
	sp->f = cl->cl_f;

	sp->initvt = cl->cl_initvt;
	sp->vtperiod = cl->cl_vtperiod;
	sp->parentperiod = cl->cl_parentperiod;
	sp->nactive = cl->cl_nactive;
	sp->vtoff = cl->cl_vtoff;
	sp->cvtmax = cl->cl_cvtmax;
	sp->myf = cl->cl_myf;
	sp->cfmin = cl->cl_cfmin;
	sp->cvtmin = cl->cl_cvtmin;
	sp->myfadj = cl->cl_myfadj;
	sp->vtadj = cl->cl_vtadj;

	sp->cur_time = read_machclk();
	sp->machclk_freq = machclk_freq;

	sp->qlength = qlen(cl->cl_q);
	sp->qlimit = qlimit(cl->cl_q);
	sp->xmit_cnt = cl->cl_stats.xmit_cnt;
	sp->drop_cnt = cl->cl_stats.drop_cnt;
	sp->period = cl->cl_stats.period;

	sp->qtype = qtype(cl->cl_q);
#ifdef ALTQ_RED
	if (q_is_red(cl->cl_q))
		red_getstats(cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_RIO
	if (q_is_rio(cl->cl_q))
		rio_getstats((rio_t *)cl->cl_red, &sp->red[0]);
#endif
#ifdef ALTQ_CODEL
	if (q_is_codel(cl->cl_q))
		codel_getstats(cl->cl_codel, &sp->codel);
#endif
}

/* convert a class handle to the corresponding class pointer */
static struct hfsc_class *
clh_to_clp(struct hfsc_if *hif, u_int32_t chandle)
{
	int i;
	struct hfsc_class *cl;

	if (chandle == 0)
		return (NULL);
	/*
	 * first, try optimistically the slot matching the lower bits of
	 * the handle.  if it fails, do the linear table search.
	 */
	i = chandle % HFSC_MAX_CLASSES;
	if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
		return (cl);
	for (i = 0; i < HFSC_MAX_CLASSES; i++)
		if ((cl = hif->hif_class_tbl[i]) != NULL &&
		    cl->cl_handle == chandle)
			return (cl);
	return (NULL);
}

#endif /* ALTQ_HFSC */