xref: /freebsd/sys/net/altq/altq_subr.c (revision b339ef955c65fd672f7e3dd39f22c8f946d09f3e)
1 /*-
2  * Copyright (C) 1997-2003
3  *	Sony Computer Science Laboratories Inc.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
27  * $FreeBSD$
28  */
29 
30 #include "opt_altq.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/systm.h>
38 #include <sys/proc.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/kernel.h>
42 #include <sys/errno.h>
43 #include <sys/syslog.h>
44 #include <sys/sysctl.h>
45 #include <sys/queue.h>
46 
47 #include <net/if.h>
48 #include <net/if_var.h>
49 #include <net/if_dl.h>
50 #include <net/if_types.h>
51 #include <net/vnet.h>
52 
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #ifdef INET6
57 #include <netinet/ip6.h>
58 #endif
59 #include <netinet/tcp.h>
60 #include <netinet/udp.h>
61 
62 #include <netpfil/pf/pf.h>
63 #include <netpfil/pf/pf_altq.h>
64 #include <net/altq/altq.h>
65 #ifdef ALTQ3_COMPAT
66 #include <net/altq/altq_conf.h>
67 #endif
68 
69 /* machine dependent clock related includes */
70 #include <sys/bus.h>
71 #include <sys/cpu.h>
72 #include <sys/eventhandler.h>
73 #include <machine/clock.h>
74 #if defined(__amd64__) || defined(__i386__)
75 #include <machine/cpufunc.h>		/* for pentium tsc */
76 #include <machine/specialreg.h>		/* for CPUID_TSC */
77 #include <machine/md_var.h>		/* for cpu_feature */
78 #endif /* __amd64 || __i386__ */
79 
80 /*
81  * internal function prototypes
82  */
83 static void	tbr_timeout(void *);
84 int (*altq_input)(struct mbuf *, int) = NULL;
85 static struct mbuf *tbr_dequeue(struct ifaltq *, int);
86 static int tbr_timer = 0;	/* token bucket regulator timer */
87 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
88 static struct callout tbr_callout = CALLOUT_INITIALIZER;
89 #else
90 static struct callout tbr_callout;
91 #endif
92 
93 #ifdef ALTQ3_CLFIER_COMPAT
94 static int 	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
95 #ifdef INET6
96 static int 	extract_ports6(struct mbuf *, struct ip6_hdr *,
97 			       struct flowinfo_in6 *);
98 #endif
99 static int	apply_filter4(u_int32_t, struct flow_filter *,
100 			      struct flowinfo_in *);
101 static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
102 				struct flowinfo_in *);
103 #ifdef INET6
104 static int	apply_filter6(u_int32_t, struct flow_filter6 *,
105 			      struct flowinfo_in6 *);
106 #endif
107 static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
108 				 struct flowinfo_in *);
109 static u_long	get_filt_handle(struct acc_classifier *, int);
110 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
111 static u_int32_t filt2fibmask(struct flow_filter *);
112 
113 static void 	ip4f_cache(struct ip *, struct flowinfo_in *);
114 static int 	ip4f_lookup(struct ip *, struct flowinfo_in *);
115 static int 	ip4f_init(void);
116 static struct ip4_frag	*ip4f_alloc(void);
117 static void 	ip4f_free(struct ip4_frag *);
118 #endif /* ALTQ3_CLFIER_COMPAT */
119 
120 /*
121  * alternate queueing support routines
122  */
123 
124 /* look up the queue state by the interface name and the queueing type. */
125 void *
126 altq_lookup(name, type)
127 	char *name;
128 	int type;
129 {
130 	struct ifnet *ifp;
131 
132 	if ((ifp = ifunit(name)) != NULL) {
133 		/* read if_snd unlocked */
134 		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
135 			return (ifp->if_snd.altq_disc);
136 	}
137 
138 	return NULL;
139 }
140 
141 int
142 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
143 	struct ifaltq *ifq;
144 	int type;
145 	void *discipline;
146 	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
147 	struct mbuf *(*dequeue)(struct ifaltq *, int);
148 	int (*request)(struct ifaltq *, int, void *);
149 	void *clfier;
150 	void *(*classify)(void *, struct mbuf *, int);
151 {
152 	IFQ_LOCK(ifq);
153 	if (!ALTQ_IS_READY(ifq)) {
154 		IFQ_UNLOCK(ifq);
155 		return ENXIO;
156 	}
157 
158 #ifdef ALTQ3_COMPAT
159 	/*
160 	 * pfaltq can override the existing discipline, but altq3 cannot.
161 	 * check these if clfier is not NULL (which implies altq3).
162 	 */
163 	if (clfier != NULL) {
164 		if (ALTQ_IS_ENABLED(ifq)) {
165 			IFQ_UNLOCK(ifq);
166 			return EBUSY;
167 		}
168 		if (ALTQ_IS_ATTACHED(ifq)) {
169 			IFQ_UNLOCK(ifq);
170 			return EEXIST;
171 		}
172 	}
173 #endif
174 	ifq->altq_type     = type;
175 	ifq->altq_disc     = discipline;
176 	ifq->altq_enqueue  = enqueue;
177 	ifq->altq_dequeue  = dequeue;
178 	ifq->altq_request  = request;
179 	ifq->altq_clfier   = clfier;
180 	ifq->altq_classify = classify;
181 	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
182 #ifdef ALTQ3_COMPAT
183 #ifdef ALTQ_KLD
184 	altq_module_incref(type);
185 #endif
186 #endif
187 	IFQ_UNLOCK(ifq);
188 	return 0;
189 }
190 
191 int
192 altq_detach(ifq)
193 	struct ifaltq *ifq;
194 {
195 	IFQ_LOCK(ifq);
196 
197 	if (!ALTQ_IS_READY(ifq)) {
198 		IFQ_UNLOCK(ifq);
199 		return ENXIO;
200 	}
201 	if (ALTQ_IS_ENABLED(ifq)) {
202 		IFQ_UNLOCK(ifq);
203 		return EBUSY;
204 	}
205 	if (!ALTQ_IS_ATTACHED(ifq)) {
206 		IFQ_UNLOCK(ifq);
207 		return (0);
208 	}
209 #ifdef ALTQ3_COMPAT
210 #ifdef ALTQ_KLD
211 	altq_module_declref(ifq->altq_type);
212 #endif
213 #endif
214 
215 	ifq->altq_type     = ALTQT_NONE;
216 	ifq->altq_disc     = NULL;
217 	ifq->altq_enqueue  = NULL;
218 	ifq->altq_dequeue  = NULL;
219 	ifq->altq_request  = NULL;
220 	ifq->altq_clfier   = NULL;
221 	ifq->altq_classify = NULL;
222 	ifq->altq_flags &= ALTQF_CANTCHANGE;
223 
224 	IFQ_UNLOCK(ifq);
225 	return 0;
226 }
227 
228 int
229 altq_enable(ifq)
230 	struct ifaltq *ifq;
231 {
232 	int s;
233 
234 	IFQ_LOCK(ifq);
235 
236 	if (!ALTQ_IS_READY(ifq)) {
237 		IFQ_UNLOCK(ifq);
238 		return ENXIO;
239 	}
240 	if (ALTQ_IS_ENABLED(ifq)) {
241 		IFQ_UNLOCK(ifq);
242 		return 0;
243 	}
244 
245 	s = splnet();
246 	IFQ_PURGE_NOLOCK(ifq);
247 	ASSERT(ifq->ifq_len == 0);
248 	ifq->ifq_drv_maxlen = 0;		/* disable bulk dequeue */
249 	ifq->altq_flags |= ALTQF_ENABLED;
250 	if (ifq->altq_clfier != NULL)
251 		ifq->altq_flags |= ALTQF_CLASSIFY;
252 	splx(s);
253 
254 	IFQ_UNLOCK(ifq);
255 	return 0;
256 }
257 
258 int
259 altq_disable(ifq)
260 	struct ifaltq *ifq;
261 {
262 	int s;
263 
264 	IFQ_LOCK(ifq);
265 	if (!ALTQ_IS_ENABLED(ifq)) {
266 		IFQ_UNLOCK(ifq);
267 		return 0;
268 	}
269 
270 	s = splnet();
271 	IFQ_PURGE_NOLOCK(ifq);
272 	ASSERT(ifq->ifq_len == 0);
273 	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
274 	splx(s);
275 
276 	IFQ_UNLOCK(ifq);
277 	return 0;
278 }
279 
280 #ifdef ALTQ_DEBUG
/*
 * Report a failed ASSERT() and panic.  Never returns.
 */
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
291 #endif
292 
293 /*
294  * internal representation of token bucket parameters
295  *	rate:	byte_per_unittime << 32
296  *		(((bits_per_sec) / 8) << 32) / machclk_freq
297  *	depth:	byte << 32
298  *
299  */
300 #define	TBR_SHIFT	32
301 #define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
302 #define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
303 
304 static struct mbuf *
305 tbr_dequeue(ifq, op)
306 	struct ifaltq *ifq;
307 	int op;
308 {
309 	struct tb_regulator *tbr;
310 	struct mbuf *m;
311 	int64_t interval;
312 	u_int64_t now;
313 
314 	IFQ_LOCK_ASSERT(ifq);
315 	tbr = ifq->altq_tbr;
316 	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
317 		/* if this is a remove after poll, bypass tbr check */
318 	} else {
319 		/* update token only when it is negative */
320 		if (tbr->tbr_token <= 0) {
321 			now = read_machclk();
322 			interval = now - tbr->tbr_last;
323 			if (interval >= tbr->tbr_filluptime)
324 				tbr->tbr_token = tbr->tbr_depth;
325 			else {
326 				tbr->tbr_token += interval * tbr->tbr_rate;
327 				if (tbr->tbr_token > tbr->tbr_depth)
328 					tbr->tbr_token = tbr->tbr_depth;
329 			}
330 			tbr->tbr_last = now;
331 		}
332 		/* if token is still negative, don't allow dequeue */
333 		if (tbr->tbr_token <= 0)
334 			return (NULL);
335 	}
336 
337 	if (ALTQ_IS_ENABLED(ifq))
338 		m = (*ifq->altq_dequeue)(ifq, op);
339 	else {
340 		if (op == ALTDQ_POLL)
341 			_IF_POLL(ifq, m);
342 		else
343 			_IF_DEQUEUE(ifq, m);
344 	}
345 
346 	if (m != NULL && op == ALTDQ_REMOVE)
347 		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
348 	tbr->tbr_lastop = op;
349 	return (m);
350 }
351 
352 /*
353  * set a token bucket regulator.
354  * if the specified rate is zero, the token bucket regulator is deleted.
355  */
356 int
357 tbr_set(ifq, profile)
358 	struct ifaltq *ifq;
359 	struct tb_profile *profile;
360 {
361 	struct tb_regulator *tbr, *otbr;
362 
363 	if (tbr_dequeue_ptr == NULL)
364 		tbr_dequeue_ptr = tbr_dequeue;
365 
366 	if (machclk_freq == 0)
367 		init_machclk();
368 	if (machclk_freq == 0) {
369 		printf("tbr_set: no cpu clock available!\n");
370 		return (ENXIO);
371 	}
372 
373 	IFQ_LOCK(ifq);
374 	if (profile->rate == 0) {
375 		/* delete this tbr */
376 		if ((tbr = ifq->altq_tbr) == NULL) {
377 			IFQ_UNLOCK(ifq);
378 			return (ENOENT);
379 		}
380 		ifq->altq_tbr = NULL;
381 		free(tbr, M_DEVBUF);
382 		IFQ_UNLOCK(ifq);
383 		return (0);
384 	}
385 
386 	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
387 	if (tbr == NULL) {
388 		IFQ_UNLOCK(ifq);
389 		return (ENOMEM);
390 	}
391 
392 	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
393 	tbr->tbr_depth = TBR_SCALE(profile->depth);
394 	if (tbr->tbr_rate > 0)
395 		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
396 	else
397 		tbr->tbr_filluptime = 0xffffffffffffffffLL;
398 	tbr->tbr_token = tbr->tbr_depth;
399 	tbr->tbr_last = read_machclk();
400 	tbr->tbr_lastop = ALTDQ_REMOVE;
401 
402 	otbr = ifq->altq_tbr;
403 	ifq->altq_tbr = tbr;	/* set the new tbr */
404 
405 	if (otbr != NULL)
406 		free(otbr, M_DEVBUF);
407 	else {
408 		if (tbr_timer == 0) {
409 			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
410 			tbr_timer = 1;
411 		}
412 	}
413 	IFQ_UNLOCK(ifq);
414 	return (0);
415 }
416 
417 /*
418  * tbr_timeout goes through the interface list, and kicks the drivers
419  * if necessary.
420  *
421  * MPSAFE
422  */
423 static void
424 tbr_timeout(arg)
425 	void *arg;
426 {
427 	VNET_ITERATOR_DECL(vnet_iter);
428 	struct ifnet *ifp;
429 	int active, s;
430 
431 	active = 0;
432 	s = splnet();
433 	IFNET_RLOCK_NOSLEEP();
434 	VNET_LIST_RLOCK_NOSLEEP();
435 	VNET_FOREACH(vnet_iter) {
436 		CURVNET_SET(vnet_iter);
437 		for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
438 		    ifp = TAILQ_NEXT(ifp, if_list)) {
439 			/* read from if_snd unlocked */
440 			if (!TBR_IS_ENABLED(&ifp->if_snd))
441 				continue;
442 			active++;
443 			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
444 			    ifp->if_start != NULL)
445 				(*ifp->if_start)(ifp);
446 		}
447 		CURVNET_RESTORE();
448 	}
449 	VNET_LIST_RUNLOCK_NOSLEEP();
450 	IFNET_RUNLOCK_NOSLEEP();
451 	splx(s);
452 	if (active > 0)
453 		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
454 	else
455 		tbr_timer = 0;	/* don't need tbr_timer anymore */
456 }
457 
458 /*
459  * get token bucket regulator profile
460  */
461 int
462 tbr_get(ifq, profile)
463 	struct ifaltq *ifq;
464 	struct tb_profile *profile;
465 {
466 	struct tb_regulator *tbr;
467 
468 	IFQ_LOCK(ifq);
469 	if ((tbr = ifq->altq_tbr) == NULL) {
470 		profile->rate = 0;
471 		profile->depth = 0;
472 	} else {
473 		profile->rate =
474 		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
475 		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
476 	}
477 	IFQ_UNLOCK(ifq);
478 	return (0);
479 }
480 
481 /*
482  * attach a discipline to the interface.  if one already exists, it is
483  * overridden.
484  * Locking is done in the discipline specific attach functions. Basically
485  * they call back to altq_attach which takes care of the attach and locking.
486  */
487 int
488 altq_pfattach(struct pf_altq *a)
489 {
490 	int error = 0;
491 
492 	switch (a->scheduler) {
493 	case ALTQT_NONE:
494 		break;
495 #ifdef ALTQ_CBQ
496 	case ALTQT_CBQ:
497 		error = cbq_pfattach(a);
498 		break;
499 #endif
500 #ifdef ALTQ_PRIQ
501 	case ALTQT_PRIQ:
502 		error = priq_pfattach(a);
503 		break;
504 #endif
505 #ifdef ALTQ_HFSC
506 	case ALTQT_HFSC:
507 		error = hfsc_pfattach(a);
508 		break;
509 #endif
510 #ifdef ALTQ_FAIRQ
511 	case ALTQT_FAIRQ:
512 		error = fairq_pfattach(a);
513 		break;
514 #endif
515 	default:
516 		error = ENXIO;
517 	}
518 
519 	return (error);
520 }
521 
522 /*
523  * detach a discipline from the interface.
524  * it is possible that the discipline was already overridden by another
525  * discipline.
526  */
527 int
528 altq_pfdetach(struct pf_altq *a)
529 {
530 	struct ifnet *ifp;
531 	int s, error = 0;
532 
533 	if ((ifp = ifunit(a->ifname)) == NULL)
534 		return (EINVAL);
535 
536 	/* if this discipline is no longer referenced, just return */
537 	/* read unlocked from if_snd */
538 	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
539 		return (0);
540 
541 	s = splnet();
542 	/* read unlocked from if_snd, _disable and _detach take care */
543 	if (ALTQ_IS_ENABLED(&ifp->if_snd))
544 		error = altq_disable(&ifp->if_snd);
545 	if (error == 0)
546 		error = altq_detach(&ifp->if_snd);
547 	splx(s);
548 
549 	return (error);
550 }
551 
552 /*
553  * add a discipline or a queue
554  * Locking is done in the discipline specific functions with regards to
555  * malloc with WAITOK, also it is not yet clear which lock to use.
556  */
557 int
558 altq_add(struct pf_altq *a)
559 {
560 	int error = 0;
561 
562 	if (a->qname[0] != 0)
563 		return (altq_add_queue(a));
564 
565 	if (machclk_freq == 0)
566 		init_machclk();
567 	if (machclk_freq == 0)
568 		panic("altq_add: no cpu clock");
569 
570 	switch (a->scheduler) {
571 #ifdef ALTQ_CBQ
572 	case ALTQT_CBQ:
573 		error = cbq_add_altq(a);
574 		break;
575 #endif
576 #ifdef ALTQ_PRIQ
577 	case ALTQT_PRIQ:
578 		error = priq_add_altq(a);
579 		break;
580 #endif
581 #ifdef ALTQ_HFSC
582 	case ALTQT_HFSC:
583 		error = hfsc_add_altq(a);
584 		break;
585 #endif
586 #ifdef ALTQ_FAIRQ
587         case ALTQT_FAIRQ:
588                 error = fairq_add_altq(a);
589                 break;
590 #endif
591 	default:
592 		error = ENXIO;
593 	}
594 
595 	return (error);
596 }
597 
598 /*
599  * remove a discipline or a queue
600  * It is yet unclear what lock to use to protect this operation, the
601  * discipline specific functions will determine and grab it
602  */
603 int
604 altq_remove(struct pf_altq *a)
605 {
606 	int error = 0;
607 
608 	if (a->qname[0] != 0)
609 		return (altq_remove_queue(a));
610 
611 	switch (a->scheduler) {
612 #ifdef ALTQ_CBQ
613 	case ALTQT_CBQ:
614 		error = cbq_remove_altq(a);
615 		break;
616 #endif
617 #ifdef ALTQ_PRIQ
618 	case ALTQT_PRIQ:
619 		error = priq_remove_altq(a);
620 		break;
621 #endif
622 #ifdef ALTQ_HFSC
623 	case ALTQT_HFSC:
624 		error = hfsc_remove_altq(a);
625 		break;
626 #endif
627 #ifdef ALTQ_FAIRQ
628         case ALTQT_FAIRQ:
629                 error = fairq_remove_altq(a);
630                 break;
631 #endif
632 	default:
633 		error = ENXIO;
634 	}
635 
636 	return (error);
637 }
638 
639 /*
640  * add a queue to the discipline
641  * It is yet unclear what lock to use to protect this operation, the
642  * discipline specific functions will determine and grab it
643  */
644 int
645 altq_add_queue(struct pf_altq *a)
646 {
647 	int error = 0;
648 
649 	switch (a->scheduler) {
650 #ifdef ALTQ_CBQ
651 	case ALTQT_CBQ:
652 		error = cbq_add_queue(a);
653 		break;
654 #endif
655 #ifdef ALTQ_PRIQ
656 	case ALTQT_PRIQ:
657 		error = priq_add_queue(a);
658 		break;
659 #endif
660 #ifdef ALTQ_HFSC
661 	case ALTQT_HFSC:
662 		error = hfsc_add_queue(a);
663 		break;
664 #endif
665 #ifdef ALTQ_FAIRQ
666         case ALTQT_FAIRQ:
667                 error = fairq_add_queue(a);
668                 break;
669 #endif
670 	default:
671 		error = ENXIO;
672 	}
673 
674 	return (error);
675 }
676 
677 /*
678  * remove a queue from the discipline
679  * It is yet unclear what lock to use to protect this operation, the
680  * discipline specific functions will determine and grab it
681  */
682 int
683 altq_remove_queue(struct pf_altq *a)
684 {
685 	int error = 0;
686 
687 	switch (a->scheduler) {
688 #ifdef ALTQ_CBQ
689 	case ALTQT_CBQ:
690 		error = cbq_remove_queue(a);
691 		break;
692 #endif
693 #ifdef ALTQ_PRIQ
694 	case ALTQT_PRIQ:
695 		error = priq_remove_queue(a);
696 		break;
697 #endif
698 #ifdef ALTQ_HFSC
699 	case ALTQT_HFSC:
700 		error = hfsc_remove_queue(a);
701 		break;
702 #endif
703 #ifdef ALTQ_FAIRQ
704         case ALTQT_FAIRQ:
705                 error = fairq_remove_queue(a);
706                 break;
707 #endif
708 	default:
709 		error = ENXIO;
710 	}
711 
712 	return (error);
713 }
714 
715 /*
716  * get queue statistics
717  * Locking is done in the discipline specific functions with regards to
718  * copyout operations, also it is not yet clear which lock to use.
719  */
720 int
721 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
722 {
723 	int error = 0;
724 
725 	switch (a->scheduler) {
726 #ifdef ALTQ_CBQ
727 	case ALTQT_CBQ:
728 		error = cbq_getqstats(a, ubuf, nbytes);
729 		break;
730 #endif
731 #ifdef ALTQ_PRIQ
732 	case ALTQT_PRIQ:
733 		error = priq_getqstats(a, ubuf, nbytes);
734 		break;
735 #endif
736 #ifdef ALTQ_HFSC
737 	case ALTQT_HFSC:
738 		error = hfsc_getqstats(a, ubuf, nbytes);
739 		break;
740 #endif
741 #ifdef ALTQ_FAIRQ
742         case ALTQT_FAIRQ:
743                 error = fairq_getqstats(a, ubuf, nbytes);
744                 break;
745 #endif
746 	default:
747 		error = ENXIO;
748 	}
749 
750 	return (error);
751 }
752 
753 /*
754  * read and write diffserv field in IPv4 or IPv6 header
755  */
756 u_int8_t
757 read_dsfield(m, pktattr)
758 	struct mbuf *m;
759 	struct altq_pktattr *pktattr;
760 {
761 	struct mbuf *m0;
762 	u_int8_t ds_field = 0;
763 
764 	if (pktattr == NULL ||
765 	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
766 		return ((u_int8_t)0);
767 
768 	/* verify that pattr_hdr is within the mbuf data */
769 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
770 		if ((pktattr->pattr_hdr >= m0->m_data) &&
771 		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
772 			break;
773 	if (m0 == NULL) {
774 		/* ick, pattr_hdr is stale */
775 		pktattr->pattr_af = AF_UNSPEC;
776 #ifdef ALTQ_DEBUG
777 		printf("read_dsfield: can't locate header!\n");
778 #endif
779 		return ((u_int8_t)0);
780 	}
781 
782 	if (pktattr->pattr_af == AF_INET) {
783 		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
784 
785 		if (ip->ip_v != 4)
786 			return ((u_int8_t)0);	/* version mismatch! */
787 		ds_field = ip->ip_tos;
788 	}
789 #ifdef INET6
790 	else if (pktattr->pattr_af == AF_INET6) {
791 		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
792 		u_int32_t flowlabel;
793 
794 		flowlabel = ntohl(ip6->ip6_flow);
795 		if ((flowlabel >> 28) != 6)
796 			return ((u_int8_t)0);	/* version mismatch! */
797 		ds_field = (flowlabel >> 20) & 0xff;
798 	}
799 #endif
800 	return (ds_field);
801 }
802 
/*
 * Write dsfield into the diffserv (TOS / traffic class) field of the
 * IPv4 or IPv6 header cached in pktattr, updating the IPv4 header
 * checksum incrementally.  Silently returns if pktattr is missing,
 * stale, or of an unsupported address family.
 */
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 * here ~m is (0xff00 | ~old) and m' is (0xff00 | dsfield)
		 * for the 16-bit word containing the TOS byte.
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		/* bits 20-27 of the flow word carry the traffic class */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
863 
864 
865 /*
866  * high resolution clock support taking advantage of a machine dependent
867  * high resolution time counter (e.g., timestamp counter of intel pentium).
868  * we assume
869  *  - 64-bit-long monotonically-increasing counter
870  *  - frequency range is 100M-4GHz (CPU speed)
871  */
872 /* if pcc is not available or disabled, emulate 256MHz using microtime() */
873 #define	MACHCLK_SHIFT	8
874 
875 int machclk_usepcc;
876 u_int32_t machclk_freq;
877 u_int32_t machclk_per_tick;
878 
879 #if defined(__i386__) && defined(__NetBSD__)
880 extern u_int64_t cpu_tsc_freq;
881 #endif
882 
883 #if (__FreeBSD_version >= 700035)
884 /* Update TSC freq with the value indicated by the caller. */
/*
 * cpufreq post-change event handler: re-derive machclk_freq after a
 * CPU frequency transition, since a non-invariant TSC ticks at the
 * (changed) core clock rate.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
903 #endif /* __FreeBSD_version >= 700035 */
904 
/*
 * One-time initialization: initialize the tbr callout and decide
 * whether the CPU timestamp counter can be used as the high resolution
 * clock (machclk_usepcc).
 */
static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	/* optimistically assume the timestamp counter is usable */
	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	/* NOTE(review): presumably disabled on SMP because per-CPU
	 * counters may not be synchronized — confirm */
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}
930 
/*
 * Determine machclk_freq, the frequency of the clock returned by
 * read_machclk(), and derive machclk_per_tick from it.  Safe to call
 * repeatedly; the one-time setup runs only on the first call.
 */
void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it by sleeping
	 * for about one second and comparing machclk against microtime.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}
987 
988 #if defined(__OpenBSD__) && defined(__i386__)
/* Read the CPU timestamp counter (raw RDTSC, emitted as opcode bytes). */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
996 #endif /* __OpenBSD__ && __i386__ */
997 
/*
 * Read the high resolution clock: the raw timestamp counter when
 * usable, otherwise microseconds since boot shifted left by
 * MACHCLK_SHIFT (the emulated 256MHz clock).
 */
u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		/* machclk_usepcc is never set on other architectures */
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
1018 
1019 #ifdef ALTQ3_CLFIER_COMPAT
1020 
1021 #ifndef IPPROTO_ESP
1022 #define	IPPROTO_ESP	50		/* encapsulating security payload */
1023 #endif
1024 #ifndef IPPROTO_AH
1025 #define	IPPROTO_AH	51		/* authentication header */
1026 #endif
1027 
1028 /*
1029  * extract flow information from a given packet.
1030  * filt_mask shows flowinfo fields required.
1031  * we assume the ip header is in one mbuf, and addresses and ports are
1032  * in network byte order.
1033  */
1034 int
1035 altq_extractflow(m, af, flow, filt_bmask)
1036 	struct mbuf *m;
1037 	int af;
1038 	struct flowinfo *flow;
1039 	u_int32_t	filt_bmask;
1040 {
1041 
1042 	switch (af) {
1043 	case PF_INET: {
1044 		struct flowinfo_in *fin;
1045 		struct ip *ip;
1046 
1047 		ip = mtod(m, struct ip *);
1048 
1049 		if (ip->ip_v != 4)
1050 			break;
1051 
1052 		fin = (struct flowinfo_in *)flow;
1053 		fin->fi_len = sizeof(struct flowinfo_in);
1054 		fin->fi_family = AF_INET;
1055 
1056 		fin->fi_proto = ip->ip_p;
1057 		fin->fi_tos = ip->ip_tos;
1058 
1059 		fin->fi_src.s_addr = ip->ip_src.s_addr;
1060 		fin->fi_dst.s_addr = ip->ip_dst.s_addr;
1061 
1062 		if (filt_bmask & FIMB4_PORTS)
1063 			/* if port info is required, extract port numbers */
1064 			extract_ports4(m, ip, fin);
1065 		else {
1066 			fin->fi_sport = 0;
1067 			fin->fi_dport = 0;
1068 			fin->fi_gpi = 0;
1069 		}
1070 		return (1);
1071 	}
1072 
1073 #ifdef INET6
1074 	case PF_INET6: {
1075 		struct flowinfo_in6 *fin6;
1076 		struct ip6_hdr *ip6;
1077 
1078 		ip6 = mtod(m, struct ip6_hdr *);
1079 		/* should we check the ip version? */
1080 
1081 		fin6 = (struct flowinfo_in6 *)flow;
1082 		fin6->fi6_len = sizeof(struct flowinfo_in6);
1083 		fin6->fi6_family = AF_INET6;
1084 
1085 		fin6->fi6_proto = ip6->ip6_nxt;
1086 		fin6->fi6_tclass   = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1087 
1088 		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
1089 		fin6->fi6_src = ip6->ip6_src;
1090 		fin6->fi6_dst = ip6->ip6_dst;
1091 
1092 		if ((filt_bmask & FIMB6_PORTS) ||
1093 		    ((filt_bmask & FIMB6_PROTO)
1094 		     && ip6->ip6_nxt > IPPROTO_IPV6))
1095 			/*
1096 			 * if port info is required, or proto is required
1097 			 * but there are option headers, extract port
1098 			 * and protocol numbers.
1099 			 */
1100 			extract_ports6(m, ip6, fin6);
1101 		else {
1102 			fin6->fi6_sport = 0;
1103 			fin6->fi6_dport = 0;
1104 			fin6->fi6_gpi = 0;
1105 		}
1106 		return (1);
1107 	}
1108 #endif /* INET6 */
1109 
1110 	default:
1111 		break;
1112 	}
1113 
1114 	/* failed */
1115 	flow->fi_len = sizeof(struct flowinfo);
1116 	flow->fi_family = AF_UNSPEC;
1117 	return (0);
1118 }
1119 
1120 /*
1121  * helper routine to extract port numbers
1122  */
1123 /* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;		/* unused; aligns ah_spi to offset 4 */
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
1131 
1132 /*
1133  * extract port numbers from a ipv4 packet.
1134  */
1135 static int
1136 extract_ports4(m, ip, fin)
1137 	struct mbuf *m;
1138 	struct ip *ip;
1139 	struct flowinfo_in *fin;
1140 {
1141 	struct mbuf *m0;
1142 	u_short ip_off;
1143 	u_int8_t proto;
1144 	int 	off;
1145 
1146 	fin->fi_sport = 0;
1147 	fin->fi_dport = 0;
1148 	fin->fi_gpi = 0;
1149 
1150 	ip_off = ntohs(ip->ip_off);
1151 	/* if it is a fragment, try cached fragment info */
1152 	if (ip_off & IP_OFFMASK) {
1153 		ip4f_lookup(ip, fin);
1154 		return (1);
1155 	}
1156 
1157 	/* locate the mbuf containing the protocol header */
1158 	for (m0 = m; m0 != NULL; m0 = m0->m_next)
1159 		if (((caddr_t)ip >= m0->m_data) &&
1160 		    ((caddr_t)ip < m0->m_data + m0->m_len))
1161 			break;
1162 	if (m0 == NULL) {
1163 #ifdef ALTQ_DEBUG
1164 		printf("extract_ports4: can't locate header! ip=%p\n", ip);
1165 #endif
1166 		return (0);
1167 	}
1168 	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
1169 	proto = ip->ip_p;
1170 
1171 #ifdef ALTQ_IPSEC
1172  again:
1173 #endif
1174 	while (off >= m0->m_len) {
1175 		off -= m0->m_len;
1176 		m0 = m0->m_next;
1177 		if (m0 == NULL)
1178 			return (0);  /* bogus ip_hl! */
1179 	}
1180 	if (m0->m_len < off + 4)
1181 		return (0);
1182 
1183 	switch (proto) {
1184 	case IPPROTO_TCP:
1185 	case IPPROTO_UDP: {
1186 		struct udphdr *udp;
1187 
1188 		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
1189 		fin->fi_sport = udp->uh_sport;
1190 		fin->fi_dport = udp->uh_dport;
1191 		fin->fi_proto = proto;
1192 		}
1193 		break;
1194 
1195 #ifdef ALTQ_IPSEC
1196 	case IPPROTO_ESP:
1197 		if (fin->fi_gpi == 0){
1198 			u_int32_t *gpi;
1199 
1200 			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
1201 			fin->fi_gpi   = *gpi;
1202 		}
1203 		fin->fi_proto = proto;
1204 		break;
1205 
1206 	case IPPROTO_AH: {
1207 			/* get next header and header length */
1208 			struct _opt6 *opt6;
1209 
1210 			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
1211 			proto = opt6->opt6_nxt;
1212 			off += 8 + (opt6->opt6_hlen * 4);
1213 			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
1214 				fin->fi_gpi = opt6->ah_spi;
1215 		}
1216 		/* goto the next header */
1217 		goto again;
1218 #endif  /* ALTQ_IPSEC */
1219 
1220 	default:
1221 		fin->fi_proto = proto;
1222 		return (0);
1223 	}
1224 
1225 	/* if this is a first fragment, cache it. */
1226 	if (ip_off & IP_MF)
1227 		ip4f_cache(ip, fin);
1228 
1229 	return (1);
1230 }
1231 
1232 #ifdef INET6
/*
 * Extract transport-layer flow information (protocol, ports, IPsec SPI)
 * from an IPv6 packet into *fin6, walking the extension-header chain
 * that starts right after the fixed IPv6 header.  Returns 1 when the
 * flow info could be filled in, 0 on failure (header not within the
 * mbuf chain, truncated packet, fragment, or unsupported next header).
 */
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	/* offset of the first next header, relative to m0's data start */
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		/* advance to the mbuf that actually holds the current header */
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		/* need at least 4 contiguous bytes of the header */
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			/* TCP and UDP both begin with src/dst port fields */
			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				/* use the ESP SPI as the generalized port id */
				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			/* record the AH SPI only when 8 contiguous bytes exist */
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			/* AH length field is counted in 4-byte units */
			off += 8 + (opt6->opt6_hlen * 4);
			/* goto the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			/* option header length is in 8-byte units, less one */
			off += (opt6->opt6_hlen + 1) * 8;
			/* goto the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
1328 #endif /* INET6 */
1329 
1330 /*
1331  * altq common classifier
1332  */
1333 int
1334 acc_add_filter(classifier, filter, class, phandle)
1335 	struct acc_classifier *classifier;
1336 	struct flow_filter *filter;
1337 	void	*class;
1338 	u_long	*phandle;
1339 {
1340 	struct acc_filter *afp, *prev, *tmp;
1341 	int	i, s;
1342 
1343 #ifdef INET6
1344 	if (filter->ff_flow.fi_family != AF_INET &&
1345 	    filter->ff_flow.fi_family != AF_INET6)
1346 		return (EINVAL);
1347 #else
1348 	if (filter->ff_flow.fi_family != AF_INET)
1349 		return (EINVAL);
1350 #endif
1351 
1352 	afp = malloc(sizeof(struct acc_filter),
1353 	       M_DEVBUF, M_WAITOK);
1354 	if (afp == NULL)
1355 		return (ENOMEM);
1356 	bzero(afp, sizeof(struct acc_filter));
1357 
1358 	afp->f_filter = *filter;
1359 	afp->f_class = class;
1360 
1361 	i = ACC_WILDCARD_INDEX;
1362 	if (filter->ff_flow.fi_family == AF_INET) {
1363 		struct flow_filter *filter4 = &afp->f_filter;
1364 
1365 		/*
1366 		 * if address is 0, it's a wildcard.  if address mask
1367 		 * isn't set, use full mask.
1368 		 */
1369 		if (filter4->ff_flow.fi_dst.s_addr == 0)
1370 			filter4->ff_mask.mask_dst.s_addr = 0;
1371 		else if (filter4->ff_mask.mask_dst.s_addr == 0)
1372 			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
1373 		if (filter4->ff_flow.fi_src.s_addr == 0)
1374 			filter4->ff_mask.mask_src.s_addr = 0;
1375 		else if (filter4->ff_mask.mask_src.s_addr == 0)
1376 			filter4->ff_mask.mask_src.s_addr = 0xffffffff;
1377 
1378 		/* clear extra bits in addresses  */
1379 		   filter4->ff_flow.fi_dst.s_addr &=
1380 		       filter4->ff_mask.mask_dst.s_addr;
1381 		   filter4->ff_flow.fi_src.s_addr &=
1382 		       filter4->ff_mask.mask_src.s_addr;
1383 
1384 		/*
1385 		 * if dst address is a wildcard, use hash-entry
1386 		 * ACC_WILDCARD_INDEX.
1387 		 */
1388 		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
1389 			i = ACC_WILDCARD_INDEX;
1390 		else
1391 			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
1392 	}
1393 #ifdef INET6
1394 	else if (filter->ff_flow.fi_family == AF_INET6) {
1395 		struct flow_filter6 *filter6 =
1396 			(struct flow_filter6 *)&afp->f_filter;
1397 #ifndef IN6MASK0 /* taken from kame ipv6 */
1398 #define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
1399 #define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
1400 		const struct in6_addr in6mask0 = IN6MASK0;
1401 		const struct in6_addr in6mask128 = IN6MASK128;
1402 #endif
1403 
1404 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
1405 			filter6->ff_mask6.mask6_dst = in6mask0;
1406 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
1407 			filter6->ff_mask6.mask6_dst = in6mask128;
1408 		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
1409 			filter6->ff_mask6.mask6_src = in6mask0;
1410 		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
1411 			filter6->ff_mask6.mask6_src = in6mask128;
1412 
1413 		/* clear extra bits in addresses  */
1414 		for (i = 0; i < 16; i++)
1415 			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
1416 			    filter6->ff_mask6.mask6_dst.s6_addr[i];
1417 		for (i = 0; i < 16; i++)
1418 			filter6->ff_flow6.fi6_src.s6_addr[i] &=
1419 			    filter6->ff_mask6.mask6_src.s6_addr[i];
1420 
1421 		if (filter6->ff_flow6.fi6_flowlabel == 0)
1422 			i = ACC_WILDCARD_INDEX;
1423 		else
1424 			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
1425 	}
1426 #endif /* INET6 */
1427 
1428 	afp->f_handle = get_filt_handle(classifier, i);
1429 
1430 	/* update filter bitmask */
1431 	afp->f_fbmask = filt2fibmask(filter);
1432 	classifier->acc_fbmask |= afp->f_fbmask;
1433 
1434 	/*
1435 	 * add this filter to the filter list.
1436 	 * filters are ordered from the highest rule number.
1437 	 */
1438 	s = splnet();
1439 	prev = NULL;
1440 	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
1441 		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
1442 			prev = tmp;
1443 		else
1444 			break;
1445 	}
1446 	if (prev == NULL)
1447 		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
1448 	else
1449 		LIST_INSERT_AFTER(prev, afp, f_chain);
1450 	splx(s);
1451 
1452 	*phandle = afp->f_handle;
1453 	return (0);
1454 }
1455 
1456 int
1457 acc_delete_filter(classifier, handle)
1458 	struct acc_classifier *classifier;
1459 	u_long handle;
1460 {
1461 	struct acc_filter *afp;
1462 	int	s;
1463 
1464 	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
1465 		return (EINVAL);
1466 
1467 	s = splnet();
1468 	LIST_REMOVE(afp, f_chain);
1469 	splx(s);
1470 
1471 	free(afp, M_DEVBUF);
1472 
1473 	/* todo: update filt_bmask */
1474 
1475 	return (0);
1476 }
1477 
1478 /*
1479  * delete filters referencing to the specified class.
1480  * if the all flag is not 0, delete all the filters.
1481  */
1482 int
1483 acc_discard_filters(classifier, class, all)
1484 	struct acc_classifier *classifier;
1485 	void	*class;
1486 	int	all;
1487 {
1488 	struct acc_filter *afp;
1489 	int	i, s;
1490 
1491 	s = splnet();
1492 	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
1493 		do {
1494 			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1495 				if (all || afp->f_class == class) {
1496 					LIST_REMOVE(afp, f_chain);
1497 					free(afp, M_DEVBUF);
1498 					/* start again from the head */
1499 					break;
1500 				}
1501 		} while (afp != NULL);
1502 	}
1503 	splx(s);
1504 
1505 	if (all)
1506 		classifier->acc_fbmask = 0;
1507 
1508 	return (0);
1509 }
1510 
/*
 * Classify a packet: extract its flow info and scan the classifier's
 * filter lists for the first matching filter.  Returns the class
 * pointer of the matching filter, or NULL when nothing matches.
 * Two IPv4 fast paths exist: one for classifiers that only test the
 * TOS field, and one for classifiers that only test protocol/ports.
 */
void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
1610 
1611 static int
1612 apply_filter4(fbmask, filt, pkt)
1613 	u_int32_t	fbmask;
1614 	struct flow_filter *filt;
1615 	struct flowinfo_in *pkt;
1616 {
1617 	if (filt->ff_flow.fi_family != AF_INET)
1618 		return (0);
1619 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1620 		return (0);
1621 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1622 		return (0);
1623 	if ((fbmask & FIMB4_DADDR) &&
1624 	    filt->ff_flow.fi_dst.s_addr !=
1625 	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
1626 		return (0);
1627 	if ((fbmask & FIMB4_SADDR) &&
1628 	    filt->ff_flow.fi_src.s_addr !=
1629 	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
1630 		return (0);
1631 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1632 		return (0);
1633 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1634 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1635 		return (0);
1636 	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
1637 		return (0);
1638 	/* match */
1639 	return (1);
1640 }
1641 
1642 /*
1643  * filter matching function optimized for a common case that checks
1644  * only protocol and port numbers
1645  */
1646 static int
1647 apply_ppfilter4(fbmask, filt, pkt)
1648 	u_int32_t	fbmask;
1649 	struct flow_filter *filt;
1650 	struct flowinfo_in *pkt;
1651 {
1652 	if (filt->ff_flow.fi_family != AF_INET)
1653 		return (0);
1654 	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
1655 		return (0);
1656 	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
1657 		return (0);
1658 	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
1659 		return (0);
1660 	/* match */
1661 	return (1);
1662 }
1663 
1664 /*
1665  * filter matching function only for tos field.
1666  */
1667 static int
1668 apply_tosfilter4(fbmask, filt, pkt)
1669 	u_int32_t	fbmask;
1670 	struct flow_filter *filt;
1671 	struct flowinfo_in *pkt;
1672 {
1673 	if (filt->ff_flow.fi_family != AF_INET)
1674 		return (0);
1675 	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
1676 	    (pkt->fi_tos & filt->ff_mask.mask_tos))
1677 		return (0);
1678 	/* match */
1679 	return (1);
1680 }
1681 
1682 #ifdef INET6
1683 static int
1684 apply_filter6(fbmask, filt, pkt)
1685 	u_int32_t	fbmask;
1686 	struct flow_filter6 *filt;
1687 	struct flowinfo_in6 *pkt;
1688 {
1689 	int i;
1690 
1691 	if (filt->ff_flow6.fi6_family != AF_INET6)
1692 		return (0);
1693 	if ((fbmask & FIMB6_FLABEL) &&
1694 	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
1695 		return (0);
1696 	if ((fbmask & FIMB6_PROTO) &&
1697 	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
1698 		return (0);
1699 	if ((fbmask & FIMB6_SPORT) &&
1700 	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
1701 		return (0);
1702 	if ((fbmask & FIMB6_DPORT) &&
1703 	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
1704 		return (0);
1705 	if (fbmask & FIMB6_SADDR) {
1706 		for (i = 0; i < 4; i++)
1707 			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
1708 			    (pkt->fi6_src.s6_addr32[i] &
1709 			     filt->ff_mask6.mask6_src.s6_addr32[i]))
1710 				return (0);
1711 	}
1712 	if (fbmask & FIMB6_DADDR) {
1713 		for (i = 0; i < 4; i++)
1714 			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
1715 			    (pkt->fi6_dst.s6_addr32[i] &
1716 			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
1717 				return (0);
1718 	}
1719 	if ((fbmask & FIMB6_TCLASS) &&
1720 	    filt->ff_flow6.fi6_tclass !=
1721 	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
1722 		return (0);
1723 	if ((fbmask & FIMB6_GPI) &&
1724 	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
1725 		return (0);
1726 	/* match */
1727 	return (1);
1728 }
1729 #endif /* INET6 */
1730 
1731 /*
1732  *  filter handle:
1733  *	bit 20-28: index to the filter hash table
1734  *	bit  0-19: unique id in the hash bucket.
1735  */
1736 static u_long
1737 get_filt_handle(classifier, i)
1738 	struct acc_classifier *classifier;
1739 	int	i;
1740 {
1741 	static u_long handle_number = 1;
1742 	u_long 	handle;
1743 	struct acc_filter *afp;
1744 
1745 	while (1) {
1746 		handle = handle_number++ & 0x000fffff;
1747 
1748 		if (LIST_EMPTY(&classifier->acc_filters[i]))
1749 			break;
1750 
1751 		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1752 			if ((afp->f_handle & 0x000fffff) == handle)
1753 				break;
1754 		if (afp == NULL)
1755 			break;
1756 		/* this handle is already used, try again */
1757 	}
1758 
1759 	return ((i << 20) | handle);
1760 }
1761 
1762 /* convert filter handle to filter pointer */
1763 static struct acc_filter *
1764 filth_to_filtp(classifier, handle)
1765 	struct acc_classifier *classifier;
1766 	u_long handle;
1767 {
1768 	struct acc_filter *afp;
1769 	int	i;
1770 
1771 	i = ACC_GET_HINDEX(handle);
1772 
1773 	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
1774 		if (afp->f_handle == handle)
1775 			return (afp);
1776 
1777 	return (NULL);
1778 }
1779 
1780 /* create flowinfo bitmask */
1781 static u_int32_t
1782 filt2fibmask(filt)
1783 	struct flow_filter *filt;
1784 {
1785 	u_int32_t mask = 0;
1786 #ifdef INET6
1787 	struct flow_filter6 *filt6;
1788 #endif
1789 
1790 	switch (filt->ff_flow.fi_family) {
1791 	case AF_INET:
1792 		if (filt->ff_flow.fi_proto != 0)
1793 			mask |= FIMB4_PROTO;
1794 		if (filt->ff_flow.fi_tos != 0)
1795 			mask |= FIMB4_TOS;
1796 		if (filt->ff_flow.fi_dst.s_addr != 0)
1797 			mask |= FIMB4_DADDR;
1798 		if (filt->ff_flow.fi_src.s_addr != 0)
1799 			mask |= FIMB4_SADDR;
1800 		if (filt->ff_flow.fi_sport != 0)
1801 			mask |= FIMB4_SPORT;
1802 		if (filt->ff_flow.fi_dport != 0)
1803 			mask |= FIMB4_DPORT;
1804 		if (filt->ff_flow.fi_gpi != 0)
1805 			mask |= FIMB4_GPI;
1806 		break;
1807 #ifdef INET6
1808 	case AF_INET6:
1809 		filt6 = (struct flow_filter6 *)filt;
1810 
1811 		if (filt6->ff_flow6.fi6_proto != 0)
1812 			mask |= FIMB6_PROTO;
1813 		if (filt6->ff_flow6.fi6_tclass != 0)
1814 			mask |= FIMB6_TCLASS;
1815 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
1816 			mask |= FIMB6_DADDR;
1817 		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
1818 			mask |= FIMB6_SADDR;
1819 		if (filt6->ff_flow6.fi6_sport != 0)
1820 			mask |= FIMB6_SPORT;
1821 		if (filt6->ff_flow6.fi6_dport != 0)
1822 			mask |= FIMB6_DPORT;
1823 		if (filt6->ff_flow6.fi6_gpi != 0)
1824 			mask |= FIMB6_GPI;
1825 		if (filt6->ff_flow6.fi6_flowlabel != 0)
1826 			mask |= FIMB6_FLABEL;
1827 		break;
1828 #endif /* INET6 */
1829 	}
1830 	return (mask);
1831 }
1832 
1833 
1834 /*
1835  * helper functions to handle IPv4 fragments.
1836  * currently only in-sequence fragments are handled.
1837  *	- fragment info is cached in a LRU list.
1838  *	- when a first fragment is found, cache its flow info.
1839  *	- when a non-first fragment is found, lookup the cache.
1840  */
1841 
/* one cached entry: flow info of a fragmented IPv4 datagram */
struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;	/* LRU list linkage */
    char    ip4f_valid;			/* non-zero while the entry is in use */
    u_short ip4f_id;			/* IP identification of the datagram */
    struct flowinfo_in ip4f_info;	/* cached addrs/ports/proto/gpi */
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
1852 
1853 
1854 static void
1855 ip4f_cache(ip, fin)
1856 	struct ip *ip;
1857 	struct flowinfo_in *fin;
1858 {
1859 	struct ip4_frag *fp;
1860 
1861 	if (TAILQ_EMPTY(&ip4f_list)) {
1862 		/* first time call, allocate fragment cache entries. */
1863 		if (ip4f_init() < 0)
1864 			/* allocation failed! */
1865 			return;
1866 	}
1867 
1868 	fp = ip4f_alloc();
1869 	fp->ip4f_id = ip->ip_id;
1870 	fp->ip4f_info.fi_proto = ip->ip_p;
1871 	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
1872 	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;
1873 
1874 	/* save port numbers */
1875 	fp->ip4f_info.fi_sport = fin->fi_sport;
1876 	fp->ip4f_info.fi_dport = fin->fi_dport;
1877 	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
1878 }
1879 
1880 static int
1881 ip4f_lookup(ip, fin)
1882 	struct ip *ip;
1883 	struct flowinfo_in *fin;
1884 {
1885 	struct ip4_frag *fp;
1886 
1887 	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
1888 	     fp = TAILQ_NEXT(fp, ip4f_chain))
1889 		if (ip->ip_id == fp->ip4f_id &&
1890 		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
1891 		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
1892 		    ip->ip_p == fp->ip4f_info.fi_proto) {
1893 
1894 			/* found the matching entry */
1895 			fin->fi_sport = fp->ip4f_info.fi_sport;
1896 			fin->fi_dport = fp->ip4f_info.fi_dport;
1897 			fin->fi_gpi   = fp->ip4f_info.fi_gpi;
1898 
1899 			if ((ntohs(ip->ip_off) & IP_MF) == 0)
1900 				/* this is the last fragment,
1901 				   release the entry. */
1902 				ip4f_free(fp);
1903 
1904 			return (1);
1905 		}
1906 
1907 	/* no matching entry found */
1908 	return (0);
1909 }
1910 
1911 static int
1912 ip4f_init(void)
1913 {
1914 	struct ip4_frag *fp;
1915 	int i;
1916 
1917 	TAILQ_INIT(&ip4f_list);
1918 	for (i=0; i<IP4F_TABSIZE; i++) {
1919 		fp = malloc(sizeof(struct ip4_frag),
1920 		       M_DEVBUF, M_NOWAIT);
1921 		if (fp == NULL) {
1922 			printf("ip4f_init: can't alloc %dth entry!\n", i);
1923 			if (i == 0)
1924 				return (-1);
1925 			return (0);
1926 		}
1927 		fp->ip4f_valid = 0;
1928 		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1929 	}
1930 	return (0);
1931 }
1932 
1933 static struct ip4_frag *
1934 ip4f_alloc(void)
1935 {
1936 	struct ip4_frag *fp;
1937 
1938 	/* reclaim an entry at the tail, put it at the head */
1939 	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
1940 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1941 	fp->ip4f_valid = 1;
1942 	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
1943 	return (fp);
1944 }
1945 
1946 static void
1947 ip4f_free(fp)
1948 	struct ip4_frag *fp;
1949 {
1950 	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
1951 	fp->ip4f_valid = 0;
1952 	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
1953 }
1954 
1955 #endif /* ALTQ3_CLFIER_COMPAT */
1956