xref: /freebsd/sbin/pfctl/pfctl_altq.c (revision 145992504973bd16cf3518af9ba5ce185fefa82a)
1 /*	$OpenBSD: pfctl_altq.c,v 1.93 2007/10/15 02:16:35 deraadt Exp $	*/
2 
3 /*
4  * Copyright (c) 2002
5  *	Sony Computer Science Laboratories Inc.
6  * Copyright (c) 2002, 2003 Henning Brauer <henning@openbsd.org>
7  *
8  * Permission to use, copy, modify, and distribute this software for any
9  * purpose with or without fee is hereby granted, provided that the above
10  * copyright notice and this permission notice appear in all copies.
11  *
12  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19  */
20 
21 #include <sys/cdefs.h>
22 __FBSDID("$FreeBSD$");
23 
24 #include <sys/types.h>
25 #include <sys/ioctl.h>
26 #include <sys/socket.h>
27 
28 #include <net/if.h>
29 #include <netinet/in.h>
30 #include <net/pfvar.h>
31 
32 #include <err.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40 
41 #include <altq/altq.h>
42 #include <altq/altq_cbq.h>
43 #include <altq/altq_priq.h>
44 #include <altq/altq_hfsc.h>
45 
46 #include "pfctl_parser.h"
47 #include "pfctl.h"
48 
49 #define is_sc_null(sc)	(((sc) == NULL) || ((sc)->m1 == 0 && (sc)->m2 == 0))
50 
51 TAILQ_HEAD(altqs, pf_altq) altqs = TAILQ_HEAD_INITIALIZER(altqs);
52 LIST_HEAD(gen_sc, segment) rtsc, lssc;
53 
54 struct pf_altq	*qname_to_pfaltq(const char *, const char *);
55 u_int32_t	 qname_to_qid(const char *);
56 
57 static int	eval_pfqueue_cbq(struct pfctl *, struct pf_altq *);
58 static int	cbq_compute_idletime(struct pfctl *, struct pf_altq *);
59 static int	check_commit_cbq(int, int, struct pf_altq *);
60 static int	print_cbq_opts(const struct pf_altq *);
61 
62 static int	eval_pfqueue_priq(struct pfctl *, struct pf_altq *);
63 static int	check_commit_priq(int, int, struct pf_altq *);
64 static int	print_priq_opts(const struct pf_altq *);
65 
66 static int	eval_pfqueue_hfsc(struct pfctl *, struct pf_altq *);
67 static int	check_commit_hfsc(int, int, struct pf_altq *);
68 static int	print_hfsc_opts(const struct pf_altq *,
69 		    const struct node_queue_opt *);
70 
71 static void		 gsc_add_sc(struct gen_sc *, struct service_curve *);
72 static int		 is_gsc_under_sc(struct gen_sc *,
73 			     struct service_curve *);
74 static void		 gsc_destroy(struct gen_sc *);
75 static struct segment	*gsc_getentry(struct gen_sc *, double);
76 static int		 gsc_add_seg(struct gen_sc *, double, double, double,
77 			     double);
78 static double		 sc_x2y(struct service_curve *, double);
79 
80 #ifdef __FreeBSD__
81 u_int32_t	getifspeed(int, char *);
82 #else
83 u_int32_t	 getifspeed(char *);
84 #endif
85 u_long		 getifmtu(char *);
86 int		 eval_queue_opts(struct pf_altq *, struct node_queue_opt *,
87 		     u_int32_t);
88 u_int32_t	 eval_bwspec(struct node_queue_bw *, u_int32_t);
89 void		 print_hfsc_sc(const char *, u_int, u_int, u_int,
90 		     const struct node_hfsc_sc *);
91 
92 void
93 pfaltq_store(struct pf_altq *a)
94 {
95 	struct pf_altq	*altq;
96 
97 	if ((altq = malloc(sizeof(*altq))) == NULL)
98 		err(1, "malloc");
99 	memcpy(altq, a, sizeof(struct pf_altq));
100 	TAILQ_INSERT_TAIL(&altqs, altq, entries);
101 }
102 
103 struct pf_altq *
104 pfaltq_lookup(const char *ifname)
105 {
106 	struct pf_altq	*altq;
107 
108 	TAILQ_FOREACH(altq, &altqs, entries) {
109 		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
110 		    altq->qname[0] == 0)
111 			return (altq);
112 	}
113 	return (NULL);
114 }
115 
116 struct pf_altq *
117 qname_to_pfaltq(const char *qname, const char *ifname)
118 {
119 	struct pf_altq	*altq;
120 
121 	TAILQ_FOREACH(altq, &altqs, entries) {
122 		if (strncmp(ifname, altq->ifname, IFNAMSIZ) == 0 &&
123 		    strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
124 			return (altq);
125 	}
126 	return (NULL);
127 }
128 
129 u_int32_t
130 qname_to_qid(const char *qname)
131 {
132 	struct pf_altq	*altq;
133 
134 	/*
135 	 * We guarantee that same named queues on different interfaces
136 	 * have the same qid, so we do NOT need to limit matching on
137 	 * one interface!
138 	 */
139 
140 	TAILQ_FOREACH(altq, &altqs, entries) {
141 		if (strncmp(qname, altq->qname, PF_QNAME_SIZE) == 0)
142 			return (altq->qid);
143 	}
144 	return (0);
145 }
146 
147 void
148 print_altq(const struct pf_altq *a, unsigned int level,
149     struct node_queue_bw *bw, struct node_queue_opt *qopts)
150 {
151 	if (a->qname[0] != 0) {
152 		print_queue(a, level, bw, 1, qopts);
153 		return;
154 	}
155 
156 #ifdef __FreeBSD__
157 	if (a->local_flags & PFALTQ_FLAG_IF_REMOVED)
158 		printf("INACTIVE ");
159 #endif
160 
161 	printf("altq on %s ", a->ifname);
162 
163 	switch (a->scheduler) {
164 	case ALTQT_CBQ:
165 		if (!print_cbq_opts(a))
166 			printf("cbq ");
167 		break;
168 	case ALTQT_PRIQ:
169 		if (!print_priq_opts(a))
170 			printf("priq ");
171 		break;
172 	case ALTQT_HFSC:
173 		if (!print_hfsc_opts(a, qopts))
174 			printf("hfsc ");
175 		break;
176 	}
177 
178 	if (bw != NULL && bw->bw_percent > 0) {
179 		if (bw->bw_percent < 100)
180 			printf("bandwidth %u%% ", bw->bw_percent);
181 	} else
182 		printf("bandwidth %s ", rate2str((double)a->ifbandwidth));
183 
184 	if (a->qlimit != DEFAULT_QLIMIT)
185 		printf("qlimit %u ", a->qlimit);
186 	printf("tbrsize %u ", a->tbrsize);
187 }
188 
189 void
190 print_queue(const struct pf_altq *a, unsigned int level,
191     struct node_queue_bw *bw, int print_interface,
192     struct node_queue_opt *qopts)
193 {
194 	unsigned int	i;
195 
196 #ifdef __FreeBSD__
197 	if (a->local_flags & PFALTQ_FLAG_IF_REMOVED)
198 		printf("INACTIVE ");
199 #endif
200 	printf("queue ");
201 	for (i = 0; i < level; ++i)
202 		printf(" ");
203 	printf("%s ", a->qname);
204 	if (print_interface)
205 		printf("on %s ", a->ifname);
206 	if (a->scheduler == ALTQT_CBQ || a->scheduler == ALTQT_HFSC) {
207 		if (bw != NULL && bw->bw_percent > 0) {
208 			if (bw->bw_percent < 100)
209 				printf("bandwidth %u%% ", bw->bw_percent);
210 		} else
211 			printf("bandwidth %s ", rate2str((double)a->bandwidth));
212 	}
213 	if (a->priority != DEFAULT_PRIORITY)
214 		printf("priority %u ", a->priority);
215 	if (a->qlimit != DEFAULT_QLIMIT)
216 		printf("qlimit %u ", a->qlimit);
217 	switch (a->scheduler) {
218 	case ALTQT_CBQ:
219 		print_cbq_opts(a);
220 		break;
221 	case ALTQT_PRIQ:
222 		print_priq_opts(a);
223 		break;
224 	case ALTQT_HFSC:
225 		print_hfsc_opts(a, qopts);
226 		break;
227 	}
228 }
229 
/*
 * eval_pfaltq computes the discipline parameters.
 * Determines the interface bandwidth (explicit value or queried from the
 * kernel) and a token-bucket regulator size, then evaluates the
 * scheduler-specific options.  Returns the number of errors found.
 */
int
eval_pfaltq(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	u_int	rate, size, errors = 0;

	/* an explicitly configured absolute bandwidth wins */
	if (bw->bw_absolute > 0)
		pa->ifbandwidth = bw->bw_absolute;
	else
		/* otherwise ask the kernel for the link speed */
#ifdef __FreeBSD__
		if ((rate = getifspeed(pf->dev, pa->ifname)) == 0) {
#else
		if ((rate = getifspeed(pa->ifname)) == 0) {
#endif
			fprintf(stderr, "interface %s does not know its bandwidth, "
			    "please specify an absolute bandwidth\n",
			    pa->ifname);
			errors++;
		} else if ((pa->ifbandwidth = eval_bwspec(bw, rate)) == 0)
			/* bwspec resolved to nothing usable: use link speed */
			pa->ifbandwidth = rate;

	errors += eval_queue_opts(pa, opts, pa->ifbandwidth);

	/* if tbrsize is not specified, use heuristics */
	if (pa->tbrsize == 0) {
		rate = pa->ifbandwidth;
		/* faster links get a deeper bucket (in MTU-sized packets) */
		if (rate <= 1 * 1000 * 1000)
			size = 1;
		else if (rate <= 10 * 1000 * 1000)
			size = 4;
		else if (rate <= 200 * 1000 * 1000)
			size = 8;
		else
			size = 24;
		size = size * getifmtu(pa->ifname);
		/* tbrsize is a 16-bit field in the kernel; clamp */
		if (size > 0xffff)
			size = 0xffff;
		pa->tbrsize = size;
	}
	return (errors);
}
274 
275 /*
276  * check_commit_altq does consistency check for each interface
277  */
278 int
279 check_commit_altq(int dev, int opts)
280 {
281 	struct pf_altq	*altq;
282 	int		 error = 0;
283 
284 	/* call the discipline check for each interface. */
285 	TAILQ_FOREACH(altq, &altqs, entries) {
286 		if (altq->qname[0] == 0) {
287 			switch (altq->scheduler) {
288 			case ALTQT_CBQ:
289 				error = check_commit_cbq(dev, opts, altq);
290 				break;
291 			case ALTQT_PRIQ:
292 				error = check_commit_priq(dev, opts, altq);
293 				break;
294 			case ALTQT_HFSC:
295 				error = check_commit_hfsc(dev, opts, altq);
296 				break;
297 			default:
298 				break;
299 			}
300 		}
301 	}
302 	return (error);
303 }
304 
/*
 * eval_pfqueue computes the queue parameters.
 * Validates the queue against its interface altq and its parent queue
 * (uniqueness, bandwidth limits), assigns qids, then dispatches to the
 * scheduler-specific evaluator.  Returns non-zero on error.
 */
int
eval_pfqueue(struct pfctl *pf, struct pf_altq *pa, struct node_queue_bw *bw,
    struct node_queue_opt *opts)
{
	/* should be merged with expand_queue */
	struct pf_altq	*if_pa, *parent, *altq;
	u_int32_t	 bwsum;
	int		 error = 0;

	/* find the corresponding interface and copy fields used by queues */
	if ((if_pa = pfaltq_lookup(pa->ifname)) == NULL) {
		fprintf(stderr, "altq not defined on %s\n", pa->ifname);
		return (1);
	}
	pa->scheduler = if_pa->scheduler;
	pa->ifbandwidth = if_pa->ifbandwidth;

	/* queue names must be unique per interface */
	if (qname_to_pfaltq(pa->qname, pa->ifname) != NULL) {
		fprintf(stderr, "queue %s already exists on interface %s\n",
		    pa->qname, pa->ifname);
		return (1);
	}
	/* reuse the qid of a same-named queue on another interface, if any */
	pa->qid = qname_to_qid(pa->qname);

	/* resolve the named parent (if any) and inherit its qid */
	parent = NULL;
	if (pa->parent[0] != 0) {
		parent = qname_to_pfaltq(pa->parent, pa->ifname);
		if (parent == NULL) {
			fprintf(stderr, "parent %s not found for %s\n",
			    pa->parent, pa->qname);
			return (1);
		}
		pa->parent_qid = parent->qid;
	}
	if (pa->qlimit == 0)
		pa->qlimit = DEFAULT_QLIMIT;

	/* only cbq and hfsc queues carry a bandwidth of their own */
	if (pa->scheduler == ALTQT_CBQ || pa->scheduler == ALTQT_HFSC) {
		pa->bandwidth = eval_bwspec(bw,
		    parent == NULL ? 0 : parent->bandwidth);

		if (pa->bandwidth > pa->ifbandwidth) {
			fprintf(stderr, "bandwidth for %s higher than "
			    "interface\n", pa->qname);
			return (1);
		}
		/* check the sum of the child bandwidth is under parent's */
		if (parent != NULL) {
			if (pa->bandwidth > parent->bandwidth) {
				warnx("bandwidth for %s higher than parent",
				    pa->qname);
				return (1);
			}
			/* sum the bandwidth of all existing siblings */
			bwsum = 0;
			TAILQ_FOREACH(altq, &altqs, entries) {
				if (strncmp(altq->ifname, pa->ifname,
				    IFNAMSIZ) == 0 &&
				    altq->qname[0] != 0 &&
				    strncmp(altq->parent, pa->parent,
				    PF_QNAME_SIZE) == 0)
					bwsum += altq->bandwidth;
			}
			bwsum += pa->bandwidth;
			/* over-subscription is a warning, not a hard error */
			if (bwsum > parent->bandwidth) {
				warnx("the sum of the child bandwidth higher"
				    " than parent \"%s\"", parent->qname);
			}
		}
	}

	if (eval_queue_opts(pa, opts, parent == NULL? 0 : parent->bandwidth))
		return (1);

	/* scheduler-specific parameter computation and validation */
	switch (pa->scheduler) {
	case ALTQT_CBQ:
		error = eval_pfqueue_cbq(pf, pa);
		break;
	case ALTQT_PRIQ:
		error = eval_pfqueue_priq(pf, pa);
		break;
	case ALTQT_HFSC:
		error = eval_pfqueue_hfsc(pf, pa);
		break;
	default:
		break;
	}
	return (error);
}
396 
397 /*
398  * CBQ support functions
399  */
400 #define	RM_FILTER_GAIN	5	/* log2 of gain, e.g., 5 => 31/32 */
401 #define	RM_NS_PER_SEC	(1000000000)
402 
403 static int
404 eval_pfqueue_cbq(struct pfctl *pf, struct pf_altq *pa)
405 {
406 	struct cbq_opts	*opts;
407 	u_int		 ifmtu;
408 
409 	if (pa->priority >= CBQ_MAXPRI) {
410 		warnx("priority out of range: max %d", CBQ_MAXPRI - 1);
411 		return (-1);
412 	}
413 
414 	ifmtu = getifmtu(pa->ifname);
415 	opts = &pa->pq_u.cbq_opts;
416 
417 	if (opts->pktsize == 0) {	/* use default */
418 		opts->pktsize = ifmtu;
419 		if (opts->pktsize > MCLBYTES)	/* do what TCP does */
420 			opts->pktsize &= ~MCLBYTES;
421 	} else if (opts->pktsize > ifmtu)
422 		opts->pktsize = ifmtu;
423 	if (opts->maxpktsize == 0)	/* use default */
424 		opts->maxpktsize = ifmtu;
425 	else if (opts->maxpktsize > ifmtu)
426 		opts->pktsize = ifmtu;
427 
428 	if (opts->pktsize > opts->maxpktsize)
429 		opts->pktsize = opts->maxpktsize;
430 
431 	if (pa->parent[0] == 0)
432 		opts->flags |= (CBQCLF_ROOTCLASS | CBQCLF_WRR);
433 
434 	cbq_compute_idletime(pf, pa);
435 	return (0);
436 }
437 
438 /*
439  * compute ns_per_byte, maxidle, minidle, and offtime
440  */
441 static int
442 cbq_compute_idletime(struct pfctl *pf, struct pf_altq *pa)
443 {
444 	struct cbq_opts	*opts;
445 	double		 maxidle_s, maxidle, minidle;
446 	double		 offtime, nsPerByte, ifnsPerByte, ptime, cptime;
447 	double		 z, g, f, gton, gtom;
448 	u_int		 minburst, maxburst;
449 
450 	opts = &pa->pq_u.cbq_opts;
451 	ifnsPerByte = (1.0 / (double)pa->ifbandwidth) * RM_NS_PER_SEC * 8;
452 	minburst = opts->minburst;
453 	maxburst = opts->maxburst;
454 
455 	if (pa->bandwidth == 0)
456 		f = 0.0001;	/* small enough? */
457 	else
458 		f = ((double) pa->bandwidth / (double) pa->ifbandwidth);
459 
460 	nsPerByte = ifnsPerByte / f;
461 	ptime = (double)opts->pktsize * ifnsPerByte;
462 	cptime = ptime * (1.0 - f) / f;
463 
464 	if (nsPerByte * (double)opts->maxpktsize > (double)INT_MAX) {
465 		/*
466 		 * this causes integer overflow in kernel!
467 		 * (bandwidth < 6Kbps when max_pkt_size=1500)
468 		 */
469 		if (pa->bandwidth != 0 && (pf->opts & PF_OPT_QUIET) == 0)
470 			warnx("queue bandwidth must be larger than %s",
471 			    rate2str(ifnsPerByte * (double)opts->maxpktsize /
472 			    (double)INT_MAX * (double)pa->ifbandwidth));
473 			fprintf(stderr, "cbq: queue %s is too slow!\n",
474 			    pa->qname);
475 		nsPerByte = (double)(INT_MAX / opts->maxpktsize);
476 	}
477 
478 	if (maxburst == 0) {  /* use default */
479 		if (cptime > 10.0 * 1000000)
480 			maxburst = 4;
481 		else
482 			maxburst = 16;
483 	}
484 	if (minburst == 0)  /* use default */
485 		minburst = 2;
486 	if (minburst > maxburst)
487 		minburst = maxburst;
488 
489 	z = (double)(1 << RM_FILTER_GAIN);
490 	g = (1.0 - 1.0 / z);
491 	gton = pow(g, (double)maxburst);
492 	gtom = pow(g, (double)(minburst-1));
493 	maxidle = ((1.0 / f - 1.0) * ((1.0 - gton) / gton));
494 	maxidle_s = (1.0 - g);
495 	if (maxidle > maxidle_s)
496 		maxidle = ptime * maxidle;
497 	else
498 		maxidle = ptime * maxidle_s;
499 	offtime = cptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gtom) / gtom);
500 	minidle = -((double)opts->maxpktsize * (double)nsPerByte);
501 
502 	/* scale parameters */
503 	maxidle = ((maxidle * 8.0) / nsPerByte) *
504 	    pow(2.0, (double)RM_FILTER_GAIN);
505 	offtime = (offtime * 8.0) / nsPerByte *
506 	    pow(2.0, (double)RM_FILTER_GAIN);
507 	minidle = ((minidle * 8.0) / nsPerByte) *
508 	    pow(2.0, (double)RM_FILTER_GAIN);
509 
510 	maxidle = maxidle / 1000.0;
511 	offtime = offtime / 1000.0;
512 	minidle = minidle / 1000.0;
513 
514 	opts->minburst = minburst;
515 	opts->maxburst = maxburst;
516 	opts->ns_per_byte = (u_int)nsPerByte;
517 	opts->maxidle = (u_int)fabs(maxidle);
518 	opts->minidle = (int)minidle;
519 	opts->offtime = (u_int)fabs(offtime);
520 
521 	return (0);
522 }
523 
524 static int
525 check_commit_cbq(int dev, int opts, struct pf_altq *pa)
526 {
527 	struct pf_altq	*altq;
528 	int		 root_class, default_class;
529 	int		 error = 0;
530 
531 	/*
532 	 * check if cbq has one root queue and one default queue
533 	 * for this interface
534 	 */
535 	root_class = default_class = 0;
536 	TAILQ_FOREACH(altq, &altqs, entries) {
537 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
538 			continue;
539 		if (altq->qname[0] == 0)  /* this is for interface */
540 			continue;
541 		if (altq->pq_u.cbq_opts.flags & CBQCLF_ROOTCLASS)
542 			root_class++;
543 		if (altq->pq_u.cbq_opts.flags & CBQCLF_DEFCLASS)
544 			default_class++;
545 	}
546 	if (root_class != 1) {
547 		warnx("should have one root queue on %s", pa->ifname);
548 		error++;
549 	}
550 	if (default_class != 1) {
551 		warnx("should have one default queue on %s", pa->ifname);
552 		error++;
553 	}
554 	return (error);
555 }
556 
557 static int
558 print_cbq_opts(const struct pf_altq *a)
559 {
560 	const struct cbq_opts	*opts;
561 
562 	opts = &a->pq_u.cbq_opts;
563 	if (opts->flags) {
564 		printf("cbq(");
565 		if (opts->flags & CBQCLF_RED)
566 			printf(" red");
567 		if (opts->flags & CBQCLF_ECN)
568 			printf(" ecn");
569 		if (opts->flags & CBQCLF_RIO)
570 			printf(" rio");
571 		if (opts->flags & CBQCLF_CLEARDSCP)
572 			printf(" cleardscp");
573 		if (opts->flags & CBQCLF_FLOWVALVE)
574 			printf(" flowvalve");
575 		if (opts->flags & CBQCLF_BORROW)
576 			printf(" borrow");
577 		if (opts->flags & CBQCLF_WRR)
578 			printf(" wrr");
579 		if (opts->flags & CBQCLF_EFFICIENT)
580 			printf(" efficient");
581 		if (opts->flags & CBQCLF_ROOTCLASS)
582 			printf(" root");
583 		if (opts->flags & CBQCLF_DEFCLASS)
584 			printf(" default");
585 		printf(" ) ");
586 
587 		return (1);
588 	} else
589 		return (0);
590 }
591 
592 /*
593  * PRIQ support functions
594  */
595 static int
596 eval_pfqueue_priq(struct pfctl *pf, struct pf_altq *pa)
597 {
598 	struct pf_altq	*altq;
599 
600 	if (pa->priority >= PRIQ_MAXPRI) {
601 		warnx("priority out of range: max %d", PRIQ_MAXPRI - 1);
602 		return (-1);
603 	}
604 	/* the priority should be unique for the interface */
605 	TAILQ_FOREACH(altq, &altqs, entries) {
606 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) == 0 &&
607 		    altq->qname[0] != 0 && altq->priority == pa->priority) {
608 			warnx("%s and %s have the same priority",
609 			    altq->qname, pa->qname);
610 			return (-1);
611 		}
612 	}
613 
614 	return (0);
615 }
616 
617 static int
618 check_commit_priq(int dev, int opts, struct pf_altq *pa)
619 {
620 	struct pf_altq	*altq;
621 	int		 default_class;
622 	int		 error = 0;
623 
624 	/*
625 	 * check if priq has one default class for this interface
626 	 */
627 	default_class = 0;
628 	TAILQ_FOREACH(altq, &altqs, entries) {
629 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
630 			continue;
631 		if (altq->qname[0] == 0)  /* this is for interface */
632 			continue;
633 		if (altq->pq_u.priq_opts.flags & PRCF_DEFAULTCLASS)
634 			default_class++;
635 	}
636 	if (default_class != 1) {
637 		warnx("should have one default queue on %s", pa->ifname);
638 		error++;
639 	}
640 	return (error);
641 }
642 
643 static int
644 print_priq_opts(const struct pf_altq *a)
645 {
646 	const struct priq_opts	*opts;
647 
648 	opts = &a->pq_u.priq_opts;
649 
650 	if (opts->flags) {
651 		printf("priq(");
652 		if (opts->flags & PRCF_RED)
653 			printf(" red");
654 		if (opts->flags & PRCF_ECN)
655 			printf(" ecn");
656 		if (opts->flags & PRCF_RIO)
657 			printf(" rio");
658 		if (opts->flags & PRCF_CLEARDSCP)
659 			printf(" cleardscp");
660 		if (opts->flags & PRCF_DEFAULTCLASS)
661 			printf(" default");
662 		printf(" ) ");
663 
664 		return (1);
665 	} else
666 		return (0);
667 }
668 
669 /*
670  * HFSC support functions
671  */
/*
 * Validate an hfsc queue's three service curves (realtime, linkshare,
 * upperlimit) and run admission control against the sums of the
 * already-defined queues.  Returns 0 on success, -1 on error.
 */
static int
eval_pfqueue_hfsc(struct pfctl *pf, struct pf_altq *pa)
{
	struct pf_altq		*altq, *parent;
	struct hfsc_opts	*opts;
	struct service_curve	 sc;

	opts = &pa->pq_u.hfsc_opts;

	if (pa->parent[0] == 0) {
		/* root queue: linkshare is simply the whole interface */
		opts->lssc_m1 = pa->ifbandwidth;
		opts->lssc_m2 = pa->ifbandwidth;
		opts->lssc_d = 0;
		return (0);
	}

	/* fresh generalized-curve accumulators for this evaluation */
	LIST_INIT(&rtsc);
	LIST_INIT(&lssc);

	/* if link_share is not specified, use bandwidth */
	if (opts->lssc_m2 == 0)
		opts->lssc_m2 = pa->bandwidth;

	/* a curve with a burst slope (m1) but no sustained rate (m2)
	 * is malformed */
	if ((opts->rtsc_m1 > 0 && opts->rtsc_m2 == 0) ||
	    (opts->lssc_m1 > 0 && opts->lssc_m2 == 0) ||
	    (opts->ulsc_m1 > 0 && opts->ulsc_m2 == 0)) {
		warnx("m2 is zero for %s", pa->qname);
		return (-1);
	}

	/* only concave curves (m1 >= m2, or m1 == 0) are supported */
	if ((opts->rtsc_m1 < opts->rtsc_m2 && opts->rtsc_m1 != 0) ||
	    (opts->lssc_m1 < opts->lssc_m2 && opts->lssc_m1 != 0) ||
	    (opts->ulsc_m1 < opts->ulsc_m2 && opts->ulsc_m1 != 0)) {
		warnx("m1 must be zero for convex curve: %s", pa->qname);
		return (-1);
	}

	/*
	 * admission control:
	 * for the real-time service curve, the sum of the service curves
	 * should not exceed 80% of the interface bandwidth.  20% is reserved
	 * not to over-commit the actual interface bandwidth.
	 * for the linkshare service curve, the sum of the child service
	 * curve should not exceed the parent service curve.
	 * for the upper-limit service curve, the assigned bandwidth should
	 * be smaller than the interface bandwidth, and the upper-limit should
	 * be larger than the real-time service curve when both are defined.
	 */
	parent = qname_to_pfaltq(pa->parent, pa->ifname);
	if (parent == NULL)
		errx(1, "parent %s not found for %s", pa->parent, pa->qname);

	TAILQ_FOREACH(altq, &altqs, entries) {
		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
			continue;
		if (altq->qname[0] == 0)  /* this is for interface */
			continue;

		/* if the class has a real-time service curve, add it. */
		if (opts->rtsc_m2 != 0 && altq->pq_u.hfsc_opts.rtsc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.rtsc_m1;
			sc.d = altq->pq_u.hfsc_opts.rtsc_d;
			sc.m2 = altq->pq_u.hfsc_opts.rtsc_m2;
			gsc_add_sc(&rtsc, &sc);
		}

		/* linkshare is only summed over siblings (same parent) */
		if (strncmp(altq->parent, pa->parent, PF_QNAME_SIZE) != 0)
			continue;

		/* if the class has a linkshare service curve, add it. */
		if (opts->lssc_m2 != 0 && altq->pq_u.hfsc_opts.lssc_m2 != 0) {
			sc.m1 = altq->pq_u.hfsc_opts.lssc_m1;
			sc.d = altq->pq_u.hfsc_opts.lssc_d;
			sc.m2 = altq->pq_u.hfsc_opts.lssc_m2;
			gsc_add_sc(&lssc, &sc);
		}
	}

	/* check the real-time service curve.  reserve 20% of interface bw */
	if (opts->rtsc_m2 != 0) {
		/* add this queue to the sum */
		sc.m1 = opts->rtsc_m1;
		sc.d = opts->rtsc_d;
		sc.m2 = opts->rtsc_m2;
		gsc_add_sc(&rtsc, &sc);
		/* compare the sum with 80% of the interface */
		sc.m1 = 0;
		sc.d = 0;
		sc.m2 = pa->ifbandwidth / 100 * 80;
		if (!is_gsc_under_sc(&rtsc, &sc)) {
			warnx("real-time sc exceeds 80%% of the interface "
			    "bandwidth (%s)", rate2str((double)sc.m2));
			goto err_ret;
		}
	}

	/* check the linkshare service curve. */
	if (opts->lssc_m2 != 0) {
		/* add this queue to the child sum */
		sc.m1 = opts->lssc_m1;
		sc.d = opts->lssc_d;
		sc.m2 = opts->lssc_m2;
		gsc_add_sc(&lssc, &sc);
		/* compare the sum of the children with parent's sc */
		sc.m1 = parent->pq_u.hfsc_opts.lssc_m1;
		sc.d = parent->pq_u.hfsc_opts.lssc_d;
		sc.m2 = parent->pq_u.hfsc_opts.lssc_m2;
		if (!is_gsc_under_sc(&lssc, &sc)) {
			warnx("linkshare sc exceeds parent's sc");
			goto err_ret;
		}
	}

	/* check the upper-limit service curve. */
	if (opts->ulsc_m2 != 0) {
		if (opts->ulsc_m1 > pa->ifbandwidth ||
		    opts->ulsc_m2 > pa->ifbandwidth) {
			warnx("upper-limit larger than interface bandwidth");
			goto err_ret;
		}
		if (opts->rtsc_m2 != 0 && opts->rtsc_m2 > opts->ulsc_m2) {
			warnx("upper-limit sc smaller than real-time sc");
			goto err_ret;
		}
	}

	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);

	return (0);

err_ret:
	/* free accumulated segments on both success and failure paths */
	gsc_destroy(&rtsc);
	gsc_destroy(&lssc);
	return (-1);
}
809 
810 static int
811 check_commit_hfsc(int dev, int opts, struct pf_altq *pa)
812 {
813 	struct pf_altq	*altq, *def = NULL;
814 	int		 default_class;
815 	int		 error = 0;
816 
817 	/* check if hfsc has one default queue for this interface */
818 	default_class = 0;
819 	TAILQ_FOREACH(altq, &altqs, entries) {
820 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
821 			continue;
822 		if (altq->qname[0] == 0)  /* this is for interface */
823 			continue;
824 		if (altq->parent[0] == 0)  /* dummy root */
825 			continue;
826 		if (altq->pq_u.hfsc_opts.flags & HFCF_DEFAULTCLASS) {
827 			default_class++;
828 			def = altq;
829 		}
830 	}
831 	if (default_class != 1) {
832 		warnx("should have one default queue on %s", pa->ifname);
833 		return (1);
834 	}
835 	/* make sure the default queue is a leaf */
836 	TAILQ_FOREACH(altq, &altqs, entries) {
837 		if (strncmp(altq->ifname, pa->ifname, IFNAMSIZ) != 0)
838 			continue;
839 		if (altq->qname[0] == 0)  /* this is for interface */
840 			continue;
841 		if (strncmp(altq->parent, def->qname, PF_QNAME_SIZE) == 0) {
842 			warnx("default queue is not a leaf");
843 			error++;
844 		}
845 	}
846 	return (error);
847 }
848 
/*
 * Print "hfsc( flags and service curves ) " when any non-default hfsc
 * option is set.  Returns 1 if something was printed, 0 otherwise.
 * qopts (when non-NULL) carries the parser's original bandwidth specs so
 * percentages can be reprinted as entered.
 */
static int
print_hfsc_opts(const struct pf_altq *a, const struct node_queue_opt *qopts)
{
	const struct hfsc_opts		*opts;
	const struct node_hfsc_sc	*rtsc, *lssc, *ulsc;

	opts = &a->pq_u.hfsc_opts;
	if (qopts == NULL)
		rtsc = lssc = ulsc = NULL;
	else {
		rtsc = &qopts->data.hfsc_opts.realtime;
		lssc = &qopts->data.hfsc_opts.linkshare;
		ulsc = &qopts->data.hfsc_opts.upperlimit;
	}

	/*
	 * Print nothing when everything is at its default.  A linkshare
	 * curve equal to the queue bandwidth with no delay is the implicit
	 * default (see eval_pfqueue_hfsc), hence the extra predicate.
	 */
	if (opts->flags || opts->rtsc_m2 != 0 || opts->ulsc_m2 != 0 ||
	    (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
	    opts->lssc_d != 0))) {
		printf("hfsc(");
		if (opts->flags & HFCF_RED)
			printf(" red");
		if (opts->flags & HFCF_ECN)
			printf(" ecn");
		if (opts->flags & HFCF_RIO)
			printf(" rio");
		if (opts->flags & HFCF_CLEARDSCP)
			printf(" cleardscp");
		if (opts->flags & HFCF_DEFAULTCLASS)
			printf(" default");
		if (opts->rtsc_m2 != 0)
			print_hfsc_sc("realtime", opts->rtsc_m1, opts->rtsc_d,
			    opts->rtsc_m2, rtsc);
		/* same default-detection predicate as above */
		if (opts->lssc_m2 != 0 && (opts->lssc_m2 != a->bandwidth ||
		    opts->lssc_d != 0))
			print_hfsc_sc("linkshare", opts->lssc_m1, opts->lssc_d,
			    opts->lssc_m2, lssc);
		if (opts->ulsc_m2 != 0)
			print_hfsc_sc("upperlimit", opts->ulsc_m1, opts->ulsc_d,
			    opts->ulsc_m2, ulsc);
		printf(" ) ");

		return (1);
	} else
		return (0);
}
894 
895 /*
896  * admission control using generalized service curve
897  */
898 
899 /* add a new service curve to a generalized service curve */
900 static void
901 gsc_add_sc(struct gen_sc *gsc, struct service_curve *sc)
902 {
903 	if (is_sc_null(sc))
904 		return;
905 	if (sc->d != 0)
906 		gsc_add_seg(gsc, 0.0, 0.0, (double)sc->d, (double)sc->m1);
907 	gsc_add_seg(gsc, (double)sc->d, 0.0, INFINITY, (double)sc->m2);
908 }
909 
/*
 * check whether all points of a generalized service curve have
 * their y-coordinates no larger than a given two-piece linear
 * service curve.
 */
static int
is_gsc_under_sc(struct gen_sc *gsc, struct service_curve *sc)
{
	struct segment	*s, *last, *end;
	double		 y;

	if (is_sc_null(sc)) {
		/* a null sc only dominates a gsc that is flat at zero */
		if (LIST_EMPTY(gsc))
			return (1);
		LIST_FOREACH(s, gsc, _next) {
			if (s->m != 0)
				return (0);
		}
		return (1);
	}
	/*
	 * gsc has a dummy entry at the end with x = INFINITY.
	 * loop through up to this dummy entry.
	 */
	end = gsc_getentry(gsc, INFINITY);
	if (end == NULL)
		return (1);
	last = NULL;
	/* each segment's start point must lie on or under sc */
	for (s = LIST_FIRST(gsc); s != end; s = LIST_NEXT(s, _next)) {
		if (s->y > sc_x2y(sc, s->x))
			return (0);
		last = s;
	}
	/* last now holds the real last segment */
	if (last == NULL)
		return (1);
	/* beyond the last start point, compare slopes against sc's tail */
	if (last->m > sc->m2)
		return (0);
	/* if the last segment crosses sc's knee, check the value there too */
	if (last->x < sc->d && last->m > sc->m1) {
		y = last->y + (sc->d - last->x) * last->m;
		if (y > sc_x2y(sc, sc->d))
			return (0);
	}
	return (1);
}
955 
956 static void
957 gsc_destroy(struct gen_sc *gsc)
958 {
959 	struct segment	*s;
960 
961 	while ((s = LIST_FIRST(gsc)) != NULL) {
962 		LIST_REMOVE(s, _next);
963 		free(s);
964 	}
965 }
966 
/*
 * return a segment entry starting at x.
 * if gsc has no entry starting at x, a new entry is created at x.
 * Returns NULL only on allocation failure.
 */
static struct segment *
gsc_getentry(struct gen_sc *gsc, double x)
{
	struct segment	*new, *prev, *s;

	/* find either an exact match or the insertion point (after prev) */
	prev = NULL;
	LIST_FOREACH(s, gsc, _next) {
		if (s->x == x)
			return (s);	/* matching entry found */
		else if (s->x < x)
			prev = s;
		else
			break;
	}

	/* we have to create a new entry */
	if ((new = calloc(1, sizeof(struct segment))) == NULL)
		return (NULL);

	new->x = x;
	/* d is the width of the new segment, up to the next entry (s) */
	if (x == INFINITY || s == NULL)
		new->d = 0;
	else if (s->x == INFINITY)
		new->d = INFINITY;
	else
		new->d = s->x - x;
	if (prev == NULL) {
		/* insert the new entry at the head of the list */
		new->y = 0;
		new->m = 0;
		LIST_INSERT_HEAD(gsc, new, _next);
	} else {
		/*
		 * the start point intersects with the segment pointed by
		 * prev.  divide prev into 2 segments
		 */
		if (x == INFINITY) {
			prev->d = INFINITY;
			if (prev->m == 0)
				new->y = prev->y;
			else
				new->y = INFINITY;
		} else {
			prev->d = x - prev->x;
			/* new starts where prev's line reaches x */
			new->y = prev->d * prev->m + prev->y;
		}
		/* the second half keeps prev's slope */
		new->m = prev->m;
		LIST_INSERT_AFTER(prev, new, _next);
	}
	return (new);
}
1022 
/* add a segment to a generalized service curve */
static int
gsc_add_seg(struct gen_sc *gsc, double x, double y, double d, double m)
{
	struct segment	*start, *end, *s;
	double		 x2;

	/* the added segment covers [x, x + d) with slope m, offset y */
	if (d == INFINITY)
		x2 = INFINITY;
	else
		x2 = x + d;
	/* split existing segments so boundaries exist at x and x2 */
	start = gsc_getentry(gsc, x);
	end = gsc_getentry(gsc, x2);
	if (start == NULL || end == NULL)
		return (-1);

	/* within [x, x2): slopes add, and y grows linearly from x */
	for (s = start; s != end; s = LIST_NEXT(s, _next)) {
		s->m += m;
		s->y += y + (s->x - x) * m;
	}

	/* beyond x2: the finished segment contributes a constant m * d */
	end = gsc_getentry(gsc, INFINITY);
	for (; s != end; s = LIST_NEXT(s, _next)) {
		s->y += m * d;
	}

	return (0);
}
1051 
1052 /* get y-projection of a service curve */
1053 static double
1054 sc_x2y(struct service_curve *sc, double x)
1055 {
1056 	double	y;
1057 
1058 	if (x <= (double)sc->d)
1059 		/* y belongs to the 1st segment */
1060 		y = x * (double)sc->m1;
1061 	else
1062 		/* y belongs to the 2nd segment */
1063 		y = (double)sc->d * (double)sc->m1
1064 			+ (x - (double)sc->d) * (double)sc->m2;
1065 	return (y);
1066 }
1067 
/*
 * misc utilities
 */
#define	R2S_BUFS	8
#define	RATESTR_MAX	16

/*
 * Format a bit rate with a K/M/G suffix.  Returns a pointer into a
 * small static ring buffer, so up to R2S_BUFS results may be held
 * simultaneously (e.g. in a single printf call).
 */
char *
rate2str(double rate)
{
	static char	 r2sbuf[R2S_BUFS][RATESTR_MAX];	/* ring buffer */
	static int	 idx = 0;
	static const char unit[] = " KMG";
	char		*buf;
	int		 i = 0;

	buf = r2sbuf[idx];
	idx = (idx + 1) % R2S_BUFS;

	/* scale down by powers of 1000 while a bigger unit fits */
	while (rate >= 1000 && i <= 3) {
		rate /= 1000;
		i++;
	}

	/* print two decimals only when the value is not a whole number */
	if ((int)(rate * 100) % 100 != 0)
		snprintf(buf, RATESTR_MAX, "%.2f%cb", rate, unit[i]);
	else
		snprintf(buf, RATESTR_MAX, "%d%cb", (int)rate, unit[i]);

	return (buf);
}
1097 
1098 #ifdef __FreeBSD__
/*
 * XXX
 * FreeBSD does not have SIOCGIFDATA.
 * To emulate this, DIOCGIFSPEED ioctl added to pf.
 */
/*
 * Return the link speed of ifname in bits per second (0 means unknown),
 * queried through the pf device with DIOCGIFSPEED.  Exits on error.
 */
u_int32_t
getifspeed(int pfdev, char *ifname)
{
	struct pf_ifspeed io;

	bzero(&io, sizeof io);
	if (strlcpy(io.ifname, ifname, IFNAMSIZ) >=
	    sizeof(io.ifname))
		errx(1, "getifspeed: strlcpy");	/* name would be truncated */
	if (ioctl(pfdev, DIOCGIFSPEED, &io) == -1)
		err(1, "DIOCGIFSPEED");
	return ((u_int32_t)io.baudrate);
}
1117 #else
1118 u_int32_t
1119 getifspeed(char *ifname)
1120 {
1121 	int		s;
1122 	struct ifreq	ifr;
1123 	struct if_data	ifrdat;
1124 
1125 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1126 		err(1, "socket");
1127 	bzero(&ifr, sizeof(ifr));
1128 	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1129 	    sizeof(ifr.ifr_name))
1130 		errx(1, "getifspeed: strlcpy");
1131 	ifr.ifr_data = (caddr_t)&ifrdat;
1132 	if (ioctl(s, SIOCGIFDATA, (caddr_t)&ifr) == -1)
1133 		err(1, "SIOCGIFDATA");
1134 	if (close(s))
1135 		err(1, "close");
1136 	return ((u_int32_t)ifrdat.ifi_baudrate);
1137 }
1138 #endif
1139 
1140 u_long
1141 getifmtu(char *ifname)
1142 {
1143 	int		s;
1144 	struct ifreq	ifr;
1145 
1146 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
1147 		err(1, "socket");
1148 	bzero(&ifr, sizeof(ifr));
1149 	if (strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)) >=
1150 	    sizeof(ifr.ifr_name))
1151 		errx(1, "getifmtu: strlcpy");
1152 	if (ioctl(s, SIOCGIFMTU, (caddr_t)&ifr) == -1)
1153 #ifdef __FreeBSD__
1154 		ifr.ifr_mtu = 1500;
1155 #else
1156 		err(1, "SIOCGIFMTU");
1157 #endif
1158 	if (close(s))
1159 		err(1, "close");
1160 	if (ifr.ifr_mtu > 0)
1161 		return (ifr.ifr_mtu);
1162 	else {
1163 		warnx("could not get mtu for %s, assuming 1500", ifname);
1164 		return (1500);
1165 	}
1166 }
1167 
/*
 * Copy the parsed, scheduler-specific queue options from *opts into the
 * kernel-facing pf_altq structure *pa.  Percentage bandwidth specs are
 * resolved against ref_bw (bits per second).  Returns the number of
 * errors encountered (0 on success).
 */
int
eval_queue_opts(struct pf_altq *pa, struct node_queue_opt *opts,
    u_int32_t ref_bw)
{
	int	errors = 0;

	switch (pa->scheduler) {
	case ALTQT_CBQ:
		pa->pq_u.cbq_opts = opts->data.cbq_opts;
		break;
	case ALTQT_PRIQ:
		pa->pq_u.priq_opts = opts->data.priq_opts;
		break;
	case ALTQT_HFSC:
		/*
		 * HFSC carries up to three service curves (linkshare,
		 * realtime, upperlimit).  Each is copied only when it was
		 * actually specified in the config (.used); eval_bwspec()
		 * turns the m1/m2 specs (absolute or percentage of
		 * ref_bw) into bits per second.
		 */
		pa->pq_u.hfsc_opts.flags = opts->data.hfsc_opts.flags;
		if (opts->data.hfsc_opts.linkshare.used) {
			pa->pq_u.hfsc_opts.lssc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.linkshare.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.lssc_d =
			    opts->data.hfsc_opts.linkshare.d;
		}
		if (opts->data.hfsc_opts.realtime.used) {
			pa->pq_u.hfsc_opts.rtsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.realtime.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.rtsc_d =
			    opts->data.hfsc_opts.realtime.d;
		}
		if (opts->data.hfsc_opts.upperlimit.used) {
			pa->pq_u.hfsc_opts.ulsc_m1 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m1,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_m2 =
			    eval_bwspec(&opts->data.hfsc_opts.upperlimit.m2,
			    ref_bw);
			pa->pq_u.hfsc_opts.ulsc_d =
			    opts->data.hfsc_opts.upperlimit.d;
		}
		break;
	default:
		/*
		 * NOTE(review): the switch is on pa->scheduler but the
		 * message reports opts->qtype; presumably the caller keeps
		 * the two in sync -- confirm before relying on the value
		 * printed here.
		 */
		warnx("eval_queue_opts: unknown scheduler type %u",
		    opts->qtype);
		errors++;
		break;
	}

	return (errors);
}
1223 
1224 u_int32_t
1225 eval_bwspec(struct node_queue_bw *bw, u_int32_t ref_bw)
1226 {
1227 	if (bw->bw_absolute > 0)
1228 		return (bw->bw_absolute);
1229 
1230 	if (bw->bw_percent > 0)
1231 		return (ref_bw / 100 * bw->bw_percent);
1232 
1233 	return (0);
1234 }
1235 
1236 void
1237 print_hfsc_sc(const char *scname, u_int m1, u_int d, u_int m2,
1238     const struct node_hfsc_sc *sc)
1239 {
1240 	printf(" %s", scname);
1241 
1242 	if (d != 0) {
1243 		printf("(");
1244 		if (sc != NULL && sc->m1.bw_percent > 0)
1245 			printf("%u%%", sc->m1.bw_percent);
1246 		else
1247 			printf("%s", rate2str((double)m1));
1248 		printf(" %u", d);
1249 	}
1250 
1251 	if (sc != NULL && sc->m2.bw_percent > 0)
1252 		printf(" %u%%", sc->m2.bw_percent);
1253 	else
1254 		printf(" %s", rate2str((double)m2));
1255 
1256 	if (d != 0)
1257 		printf(")");
1258 }
1259