/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
 * $FreeBSD$
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#include <machine/md_var.h>		/* for cpu_feature */
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *,
		    struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
		    struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
		    struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

#ifdef ALTQ
SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0,
    "ALTQ packet queuing");

#define	ALTQ_FEATURE(name, desc)					\
	SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name,	\
	    CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1,		\
	    desc, "feature")

#ifdef ALTQ_CBQ
ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline");
#endif
#ifdef ALTQ_CODEL
ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline");
#endif
#ifdef ALTQ_RED
ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline");
#endif
#ifdef ALTQ_RIO
ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline");
#endif
#ifdef ALTQ_HFSC
ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline");
#endif
#ifdef ALTQ_PRIQ
ALTQ_FEATURE(priq, "ALTQ Priority Queuing discipline");
#endif
#ifdef ALTQ_FAIRQ
ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline");
#endif
#endif
/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

	ifq->altq_type = type;
	ifq->altq_disc = discipline;
	ifq->altq_enqueue = enqueue;
	ifq->altq_dequeue = dequeue;
	ifq->altq_request = request;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	IFQ_UNLOCK(ifq);
	return 0;
}
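/*
 * Illustrative only: a minimal sketch of how a discipline's pf attach
 * path is expected to call altq_attach().  The names foo_pfattach,
 * foo_enqueue, foo_dequeue, foo_request and the type ALTQT_FOO are
 * hypothetical; real disciplines (e.g. priq_pfattach()) follow the
 * same shape with their own state and callbacks.  Kept under #if 0
 * because the hypothetical symbols do not exist.
 */
#if 0
static int
foo_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
		return (EINVAL);
	/* register the discipline state and its three callbacks */
	return (altq_attach(&ifp->if_snd, ALTQT_FOO, a->altq_disc,
	    foo_enqueue, foo_dequeue, foo_request));
}
#endif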
int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	ifq->altq_type = ALTQT_NONE;
	ifq->altq_disc = NULL;
	ifq->altq_enqueue = NULL;
	ifq->altq_dequeue = NULL;
	ifq->altq_request = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT) / machclk_freq
 *		= (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
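/*
 * Worked example of the fixed-point scaling above (illustrative only;
 * the numbers are assumptions, not values from a real system).  With
 * machclk_freq = 1,000,000,000 (a 1 GHz machine clock) and a profile
 * rate of 100 Mbps:
 *
 *	bytes/sec = 100,000,000 / 8             = 12,500,000
 *	tbr_rate  = (12,500,000 << 29) / 10^9  ~= 6,710,886
 *
 * i.e. tbr_rate is the scaled token credit per machine-clock tick, so
 * one tick adds 6,710,886 / 2^29 ~= 0.0125 bytes of credit, which is
 * 12.5 MB/s as intended.  A depth of 1500 bytes is stored as
 * TBR_SCALE(1500) = 1500 << 29.
 */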
static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = LLONG_MAX;
	/*
	 * The longest time between tbr_dequeue() calls will be about 1
	 * system tick, as the callout that drives it is scheduled once per
	 * tick.  The refill-time detection logic in tbr_dequeue() can only
	 * properly detect the passage of up to LLONG_MAX machclk ticks.
	 * Therefore, in order for this logic to function properly in the
	 * extreme case, the maximum value of tbr_filluptime should be
	 * LLONG_MAX less one system tick's worth of machclk ticks less
	 * some additional slop factor (here one more system tick's worth
	 * of machclk ticks).
	 */
	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
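/*
 * Continuing the worked example (assumed numbers, for illustration):
 * with machclk_freq = 10^9, profile->rate = 100,000,000 (bps) and
 * profile->depth = 1500 (bytes), tbr_set() computes
 *
 *	tbr_rate       = (12,500,000 << 29) / 10^9  ~= 6,710,886
 *	tbr_depth      = 1500 << 29                  = 805,306,368,000
 *	tbr_filluptime = tbr_depth / tbr_rate       ~= 120,000
 *
 * i.e. a fully drained bucket refills in about 120,000 machclk ticks
 * (120 us at 1 GHz), which matches 1500 bytes at 12.5 MB/s.  The
 * LLONG_MAX clamp above only matters for extremely low rates, where
 * the computed filluptime would otherwise overflow the refill check.
 */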
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	struct epoch_tracker et;
	int active;

	active = 0;
	NET_EPOCH_ENTER(et);
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	NET_EPOCH_EXIT(et);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions.  Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}
/*
 * add a discipline or a queue
 * Locking is done in the discipline-specific functions with regard to
 * malloc with M_WAITOK; it is also not yet clear which lock to use.
 */
int
altq_add(struct ifnet *ifp, struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_add_altq(ifp, a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline-specific functions with regard to
 * copyout operations; it is also not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes, version);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
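/*
 * Layout note for the IPv6 case above (a worked illustration of the
 * existing shifts, not new protocol behavior): after ntohl(), the
 * 32-bit ip6_flow word is
 *
 *	bits 28-31  version (6)
 *	bits 20-27  traffic class (the diffserv byte extracted above)
 *	bits  0-19  flow label
 *
 * so for a host-order word 0x60b00000, (0x60b00000 >> 28) == 6 checks
 * the version and (0x60b00000 >> 20) & 0xff == 0x0b is the DS field.
 */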
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);	/* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
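/*
 * Worked instance of the RFC 1624 update above (assumed values, for
 * illustration).  Only the TOS byte of the 16-bit header word changes,
 * so ~m + m' reduces to 0xff00 + (~old & 0xff) + dsfield.  Suppose the
 * stored checksum is 0xb1e6, old = 0x00 and dsfield = 0xb8:
 *
 *	sum  = ~0xb1e6 & 0xffff           = 0x4e19
 *	sum += 0xff00 + 0xff + 0xb8        (total 0x14ed0)
 *	fold: (0x14ed0 >> 16) + 0x4ed0    = 0x4ed1
 *	ip_sum = ~0x4ed1 & 0xffff         = 0xb12e
 *
 * which agrees with recomputing ~(~HC + ~m + m') directly with
 * m = 0x4500 (version/ihl byte plus old TOS) and m' = 0x45b8.
 */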
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if defined(__amd64__) || defined(__i386__)
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);

static void
init_machclk_setup(void)
{
	callout_init(&tbr_callout, 1);

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		u_int64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
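/*
 * Worked example of the emulated clock above (illustrative arithmetic
 * only).  With MACHCLK_SHIFT = 8, the emulated frequency is
 *
 *	machclk_freq = 1000000 << 8 = 256,000,000 Hz (256 MHz)
 *
 * and read_machclk() returns microseconds-since-boot << 8, so e.g.
 * 2.5 seconds of uptime (2,500,000 us) reads as 640,000,000 ticks,
 * i.e. 2.5 s at 256 MHz.  With hz = 1000, machclk_per_tick would be
 * 256,000,000 / 1000 = 256,000.
 */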
#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50	/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51	/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = IPV6_TRAFFIC_CLASS(ip6);

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}
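/*
 * Illustrative only: a minimal caller of altq_extractflow(), assuming
 * an IPv4 packet whose header sits at the front of the mbuf.  The
 * classifier below does essentially this via acc_classify().  Kept
 * under #if 0 since it is a sketch, not part of the subsystem.
 */
#if 0
static void
example_extract(struct mbuf *m)
{
	struct flowinfo_in fin;

	/* ask for port numbers in addition to addresses and proto */
	if (altq_extractflow(m, PF_INET, (struct flowinfo *)&fin,
	    FIMB4_PORTS)) {
		/*
		 * fin.fi_src/fi_dst/fi_proto/fi_sport/fi_dport are now
		 * valid, in network byte order as noted above.
		 */
	}
}
#endif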
/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};

/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);	/* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* IPv6 fragments are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */
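/*
 * Note on the header-length arithmetic above (a summary of the existing
 * behavior, not new logic): for AH, opt6_hlen counts 32-bit words beyond
 * the first two, so the header occupies 8 + opt6_hlen * 4 bytes; for
 * hop-by-hop/routing/destination options, opt6_hlen counts 8-byte units
 * beyond the first, so the size is (opt6_hlen + 1) * 8 bytes.  E.g. an
 * AH with opt6_hlen = 4 is 8 + 16 = 24 bytes, while a routing header
 * with opt6_hlen = 2 is (2 + 1) * 8 = 24 bytes.
 */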
/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void *class;
	u_long *phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	    M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
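/*
 * Worked example of the IPv4 mask defaulting above (assumed addresses,
 * for illustration): a filter with fi_dst = 10.0.0.1 and no mask set
 * gets mask_dst = 0xffffffff and is hashed by destination via
 * ACC_GET_HASH_INDEX(fi_dst.s_addr); a filter with fi_dst = 0 (or any
 * mask narrower than /32) lands in the ACC_WILDCARD_INDEX bucket, so
 * lookups must scan that bucket in addition to the hash bucket.
 */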
int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void *class;
	int all;
{
	struct acc_filter *afp;
	int i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
		    (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
				    f_chain)
					if (apply_filter4(afp->f_fbmask,
					    &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
				    (struct flow_filter6 *)&afp->f_filter,
				    fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
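/*
 * Illustrative only: how a discipline might use the common classifier.
 * "struct foo_state" and its "classifier" member are hypothetical, as
 * is the pre-built flow_filter "ff"; the handle returned by
 * acc_add_filter() is what acc_delete_filter() expects back.  Kept
 * under #if 0 because the surrounding types are assumptions.
 */
#if 0
static void
example_classify(struct foo_state *sp, struct flow_filter *ff,
    void *class, struct mbuf *m)
{
	u_long handle;
	void *matched;

	if (acc_add_filter(&sp->classifier, ff, class, &handle) == 0) {
		matched = acc_classify(&sp->classifier, m, AF_INET);
		/*
		 * matched == class if m fits the filter; otherwise it is
		 * NULL or another class whose filter matched first.
		 */
		acc_delete_filter(&sp->classifier, handle);
	}
}
#endif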
static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}
#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
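/*
 * Worked example of the handle layout (assumed values): for a filter
 * stored in hash bucket i = 5 with unique id 0x00123, get_filt_handle()
 * returns (5 << 20) | 0x00123 = 0x500123.  filth_to_filtp() recovers
 * the bucket with ACC_GET_HINDEX(handle) and then compares the full
 * handle against each entry in that bucket.
 */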
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int i;
{
	static u_long handle_number = 1;
	u_long handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}

/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when the first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char    ip4f_valid;
	u_short ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE	16	/* IPv4 fragment cache size */
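/*
 * Walkthrough of the cache (assumed packet, for illustration): when the
 * first fragment of a UDP datagram arrives (IP_MF set, offset 0),
 * extract_ports4() reads the real port numbers and ip4f_cache() stores
 * them keyed by (ip_id, src, dst, proto).  When a later fragment with a
 * nonzero offset arrives carrying no UDP header, ip4f_lookup() finds
 * the entry by the same key and fills in the cached ports; the entry is
 * released when the fragment without IP_MF (the last one) is seen.
 */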
static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	    fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {
			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		    M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */