/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
 * $FreeBSD$
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <net/altq/altq.h>
#ifdef ALTQ3_COMPAT
#include <net/altq/altq_conf.h>
#endif

/* machine dependent clock related includes */
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#include <machine/md_var.h>		/* for cpu_feature */
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EBUSY;
		}
		if (ALTQ_IS_ATTACHED(ifq)) {
			IFQ_UNLOCK(ifq);
			return EEXIST;
		}
	}
#endif
	ifq->altq_type = type;
	ifq->altq_disc = discipline;
	ifq->altq_enqueue = enqueue;
	ifq->altq_dequeue = dequeue;
	ifq->altq_request = request;
	ifq->altq_clfier = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type = ALTQT_NONE;
	ifq->altq_disc = NULL;
	ifq->altq_enqueue = NULL;
	ifq->altq_dequeue = NULL;
	ifq->altq_request = NULL;
	ifq->altq_clfier = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT)  / machclk_freq
 *		(((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 *
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)

static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
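				/*
				 * Partial refill: tbr_rate is bytes per
				 * machclk tick pre-scaled by TBR_SHIFT
				 * (see TBR_SCALE above), so the product
				 * below already lives in the scaled
				 * token domain.
				 */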
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = LLONG_MAX;
	/*
	 * The longest time between tbr_dequeue() calls will be about 1
	 * system tick, as the callout that drives it is scheduled once per
	 * tick.  The refill-time detection logic in tbr_dequeue() can only
	 * properly detect the passage of up to LLONG_MAX machclk ticks.
	 * Therefore, in order for this logic to function properly in the
	 * extreme case, the maximum value of tbr_filluptime should be
	 * LLONG_MAX less one system tick's worth of machclk ticks less
	 * some additional slop factor (here one more system tick's worth
	 * of machclk ticks).
	 */
	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
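/*
 * A worked example of the scaling done in tbr_set() above (numbers are
 * illustrative only): for profile->rate = 8 Mbit/s and machclk_freq =
 * 1 GHz, tbr_rate = TBR_SCALE(1000000) / 10^9, i.e. about 0.001 bytes
 * of token per machclk tick in TBR_SHIFT fixed point, so one
 * millisecond (10^6 ticks) refills roughly 1000 bytes.
 */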
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions.  Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline specific functions with regard to
 * malloc with WAITOK; also it is not yet clear which lock to use.
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it.
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline specific functions with regard to
 * copyout operations; also it is not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes, version);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
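		/*
		 * Only the TOS octet changes here: the two low-order
		 * (CU/ECN) bits are preserved below and the header
		 * checksum is patched incrementally (RFC 1624) rather
		 * than recomputed.
		 */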
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);	/* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
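/*
 * Note on the microtime() fallback used below: shifting the 1 MHz
 * microsecond clock left by MACHCLK_SHIFT (8) gives the emulated
 * clock, i.e. machclk_freq = 1000000 << 8 = 256 MHz.
 */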
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int wait;
		struct timeval tv_start, tv_end;
		u_int64_t start, end, diff;
		int timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};

/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;
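	/*
	 * 'off' is now the transport header offset relative to m0 and
	 * may extend beyond it; the loop below walks the mbuf chain
	 * until the mbuf holding the header is reached.
	 */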
#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);	/* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
	}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
		/* get next header and header length */
		struct _opt6 *opt6;

		opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
		proto = opt6->opt6_nxt;
		off += 8 + (opt6->opt6_hlen * 4);
		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
			fin->fi_gpi = opt6->ah_spi;
	}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
		}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
		}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
		}

		case IPPROTO_FRAGMENT:
			/* IPv6 fragments are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void *class;
	u_long *phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	    M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void *class;
	int all;
{
	struct acc_filter *afp;
	int i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
1578 */ 1579 if (i != ACC_WILDCARD_INDEX) 1580 i = ACC_WILDCARD_INDEX; 1581 else 1582 break; 1583 } while (1); 1584 } 1585 } 1586 #ifdef INET6 1587 else if (flow.fi_family == AF_INET6) { 1588 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1589 1590 /* get the filter hash entry from its flow ID */ 1591 if (fp6->fi6_flowlabel != 0) 1592 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1593 else 1594 /* flowlable can be zero */ 1595 i = ACC_WILDCARD_INDEX; 1596 1597 /* go through this loop twice. first for flow hash, second 1598 for wildcards. */ 1599 do { 1600 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1601 if (apply_filter6(afp->f_fbmask, 1602 (struct flow_filter6 *)&afp->f_filter, 1603 fp6)) 1604 /* filter matched */ 1605 return (afp->f_class); 1606 1607 /* 1608 * check again for filters with a wildcard. 1609 */ 1610 if (i != ACC_WILDCARD_INDEX) 1611 i = ACC_WILDCARD_INDEX; 1612 else 1613 break; 1614 } while (1); 1615 } 1616 #endif /* INET6 */ 1617 1618 /* no filter matched */ 1619 return (NULL); 1620 } 1621 1622 static int 1623 apply_filter4(fbmask, filt, pkt) 1624 u_int32_t fbmask; 1625 struct flow_filter *filt; 1626 struct flowinfo_in *pkt; 1627 { 1628 if (filt->ff_flow.fi_family != AF_INET) 1629 return (0); 1630 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1631 return (0); 1632 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1633 return (0); 1634 if ((fbmask & FIMB4_DADDR) && 1635 filt->ff_flow.fi_dst.s_addr != 1636 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1637 return (0); 1638 if ((fbmask & FIMB4_SADDR) && 1639 filt->ff_flow.fi_src.s_addr != 1640 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1641 return (0); 1642 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1643 return (0); 1644 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1645 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1646 return (0); 1647 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1648 return (0); 1649 /* match */ 1650 return (1); 1651 } 1652 1653 /* 1654 * filter matching function optimized for a common case that checks 1655 * only protocol and port numbers 1656 */ 1657 static int 1658 apply_ppfilter4(fbmask, filt, pkt) 1659 u_int32_t fbmask; 1660 struct flow_filter *filt; 1661 struct flowinfo_in *pkt; 1662 { 1663 if (filt->ff_flow.fi_family != AF_INET) 1664 return (0); 1665 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1666 return (0); 1667 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1668 return (0); 1669 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1670 return (0); 1671 /* match */ 1672 return (1); 1673 } 1674 1675 /* 1676 * filter matching function only for tos field. 
/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
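/*
 * For example (hypothetical values), a filter in hash bucket 3 with
 * unique id 0x00042 gets handle (3 << 20) | 0x00042 = 0x00300042;
 * ACC_GET_HINDEX() recovers the bucket index from such a handle.
 */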
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int i;
{
	static u_long handle_number = 1;
	u_long handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}

/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in a LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char    ip4f_valid;
	u_short ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
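/*
 * Typical flow (illustrative): extract_ports4() calls ip4f_cache() for
 * a first fragment (offset 0, IP_MF set) to record its ports keyed by
 * (id, src, dst, proto); later fragments (nonzero offset) recover them
 * via ip4f_lookup(), which releases the entry on the last fragment.
 */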
static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		    M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */