1 /*- 2 * Copyright (C) 1997-2003 3 * Sony Computer Science Laboratories Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ 27 * $FreeBSD$ 28 */ 29 30 #include "opt_altq.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/systm.h> 38 #include <sys/proc.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 #include <sys/kernel.h> 42 #include <sys/errno.h> 43 #include <sys/syslog.h> 44 #include <sys/sysctl.h> 45 #include <sys/queue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_dl.h> 50 #include <net/if_types.h> 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_systm.h> 55 #include <netinet/ip.h> 56 #ifdef INET6 57 #include <netinet/ip6.h> 58 #endif 59 #include <netinet/tcp.h> 60 #include <netinet/udp.h> 61 62 #include <netpfil/pf/pf.h> 63 #include <netpfil/pf/pf_altq.h> 64 #include <net/altq/altq.h> 65 #ifdef ALTQ3_COMPAT 66 #include <net/altq/altq_conf.h> 67 #endif 68 69 /* machine dependent clock related includes */ 70 #include <sys/bus.h> 71 #include <sys/cpu.h> 72 #include <sys/eventhandler.h> 73 #include <machine/clock.h> 74 #if defined(__amd64__) || defined(__i386__) 75 #include <machine/cpufunc.h> /* for pentium tsc */ 76 #include <machine/specialreg.h> /* for CPUID_TSC */ 77 #include <machine/md_var.h> /* for cpu_feature */ 78 #endif /* __amd64 || __i386__ */ 79 80 /* 81 * internal function prototypes 82 */ 83 static void tbr_timeout(void *); 84 int (*altq_input)(struct mbuf *, int) = NULL; 85 static struct mbuf *tbr_dequeue(struct ifaltq *, int); 86 static int tbr_timer = 0; /* token bucket regulator timer */ 87 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 88 static struct callout tbr_callout = CALLOUT_INITIALIZER; 89 #else 90 static struct callout tbr_callout; 91 #endif 92 93 #ifdef ALTQ3_CLFIER_COMPAT 94 static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 95 #ifdef INET6 96 static int extract_ports6(struct mbuf *, struct ip6_hdr *, 97 struct flowinfo_in6 *); 98 #endif 99 static int apply_filter4(u_int32_t, struct flow_filter *, 100 struct flowinfo_in *); 101 static int apply_ppfilter4(u_int32_t, struct flow_filter *, 102 struct flowinfo_in *); 103 #ifdef INET6 104 static int apply_filter6(u_int32_t, struct flow_filter6 *, 105 struct flowinfo_in6 *); 106 #endif 107 static int apply_tosfilter4(u_int32_t, struct flow_filter *, 108 struct flowinfo_in *); 109 static u_long get_filt_handle(struct acc_classifier *, int); 110 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 111 static u_int32_t filt2fibmask(struct flow_filter *); 112 113 static void ip4f_cache(struct ip *, struct flowinfo_in *); 114 static int ip4f_lookup(struct ip *, struct flowinfo_in *); 115 static int ip4f_init(void); 116 static struct ip4_frag *ip4f_alloc(void); 117 static void ip4f_free(struct ip4_frag *); 118 #endif /* ALTQ3_CLFIER_COMPAT */ 119 120 /* 121 * alternate queueing support routines 122 */ 123 124 /* look up the queue state by the interface name and the queueing type. */ 125 void * 126 altq_lookup(name, type) 127 char *name; 128 int type; 129 { 130 struct ifnet *ifp; 131 132 if ((ifp = ifunit(name)) != NULL) { 133 /* read if_snd unlocked */ 134 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 135 return (ifp->if_snd.altq_disc); 136 } 137 138 return NULL; 139 } 140 141 int 142 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) 143 struct ifaltq *ifq; 144 int type; 145 void *discipline; 146 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 147 struct mbuf *(*dequeue)(struct ifaltq *, int); 148 int (*request)(struct ifaltq *, int, void *); 149 void *clfier; 150 void *(*classify)(void *, struct mbuf *, int); 151 { 152 IFQ_LOCK(ifq); 153 if (!ALTQ_IS_READY(ifq)) { 154 IFQ_UNLOCK(ifq); 155 return ENXIO; 156 } 157 158 #ifdef ALTQ3_COMPAT 159 /* 160 * pfaltq can override the existing discipline, but altq3 cannot. 161 * check these if clfier is not NULL (which implies altq3). 162 */ 163 if (clfier != NULL) { 164 if (ALTQ_IS_ENABLED(ifq)) { 165 IFQ_UNLOCK(ifq); 166 return EBUSY; 167 } 168 if (ALTQ_IS_ATTACHED(ifq)) { 169 IFQ_UNLOCK(ifq); 170 return EEXIST; 171 } 172 } 173 #endif 174 ifq->altq_type = type; 175 ifq->altq_disc = discipline; 176 ifq->altq_enqueue = enqueue; 177 ifq->altq_dequeue = dequeue; 178 ifq->altq_request = request; 179 ifq->altq_clfier = clfier; 180 ifq->altq_classify = classify; 181 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); 182 #ifdef ALTQ3_COMPAT 183 #ifdef ALTQ_KLD 184 altq_module_incref(type); 185 #endif 186 #endif 187 IFQ_UNLOCK(ifq); 188 return 0; 189 } 190 191 int 192 altq_detach(ifq) 193 struct ifaltq *ifq; 194 { 195 IFQ_LOCK(ifq); 196 197 if (!ALTQ_IS_READY(ifq)) { 198 IFQ_UNLOCK(ifq); 199 return ENXIO; 200 } 201 if (ALTQ_IS_ENABLED(ifq)) { 202 IFQ_UNLOCK(ifq); 203 return EBUSY; 204 } 205 if (!ALTQ_IS_ATTACHED(ifq)) { 206 IFQ_UNLOCK(ifq); 207 return (0); 208 } 209 #ifdef ALTQ3_COMPAT 210 #ifdef ALTQ_KLD 211 altq_module_declref(ifq->altq_type); 212 #endif 213 #endif 214 215 ifq->altq_type = ALTQT_NONE; 216 ifq->altq_disc = NULL; 217 ifq->altq_enqueue = NULL; 218 ifq->altq_dequeue = NULL; 219 ifq->altq_request = NULL; 220 ifq->altq_clfier = NULL; 221 ifq->altq_classify = NULL; 222 ifq->altq_flags &= ALTQF_CANTCHANGE; 223 224 IFQ_UNLOCK(ifq); 225 return 0; 226 } 227 228 int 229 altq_enable(ifq) 230 struct ifaltq *ifq; 231 { 232 int s; 233 234 IFQ_LOCK(ifq); 235 236 if (!ALTQ_IS_READY(ifq)) { 237 IFQ_UNLOCK(ifq); 238 return ENXIO; 239 } 240 if (ALTQ_IS_ENABLED(ifq)) { 241 IFQ_UNLOCK(ifq); 242 return 0; 243 } 244 245 s = splnet(); 246 IFQ_PURGE_NOLOCK(ifq); 247 ASSERT(ifq->ifq_len == 0); 248 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ 249 ifq->altq_flags |= ALTQF_ENABLED; 250 if (ifq->altq_clfier != NULL) 251 ifq->altq_flags |= ALTQF_CLASSIFY; 252 splx(s); 253 254 IFQ_UNLOCK(ifq); 255 return 0; 256 } 257 258 int 259 altq_disable(ifq) 260 struct ifaltq *ifq; 261 { 262 int s; 263 264 IFQ_LOCK(ifq); 265 if (!ALTQ_IS_ENABLED(ifq)) { 266 IFQ_UNLOCK(ifq); 267 return 0; 268 } 269 270 s = splnet(); 271 IFQ_PURGE_NOLOCK(ifq); 272 ASSERT(ifq->ifq_len == 0); 273 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); 274 splx(s); 275 276 IFQ_UNLOCK(ifq); 277 return 0; 278 } 279 280 #ifdef ALTQ_DEBUG 281 void 282 altq_assert(file, line, failedexpr) 283 const char *file, *failedexpr; 284 int line; 285 { 286 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 287 failedexpr, file, line); 288 panic("altq assertion"); 289 /* NOTREACHED */ 290 } 291 #endif 292 293 /* 294 * internal representation of token bucket parameters 295 * rate: byte_per_unittime << 32 296 * (((bits_per_sec) / 8) << 32) / machclk_freq 297 * depth: byte << 32 298 * 299 */ 300 #define TBR_SHIFT 32 301 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 302 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 303 304 static struct mbuf * 305 tbr_dequeue(ifq, op) 306 struct ifaltq *ifq; 307 int op; 308 { 309 struct tb_regulator *tbr; 310 struct mbuf *m; 311 int64_t interval; 312 u_int64_t now; 313 314 IFQ_LOCK_ASSERT(ifq); 315 tbr = ifq->altq_tbr; 316 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 317 /* if this is a remove after poll, bypass tbr check */ 318 } else { 319 /* update token only when it is negative */ 320 if (tbr->tbr_token <= 0) { 321 now = read_machclk(); 322 interval = now - tbr->tbr_last; 323 if (interval >= tbr->tbr_filluptime) 324 tbr->tbr_token = tbr->tbr_depth; 325 else { 326 tbr->tbr_token += interval * tbr->tbr_rate; 327 if (tbr->tbr_token > tbr->tbr_depth) 328 tbr->tbr_token = tbr->tbr_depth; 329 } 330 tbr->tbr_last = now; 331 } 332 /* if token is still negative, don't allow dequeue */ 333 if (tbr->tbr_token <= 0) 334 return (NULL); 335 } 336 337 if (ALTQ_IS_ENABLED(ifq)) 338 m = (*ifq->altq_dequeue)(ifq, op); 339 else { 340 if (op == ALTDQ_POLL) 341 _IF_POLL(ifq, m); 342 else 343 _IF_DEQUEUE(ifq, m); 344 } 345 346 if (m != NULL && op == ALTDQ_REMOVE) 347 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 348 tbr->tbr_lastop = op; 349 return (m); 350 } 351 352 /* 353 * set a token bucket regulator. 354 * if the specified rate is zero, the token bucket regulator is deleted. 355 */ 356 int 357 tbr_set(ifq, profile) 358 struct ifaltq *ifq; 359 struct tb_profile *profile; 360 { 361 struct tb_regulator *tbr, *otbr; 362 363 if (tbr_dequeue_ptr == NULL) 364 tbr_dequeue_ptr = tbr_dequeue; 365 366 if (machclk_freq == 0) 367 init_machclk(); 368 if (machclk_freq == 0) { 369 printf("tbr_set: no cpu clock available!\n"); 370 return (ENXIO); 371 } 372 373 IFQ_LOCK(ifq); 374 if (profile->rate == 0) { 375 /* delete this tbr */ 376 if ((tbr = ifq->altq_tbr) == NULL) { 377 IFQ_UNLOCK(ifq); 378 return (ENOENT); 379 } 380 ifq->altq_tbr = NULL; 381 free(tbr, M_DEVBUF); 382 IFQ_UNLOCK(ifq); 383 return (0); 384 } 385 386 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); 387 if (tbr == NULL) { 388 IFQ_UNLOCK(ifq); 389 return (ENOMEM); 390 } 391 392 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 393 tbr->tbr_depth = TBR_SCALE(profile->depth); 394 if (tbr->tbr_rate > 0) 395 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 396 else 397 tbr->tbr_filluptime = 0xffffffffffffffffLL; 398 tbr->tbr_token = tbr->tbr_depth; 399 tbr->tbr_last = read_machclk(); 400 tbr->tbr_lastop = ALTDQ_REMOVE; 401 402 otbr = ifq->altq_tbr; 403 ifq->altq_tbr = tbr; /* set the new tbr */ 404 405 if (otbr != NULL) 406 free(otbr, M_DEVBUF); 407 else { 408 if (tbr_timer == 0) { 409 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 410 tbr_timer = 1; 411 } 412 } 413 IFQ_UNLOCK(ifq); 414 return (0); 415 } 416 417 /* 418 * tbr_timeout goes through the interface list, and kicks the drivers 419 * if necessary. 420 * 421 * MPSAFE 422 */ 423 static void 424 tbr_timeout(arg) 425 void *arg; 426 { 427 VNET_ITERATOR_DECL(vnet_iter); 428 struct ifnet *ifp; 429 int active, s; 430 431 active = 0; 432 s = splnet(); 433 IFNET_RLOCK_NOSLEEP(); 434 VNET_LIST_RLOCK_NOSLEEP(); 435 VNET_FOREACH(vnet_iter) { 436 CURVNET_SET(vnet_iter); 437 for (ifp = TAILQ_FIRST(&V_ifnet); ifp; 438 ifp = TAILQ_NEXT(ifp, if_list)) { 439 /* read from if_snd unlocked */ 440 if (!TBR_IS_ENABLED(&ifp->if_snd)) 441 continue; 442 active++; 443 if (!IFQ_IS_EMPTY(&ifp->if_snd) && 444 ifp->if_start != NULL) 445 (*ifp->if_start)(ifp); 446 } 447 CURVNET_RESTORE(); 448 } 449 VNET_LIST_RUNLOCK_NOSLEEP(); 450 IFNET_RUNLOCK_NOSLEEP(); 451 splx(s); 452 if (active > 0) 453 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 454 else 455 tbr_timer = 0; /* don't need tbr_timer anymore */ 456 } 457 458 /* 459 * get token bucket regulator profile 460 */ 461 int 462 tbr_get(ifq, profile) 463 struct ifaltq *ifq; 464 struct tb_profile *profile; 465 { 466 struct tb_regulator *tbr; 467 468 IFQ_LOCK(ifq); 469 if ((tbr = ifq->altq_tbr) == NULL) { 470 profile->rate = 0; 471 profile->depth = 0; 472 } else { 473 profile->rate = 474 (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq); 475 profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth); 476 } 477 IFQ_UNLOCK(ifq); 478 return (0); 479 } 480 481 /* 482 * attach a discipline to the interface. if one already exists, it is 483 * overridden. 484 * Locking is done in the discipline specific attach functions. Basically 485 * they call back to altq_attach which takes care of the attach and locking. 486 */ 487 int 488 altq_pfattach(struct pf_altq *a) 489 { 490 int error = 0; 491 492 switch (a->scheduler) { 493 case ALTQT_NONE: 494 break; 495 #ifdef ALTQ_CBQ 496 case ALTQT_CBQ: 497 error = cbq_pfattach(a); 498 break; 499 #endif 500 #ifdef ALTQ_PRIQ 501 case ALTQT_PRIQ: 502 error = priq_pfattach(a); 503 break; 504 #endif 505 #ifdef ALTQ_HFSC 506 case ALTQT_HFSC: 507 error = hfsc_pfattach(a); 508 break; 509 #endif 510 #ifdef ALTQ_FAIRQ 511 case ALTQT_FAIRQ: 512 error = fairq_pfattach(a); 513 break; 514 #endif 515 default: 516 error = ENXIO; 517 } 518 519 return (error); 520 } 521 522 /* 523 * detach a discipline from the interface. 524 * it is possible that the discipline was already overridden by another 525 * discipline. 526 */ 527 int 528 altq_pfdetach(struct pf_altq *a) 529 { 530 struct ifnet *ifp; 531 int s, error = 0; 532 533 if ((ifp = ifunit(a->ifname)) == NULL) 534 return (EINVAL); 535 536 /* if this discipline is no longer referenced, just return */ 537 /* read unlocked from if_snd */ 538 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) 539 return (0); 540 541 s = splnet(); 542 /* read unlocked from if_snd, _disable and _detach take care */ 543 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 544 error = altq_disable(&ifp->if_snd); 545 if (error == 0) 546 error = altq_detach(&ifp->if_snd); 547 splx(s); 548 549 return (error); 550 } 551 552 /* 553 * add a discipline or a queue 554 * Locking is done in the discipline specific functions with regards to 555 * malloc with WAITOK, also it is not yet clear which lock to use. 556 */ 557 int 558 altq_add(struct pf_altq *a) 559 { 560 int error = 0; 561 562 if (a->qname[0] != 0) 563 return (altq_add_queue(a)); 564 565 if (machclk_freq == 0) 566 init_machclk(); 567 if (machclk_freq == 0) 568 panic("altq_add: no cpu clock"); 569 570 switch (a->scheduler) { 571 #ifdef ALTQ_CBQ 572 case ALTQT_CBQ: 573 error = cbq_add_altq(a); 574 break; 575 #endif 576 #ifdef ALTQ_PRIQ 577 case ALTQT_PRIQ: 578 error = priq_add_altq(a); 579 break; 580 #endif 581 #ifdef ALTQ_HFSC 582 case ALTQT_HFSC: 583 error = hfsc_add_altq(a); 584 break; 585 #endif 586 #ifdef ALTQ_FAIRQ 587 case ALTQT_FAIRQ: 588 error = fairq_add_altq(a); 589 break; 590 #endif 591 default: 592 error = ENXIO; 593 } 594 595 return (error); 596 } 597 598 /* 599 * remove a discipline or a queue 600 * It is yet unclear what lock to use to protect this operation, the 601 * discipline specific functions will determine and grab it 602 */ 603 int 604 altq_remove(struct pf_altq *a) 605 { 606 int error = 0; 607 608 if (a->qname[0] != 0) 609 return (altq_remove_queue(a)); 610 611 switch (a->scheduler) { 612 #ifdef ALTQ_CBQ 613 case ALTQT_CBQ: 614 error = cbq_remove_altq(a); 615 break; 616 #endif 617 #ifdef ALTQ_PRIQ 618 case ALTQT_PRIQ: 619 error = priq_remove_altq(a); 620 break; 621 #endif 622 #ifdef ALTQ_HFSC 623 case ALTQT_HFSC: 624 error = hfsc_remove_altq(a); 625 break; 626 #endif 627 #ifdef ALTQ_FAIRQ 628 case ALTQT_FAIRQ: 629 error = fairq_remove_altq(a); 630 break; 631 #endif 632 default: 633 error = ENXIO; 634 } 635 636 return (error); 637 } 638 639 /* 640 * add a queue to the discipline 641 * It is yet unclear what lock to use to protect this operation, the 642 * discipline specific functions will determine and grab it 643 */ 644 int 645 altq_add_queue(struct pf_altq *a) 646 { 647 int error = 0; 648 649 switch (a->scheduler) { 650 #ifdef ALTQ_CBQ 651 case ALTQT_CBQ: 652 error = cbq_add_queue(a); 653 break; 654 #endif 655 #ifdef ALTQ_PRIQ 656 case ALTQT_PRIQ: 657 error = priq_add_queue(a); 658 break; 659 #endif 660 #ifdef ALTQ_HFSC 661 case ALTQT_HFSC: 662 error = hfsc_add_queue(a); 663 break; 664 #endif 665 #ifdef ALTQ_FAIRQ 666 case ALTQT_FAIRQ: 667 error = fairq_add_queue(a); 668 break; 669 #endif 670 default: 671 error = ENXIO; 672 } 673 674 return (error); 675 } 676 677 /* 678 * remove a queue from the discipline 679 * It is yet unclear what lock to use to protect this operation, the 680 * discipline specific functions will determine and grab it 681 */ 682 int 683 altq_remove_queue(struct pf_altq *a) 684 { 685 int error = 0; 686 687 switch (a->scheduler) { 688 #ifdef ALTQ_CBQ 689 case ALTQT_CBQ: 690 error = cbq_remove_queue(a); 691 break; 692 #endif 693 #ifdef ALTQ_PRIQ 694 case ALTQT_PRIQ: 695 error = priq_remove_queue(a); 696 break; 697 #endif 698 #ifdef ALTQ_HFSC 699 case ALTQT_HFSC: 700 error = hfsc_remove_queue(a); 701 break; 702 #endif 703 #ifdef ALTQ_FAIRQ 704 case ALTQT_FAIRQ: 705 error = fairq_remove_queue(a); 706 break; 707 #endif 708 default: 709 error = ENXIO; 710 } 711 712 return (error); 713 } 714 715 /* 716 * get queue statistics 717 * Locking is done in the discipline specific functions with regards to 718 * copyout operations, also it is not yet clear which lock to use. 719 */ 720 int 721 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 722 { 723 int error = 0; 724 725 switch (a->scheduler) { 726 #ifdef ALTQ_CBQ 727 case ALTQT_CBQ: 728 error = cbq_getqstats(a, ubuf, nbytes); 729 break; 730 #endif 731 #ifdef ALTQ_PRIQ 732 case ALTQT_PRIQ: 733 error = priq_getqstats(a, ubuf, nbytes); 734 break; 735 #endif 736 #ifdef ALTQ_HFSC 737 case ALTQT_HFSC: 738 error = hfsc_getqstats(a, ubuf, nbytes); 739 break; 740 #endif 741 #ifdef ALTQ_FAIRQ 742 case ALTQT_FAIRQ: 743 error = fairq_getqstats(a, ubuf, nbytes); 744 break; 745 #endif 746 default: 747 error = ENXIO; 748 } 749 750 return (error); 751 } 752 753 /* 754 * read and write diffserv field in IPv4 or IPv6 header 755 */ 756 u_int8_t 757 read_dsfield(m, pktattr) 758 struct mbuf *m; 759 struct altq_pktattr *pktattr; 760 { 761 struct mbuf *m0; 762 u_int8_t ds_field = 0; 763 764 if (pktattr == NULL || 765 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 766 return ((u_int8_t)0); 767 768 /* verify that pattr_hdr is within the mbuf data */ 769 for (m0 = m; m0 != NULL; m0 = m0->m_next) 770 if ((pktattr->pattr_hdr >= m0->m_data) && 771 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 772 break; 773 if (m0 == NULL) { 774 /* ick, pattr_hdr is stale */ 775 pktattr->pattr_af = AF_UNSPEC; 776 #ifdef ALTQ_DEBUG 777 printf("read_dsfield: can't locate header!\n"); 778 #endif 779 return ((u_int8_t)0); 780 } 781 782 if (pktattr->pattr_af == AF_INET) { 783 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 784 785 if (ip->ip_v != 4) 786 return ((u_int8_t)0); /* version mismatch! */ 787 ds_field = ip->ip_tos; 788 } 789 #ifdef INET6 790 else if (pktattr->pattr_af == AF_INET6) { 791 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 792 u_int32_t flowlabel; 793 794 flowlabel = ntohl(ip6->ip6_flow); 795 if ((flowlabel >> 28) != 6) 796 return ((u_int8_t)0); /* version mismatch! */ 797 ds_field = (flowlabel >> 20) & 0xff; 798 } 799 #endif 800 return (ds_field); 801 } 802 803 void 804 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) 805 { 806 struct mbuf *m0; 807 808 if (pktattr == NULL || 809 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 810 return; 811 812 /* verify that pattr_hdr is within the mbuf data */ 813 for (m0 = m; m0 != NULL; m0 = m0->m_next) 814 if ((pktattr->pattr_hdr >= m0->m_data) && 815 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 816 break; 817 if (m0 == NULL) { 818 /* ick, pattr_hdr is stale */ 819 pktattr->pattr_af = AF_UNSPEC; 820 #ifdef ALTQ_DEBUG 821 printf("write_dsfield: can't locate header!\n"); 822 #endif 823 return; 824 } 825 826 if (pktattr->pattr_af == AF_INET) { 827 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 828 u_int8_t old; 829 int32_t sum; 830 831 if (ip->ip_v != 4) 832 return; /* version mismatch! */ 833 old = ip->ip_tos; 834 dsfield |= old & 3; /* leave CU bits */ 835 if (old == dsfield) 836 return; 837 ip->ip_tos = dsfield; 838 /* 839 * update checksum (from RFC1624) 840 * HC' = ~(~HC + ~m + m') 841 */ 842 sum = ~ntohs(ip->ip_sum) & 0xffff; 843 sum += 0xff00 + (~old & 0xff) + dsfield; 844 sum = (sum >> 16) + (sum & 0xffff); 845 sum += (sum >> 16); /* add carry */ 846 847 ip->ip_sum = htons(~sum & 0xffff); 848 } 849 #ifdef INET6 850 else if (pktattr->pattr_af == AF_INET6) { 851 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 852 u_int32_t flowlabel; 853 854 flowlabel = ntohl(ip6->ip6_flow); 855 if ((flowlabel >> 28) != 6) 856 return; /* version mismatch! */ 857 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 858 ip6->ip6_flow = htonl(flowlabel); 859 } 860 #endif 861 return; 862 } 863 864 865 /* 866 * high resolution clock support taking advantage of a machine dependent 867 * high resolution time counter (e.g., timestamp counter of intel pentium). 868 * we assume 869 * - 64-bit-long monotonically-increasing counter 870 * - frequency range is 100M-4GHz (CPU speed) 871 */ 872 /* if pcc is not available or disabled, emulate 256MHz using microtime() */ 873 #define MACHCLK_SHIFT 8 874 875 int machclk_usepcc; 876 u_int32_t machclk_freq; 877 u_int32_t machclk_per_tick; 878 879 #if defined(__i386__) && defined(__NetBSD__) 880 extern u_int64_t cpu_tsc_freq; 881 #endif 882 883 #if (__FreeBSD_version >= 700035) 884 /* Update TSC freq with the value indicated by the caller. */ 885 static void 886 tsc_freq_changed(void *arg, const struct cf_level *level, int status) 887 { 888 /* If there was an error during the transition, don't do anything. */ 889 if (status != 0) 890 return; 891 892 #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) 893 /* If TSC is P-state invariant, don't do anything. */ 894 if (tsc_is_invariant) 895 return; 896 #endif 897 898 /* Total setting for this level gives the new frequency in MHz. */ 899 init_machclk(); 900 } 901 EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 902 EVENTHANDLER_PRI_LAST); 903 #endif /* __FreeBSD_version >= 700035 */ 904 905 static void 906 init_machclk_setup(void) 907 { 908 #if (__FreeBSD_version >= 600000) 909 callout_init(&tbr_callout, 0); 910 #endif 911 912 machclk_usepcc = 1; 913 914 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) 915 machclk_usepcc = 0; 916 #endif 917 #if defined(__FreeBSD__) && defined(SMP) 918 machclk_usepcc = 0; 919 #endif 920 #if defined(__NetBSD__) && defined(MULTIPROCESSOR) 921 machclk_usepcc = 0; 922 #endif 923 #if defined(__amd64__) || defined(__i386__) 924 /* check if TSC is available */ 925 if ((cpu_feature & CPUID_TSC) == 0 || 926 atomic_load_acq_64(&tsc_freq) == 0) 927 machclk_usepcc = 0; 928 #endif 929 } 930 931 void 932 init_machclk(void) 933 { 934 static int called; 935 936 /* Call one-time initialization function. */ 937 if (!called) { 938 init_machclk_setup(); 939 called = 1; 940 } 941 942 if (machclk_usepcc == 0) { 943 /* emulate 256MHz using microtime() */ 944 machclk_freq = 1000000 << MACHCLK_SHIFT; 945 machclk_per_tick = machclk_freq / hz; 946 #ifdef ALTQ_DEBUG 947 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 948 #endif 949 return; 950 } 951 952 /* 953 * if the clock frequency (of Pentium TSC or Alpha PCC) is 954 * accessible, just use it. 955 */ 956 #if defined(__amd64__) || defined(__i386__) 957 machclk_freq = atomic_load_acq_64(&tsc_freq); 958 #endif 959 960 /* 961 * if we don't know the clock frequency, measure it. 962 */ 963 if (machclk_freq == 0) { 964 static int wait; 965 struct timeval tv_start, tv_end; 966 u_int64_t start, end, diff; 967 int timo; 968 969 microtime(&tv_start); 970 start = read_machclk(); 971 timo = hz; /* 1 sec */ 972 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 973 microtime(&tv_end); 974 end = read_machclk(); 975 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 976 + tv_end.tv_usec - tv_start.tv_usec; 977 if (diff != 0) 978 machclk_freq = (u_int)((end - start) * 1000000 / diff); 979 } 980 981 machclk_per_tick = machclk_freq / hz; 982 983 #ifdef ALTQ_DEBUG 984 printf("altq: CPU clock: %uHz\n", machclk_freq); 985 #endif 986 } 987 988 #if defined(__OpenBSD__) && defined(__i386__) 989 static __inline u_int64_t 990 rdtsc(void) 991 { 992 u_int64_t rv; 993 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 994 return (rv); 995 } 996 #endif /* __OpenBSD__ && __i386__ */ 997 998 u_int64_t 999 read_machclk(void) 1000 { 1001 u_int64_t val; 1002 1003 if (machclk_usepcc) { 1004 #if defined(__amd64__) || defined(__i386__) 1005 val = rdtsc(); 1006 #else 1007 panic("read_machclk"); 1008 #endif 1009 } else { 1010 struct timeval tv; 1011 1012 microtime(&tv); 1013 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 1014 + tv.tv_usec) << MACHCLK_SHIFT); 1015 } 1016 return (val); 1017 } 1018 1019 #ifdef ALTQ3_CLFIER_COMPAT 1020 1021 #ifndef IPPROTO_ESP 1022 #define IPPROTO_ESP 50 /* encapsulating security payload */ 1023 #endif 1024 #ifndef IPPROTO_AH 1025 #define IPPROTO_AH 51 /* authentication header */ 1026 #endif 1027 1028 /* 1029 * extract flow information from a given packet. 1030 * filt_mask shows flowinfo fields required. 1031 * we assume the ip header is in one mbuf, and addresses and ports are 1032 * in network byte order. 1033 */ 1034 int 1035 altq_extractflow(m, af, flow, filt_bmask) 1036 struct mbuf *m; 1037 int af; 1038 struct flowinfo *flow; 1039 u_int32_t filt_bmask; 1040 { 1041 1042 switch (af) { 1043 case PF_INET: { 1044 struct flowinfo_in *fin; 1045 struct ip *ip; 1046 1047 ip = mtod(m, struct ip *); 1048 1049 if (ip->ip_v != 4) 1050 break; 1051 1052 fin = (struct flowinfo_in *)flow; 1053 fin->fi_len = sizeof(struct flowinfo_in); 1054 fin->fi_family = AF_INET; 1055 1056 fin->fi_proto = ip->ip_p; 1057 fin->fi_tos = ip->ip_tos; 1058 1059 fin->fi_src.s_addr = ip->ip_src.s_addr; 1060 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 1061 1062 if (filt_bmask & FIMB4_PORTS) 1063 /* if port info is required, extract port numbers */ 1064 extract_ports4(m, ip, fin); 1065 else { 1066 fin->fi_sport = 0; 1067 fin->fi_dport = 0; 1068 fin->fi_gpi = 0; 1069 } 1070 return (1); 1071 } 1072 1073 #ifdef INET6 1074 case PF_INET6: { 1075 struct flowinfo_in6 *fin6; 1076 struct ip6_hdr *ip6; 1077 1078 ip6 = mtod(m, struct ip6_hdr *); 1079 /* should we check the ip version? */ 1080 1081 fin6 = (struct flowinfo_in6 *)flow; 1082 fin6->fi6_len = sizeof(struct flowinfo_in6); 1083 fin6->fi6_family = AF_INET6; 1084 1085 fin6->fi6_proto = ip6->ip6_nxt; 1086 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1087 1088 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 1089 fin6->fi6_src = ip6->ip6_src; 1090 fin6->fi6_dst = ip6->ip6_dst; 1091 1092 if ((filt_bmask & FIMB6_PORTS) || 1093 ((filt_bmask & FIMB6_PROTO) 1094 && ip6->ip6_nxt > IPPROTO_IPV6)) 1095 /* 1096 * if port info is required, or proto is required 1097 * but there are option headers, extract port 1098 * and protocol numbers. 1099 */ 1100 extract_ports6(m, ip6, fin6); 1101 else { 1102 fin6->fi6_sport = 0; 1103 fin6->fi6_dport = 0; 1104 fin6->fi6_gpi = 0; 1105 } 1106 return (1); 1107 } 1108 #endif /* INET6 */ 1109 1110 default: 1111 break; 1112 } 1113 1114 /* failed */ 1115 flow->fi_len = sizeof(struct flowinfo); 1116 flow->fi_family = AF_UNSPEC; 1117 return (0); 1118 } 1119 1120 /* 1121 * helper routine to extract port numbers 1122 */ 1123 /* structure for ipsec and ipv6 option header template */ 1124 struct _opt6 { 1125 u_int8_t opt6_nxt; /* next header */ 1126 u_int8_t opt6_hlen; /* header extension length */ 1127 u_int16_t _pad; 1128 u_int32_t ah_spi; /* security parameter index 1129 for authentication header */ 1130 }; 1131 1132 /* 1133 * extract port numbers from a ipv4 packet. 1134 */ 1135 static int 1136 extract_ports4(m, ip, fin) 1137 struct mbuf *m; 1138 struct ip *ip; 1139 struct flowinfo_in *fin; 1140 { 1141 struct mbuf *m0; 1142 u_short ip_off; 1143 u_int8_t proto; 1144 int off; 1145 1146 fin->fi_sport = 0; 1147 fin->fi_dport = 0; 1148 fin->fi_gpi = 0; 1149 1150 ip_off = ntohs(ip->ip_off); 1151 /* if it is a fragment, try cached fragment info */ 1152 if (ip_off & IP_OFFMASK) { 1153 ip4f_lookup(ip, fin); 1154 return (1); 1155 } 1156 1157 /* locate the mbuf containing the protocol header */ 1158 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1159 if (((caddr_t)ip >= m0->m_data) && 1160 ((caddr_t)ip < m0->m_data + m0->m_len)) 1161 break; 1162 if (m0 == NULL) { 1163 #ifdef ALTQ_DEBUG 1164 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1165 #endif 1166 return (0); 1167 } 1168 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1169 proto = ip->ip_p; 1170 1171 #ifdef ALTQ_IPSEC 1172 again: 1173 #endif 1174 while (off >= m0->m_len) { 1175 off -= m0->m_len; 1176 m0 = m0->m_next; 1177 if (m0 == NULL) 1178 return (0); /* bogus ip_hl! */ 1179 } 1180 if (m0->m_len < off + 4) 1181 return (0); 1182 1183 switch (proto) { 1184 case IPPROTO_TCP: 1185 case IPPROTO_UDP: { 1186 struct udphdr *udp; 1187 1188 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1189 fin->fi_sport = udp->uh_sport; 1190 fin->fi_dport = udp->uh_dport; 1191 fin->fi_proto = proto; 1192 } 1193 break; 1194 1195 #ifdef ALTQ_IPSEC 1196 case IPPROTO_ESP: 1197 if (fin->fi_gpi == 0){ 1198 u_int32_t *gpi; 1199 1200 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1201 fin->fi_gpi = *gpi; 1202 } 1203 fin->fi_proto = proto; 1204 break; 1205 1206 case IPPROTO_AH: { 1207 /* get next header and header length */ 1208 struct _opt6 *opt6; 1209 1210 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1211 proto = opt6->opt6_nxt; 1212 off += 8 + (opt6->opt6_hlen * 4); 1213 if (fin->fi_gpi == 0 && m0->m_len >= off + 8) 1214 fin->fi_gpi = opt6->ah_spi; 1215 } 1216 /* goto the next header */ 1217 goto again; 1218 #endif /* ALTQ_IPSEC */ 1219 1220 default: 1221 fin->fi_proto = proto; 1222 return (0); 1223 } 1224 1225 /* if this is a first fragment, cache it. */ 1226 if (ip_off & IP_MF) 1227 ip4f_cache(ip, fin); 1228 1229 return (1); 1230 } 1231 1232 #ifdef INET6 1233 static int 1234 extract_ports6(m, ip6, fin6) 1235 struct mbuf *m; 1236 struct ip6_hdr *ip6; 1237 struct flowinfo_in6 *fin6; 1238 { 1239 struct mbuf *m0; 1240 int off; 1241 u_int8_t proto; 1242 1243 fin6->fi6_gpi = 0; 1244 fin6->fi6_sport = 0; 1245 fin6->fi6_dport = 0; 1246 1247 /* locate the mbuf containing the protocol header */ 1248 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1249 if (((caddr_t)ip6 >= m0->m_data) && 1250 ((caddr_t)ip6 < m0->m_data + m0->m_len)) 1251 break; 1252 if (m0 == NULL) { 1253 #ifdef ALTQ_DEBUG 1254 printf("extract_ports6: can't locate header! ip6=%p\n", ip6); 1255 #endif 1256 return (0); 1257 } 1258 off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); 1259 1260 proto = ip6->ip6_nxt; 1261 do { 1262 while (off >= m0->m_len) { 1263 off -= m0->m_len; 1264 m0 = m0->m_next; 1265 if (m0 == NULL) 1266 return (0); 1267 } 1268 if (m0->m_len < off + 4) 1269 return (0); 1270 1271 switch (proto) { 1272 case IPPROTO_TCP: 1273 case IPPROTO_UDP: { 1274 struct udphdr *udp; 1275 1276 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1277 fin6->fi6_sport = udp->uh_sport; 1278 fin6->fi6_dport = udp->uh_dport; 1279 fin6->fi6_proto = proto; 1280 } 1281 return (1); 1282 1283 case IPPROTO_ESP: 1284 if (fin6->fi6_gpi == 0) { 1285 u_int32_t *gpi; 1286 1287 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1288 fin6->fi6_gpi = *gpi; 1289 } 1290 fin6->fi6_proto = proto; 1291 return (1); 1292 1293 case IPPROTO_AH: { 1294 /* get next header and header length */ 1295 struct _opt6 *opt6; 1296 1297 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1298 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) 1299 fin6->fi6_gpi = opt6->ah_spi; 1300 proto = opt6->opt6_nxt; 1301 off += 8 + (opt6->opt6_hlen * 4); 1302 /* goto the next header */ 1303 break; 1304 } 1305 1306 case IPPROTO_HOPOPTS: 1307 case IPPROTO_ROUTING: 1308 case IPPROTO_DSTOPTS: { 1309 /* get next header and header length */ 1310 struct _opt6 *opt6; 1311 1312 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1313 proto = opt6->opt6_nxt; 1314 off += (opt6->opt6_hlen + 1) * 8; 1315 /* goto the next header */ 1316 break; 1317 } 1318 1319 case IPPROTO_FRAGMENT: 1320 /* ipv6 fragmentations are not supported yet */ 1321 default: 1322 fin6->fi6_proto = proto; 1323 return (0); 1324 } 1325 } while (1); 1326 /*NOTREACHED*/ 1327 } 1328 #endif /* INET6 */ 1329 1330 /* 1331 * altq common classifier 1332 */ 1333 int 1334 acc_add_filter(classifier, filter, class, phandle) 1335 struct acc_classifier *classifier; 1336 struct flow_filter *filter; 1337 void *class; 1338 u_long *phandle; 1339 { 1340 struct acc_filter *afp, *prev, *tmp; 1341 int i, s; 1342 1343 #ifdef INET6 1344 if (filter->ff_flow.fi_family != AF_INET && 1345 filter->ff_flow.fi_family != AF_INET6) 1346 return (EINVAL); 1347 #else 1348 if (filter->ff_flow.fi_family != AF_INET) 1349 return (EINVAL); 1350 #endif 1351 1352 afp = malloc(sizeof(struct acc_filter), 1353 M_DEVBUF, M_WAITOK); 1354 if (afp == NULL) 1355 return (ENOMEM); 1356 bzero(afp, sizeof(struct acc_filter)); 1357 1358 afp->f_filter = *filter; 1359 afp->f_class = class; 1360 1361 i = ACC_WILDCARD_INDEX; 1362 if (filter->ff_flow.fi_family == AF_INET) { 1363 struct flow_filter *filter4 = &afp->f_filter; 1364 1365 /* 1366 * if address is 0, it's a wildcard. if address mask 1367 * isn't set, use full mask. 1368 */ 1369 if (filter4->ff_flow.fi_dst.s_addr == 0) 1370 filter4->ff_mask.mask_dst.s_addr = 0; 1371 else if (filter4->ff_mask.mask_dst.s_addr == 0) 1372 filter4->ff_mask.mask_dst.s_addr = 0xffffffff; 1373 if (filter4->ff_flow.fi_src.s_addr == 0) 1374 filter4->ff_mask.mask_src.s_addr = 0; 1375 else if (filter4->ff_mask.mask_src.s_addr == 0) 1376 filter4->ff_mask.mask_src.s_addr = 0xffffffff; 1377 1378 /* clear extra bits in addresses */ 1379 filter4->ff_flow.fi_dst.s_addr &= 1380 filter4->ff_mask.mask_dst.s_addr; 1381 filter4->ff_flow.fi_src.s_addr &= 1382 filter4->ff_mask.mask_src.s_addr; 1383 1384 /* 1385 * if dst address is a wildcard, use hash-entry 1386 * ACC_WILDCARD_INDEX. 1387 */ 1388 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) 1389 i = ACC_WILDCARD_INDEX; 1390 else 1391 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); 1392 } 1393 #ifdef INET6 1394 else if (filter->ff_flow.fi_family == AF_INET6) { 1395 struct flow_filter6 *filter6 = 1396 (struct flow_filter6 *)&afp->f_filter; 1397 #ifndef IN6MASK0 /* taken from kame ipv6 */ 1398 #define IN6MASK0 {{{ 0, 0, 0, 0 }}} 1399 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} 1400 const struct in6_addr in6mask0 = IN6MASK0; 1401 const struct in6_addr in6mask128 = IN6MASK128; 1402 #endif 1403 1404 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) 1405 filter6->ff_mask6.mask6_dst = in6mask0; 1406 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) 1407 filter6->ff_mask6.mask6_dst = in6mask128; 1408 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) 1409 filter6->ff_mask6.mask6_src = in6mask0; 1410 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) 1411 filter6->ff_mask6.mask6_src = in6mask128; 1412 1413 /* clear extra bits in addresses */ 1414 for (i = 0; i < 16; i++) 1415 filter6->ff_flow6.fi6_dst.s6_addr[i] &= 1416 filter6->ff_mask6.mask6_dst.s6_addr[i]; 1417 for (i = 0; i < 16; i++) 1418 filter6->ff_flow6.fi6_src.s6_addr[i] &= 1419 filter6->ff_mask6.mask6_src.s6_addr[i]; 1420 1421 if (filter6->ff_flow6.fi6_flowlabel == 0) 1422 i = ACC_WILDCARD_INDEX; 1423 else 1424 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); 1425 } 1426 #endif /* INET6 */ 1427 1428 afp->f_handle = get_filt_handle(classifier, i); 1429 1430 /* update filter bitmask */ 1431 afp->f_fbmask = filt2fibmask(filter); 1432 classifier->acc_fbmask |= afp->f_fbmask; 1433 1434 /* 1435 * add this filter to the filter list. 1436 * filters are ordered from the highest rule number. 1437 */ 1438 s = splnet(); 1439 prev = NULL; 1440 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 1441 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 1442 prev = tmp; 1443 else 1444 break; 1445 } 1446 if (prev == NULL) 1447 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 1448 else 1449 LIST_INSERT_AFTER(prev, afp, f_chain); 1450 splx(s); 1451 1452 *phandle = afp->f_handle; 1453 return (0); 1454 } 1455 1456 int 1457 acc_delete_filter(classifier, handle) 1458 struct acc_classifier *classifier; 1459 u_long handle; 1460 { 1461 struct acc_filter *afp; 1462 int s; 1463 1464 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 1465 return (EINVAL); 1466 1467 s = splnet(); 1468 LIST_REMOVE(afp, f_chain); 1469 splx(s); 1470 1471 free(afp, M_DEVBUF); 1472 1473 /* todo: update filt_bmask */ 1474 1475 return (0); 1476 } 1477 1478 /* 1479 * delete filters referencing to the specified class. 1480 * if the all flag is not 0, delete all the filters. 1481 */ 1482 int 1483 acc_discard_filters(classifier, class, all) 1484 struct acc_classifier *classifier; 1485 void *class; 1486 int all; 1487 { 1488 struct acc_filter *afp; 1489 int i, s; 1490 1491 s = splnet(); 1492 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1493 do { 1494 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1495 if (all || afp->f_class == class) { 1496 LIST_REMOVE(afp, f_chain); 1497 free(afp, M_DEVBUF); 1498 /* start again from the head */ 1499 break; 1500 } 1501 } while (afp != NULL); 1502 } 1503 splx(s); 1504 1505 if (all) 1506 classifier->acc_fbmask = 0; 1507 1508 return (0); 1509 } 1510 1511 void * 1512 acc_classify(clfier, m, af) 1513 void *clfier; 1514 struct mbuf *m; 1515 int af; 1516 { 1517 struct acc_classifier *classifier; 1518 struct flowinfo flow; 1519 struct acc_filter *afp; 1520 int i; 1521 1522 classifier = (struct acc_classifier *)clfier; 1523 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1524 1525 if (flow.fi_family == AF_INET) { 1526 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1527 1528 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1529 /* only tos is used */ 1530 LIST_FOREACH(afp, 1531 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1532 f_chain) 1533 if (apply_tosfilter4(afp->f_fbmask, 1534 &afp->f_filter, fp)) 1535 /* filter matched */ 1536 return (afp->f_class); 1537 } else if ((classifier->acc_fbmask & 1538 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1539 == 0) { 1540 /* only proto and ports are used */ 1541 LIST_FOREACH(afp, 1542 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1543 f_chain) 1544 if (apply_ppfilter4(afp->f_fbmask, 1545 &afp->f_filter, fp)) 1546 /* filter matched */ 1547 return (afp->f_class); 1548 } else { 1549 /* get the filter hash entry from its dest address */ 1550 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1551 do { 1552 /* 1553 * go through this loop twice. first for dst 1554 * hash, second for wildcards. 1555 */ 1556 LIST_FOREACH(afp, &classifier->acc_filters[i], 1557 f_chain) 1558 if (apply_filter4(afp->f_fbmask, 1559 &afp->f_filter, fp)) 1560 /* filter matched */ 1561 return (afp->f_class); 1562 1563 /* 1564 * check again for filters with a dst addr 1565 * wildcard. 1566 * (daddr == 0 || dmask != 0xffffffff). 1567 */ 1568 if (i != ACC_WILDCARD_INDEX) 1569 i = ACC_WILDCARD_INDEX; 1570 else 1571 break; 1572 } while (1); 1573 } 1574 } 1575 #ifdef INET6 1576 else if (flow.fi_family == AF_INET6) { 1577 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1578 1579 /* get the filter hash entry from its flow ID */ 1580 if (fp6->fi6_flowlabel != 0) 1581 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1582 else 1583 /* flowlable can be zero */ 1584 i = ACC_WILDCARD_INDEX; 1585 1586 /* go through this loop twice. first for flow hash, second 1587 for wildcards. */ 1588 do { 1589 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1590 if (apply_filter6(afp->f_fbmask, 1591 (struct flow_filter6 *)&afp->f_filter, 1592 fp6)) 1593 /* filter matched */ 1594 return (afp->f_class); 1595 1596 /* 1597 * check again for filters with a wildcard. 1598 */ 1599 if (i != ACC_WILDCARD_INDEX) 1600 i = ACC_WILDCARD_INDEX; 1601 else 1602 break; 1603 } while (1); 1604 } 1605 #endif /* INET6 */ 1606 1607 /* no filter matched */ 1608 return (NULL); 1609 } 1610 1611 static int 1612 apply_filter4(fbmask, filt, pkt) 1613 u_int32_t fbmask; 1614 struct flow_filter *filt; 1615 struct flowinfo_in *pkt; 1616 { 1617 if (filt->ff_flow.fi_family != AF_INET) 1618 return (0); 1619 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1620 return (0); 1621 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1622 return (0); 1623 if ((fbmask & FIMB4_DADDR) && 1624 filt->ff_flow.fi_dst.s_addr != 1625 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1626 return (0); 1627 if ((fbmask & FIMB4_SADDR) && 1628 filt->ff_flow.fi_src.s_addr != 1629 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1630 return (0); 1631 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1632 return (0); 1633 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1634 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1635 return (0); 1636 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1637 return (0); 1638 /* match */ 1639 return (1); 1640 } 1641 1642 /* 1643 * filter matching function optimized for a common case that checks 1644 * only protocol and port numbers 1645 */ 1646 static int 1647 apply_ppfilter4(fbmask, filt, pkt) 1648 u_int32_t fbmask; 1649 struct flow_filter *filt; 1650 struct flowinfo_in *pkt; 1651 { 1652 if (filt->ff_flow.fi_family != AF_INET) 1653 return (0); 1654 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1655 return (0); 1656 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1657 return (0); 1658 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1659 return (0); 1660 /* match */ 1661 return (1); 1662 } 1663 1664 /* 1665 * filter matching function only for tos field. 1666 */ 1667 static int 1668 apply_tosfilter4(fbmask, filt, pkt) 1669 u_int32_t fbmask; 1670 struct flow_filter *filt; 1671 struct flowinfo_in *pkt; 1672 { 1673 if (filt->ff_flow.fi_family != AF_INET) 1674 return (0); 1675 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1676 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1677 return (0); 1678 /* match */ 1679 return (1); 1680 } 1681 1682 #ifdef INET6 1683 static int 1684 apply_filter6(fbmask, filt, pkt) 1685 u_int32_t fbmask; 1686 struct flow_filter6 *filt; 1687 struct flowinfo_in6 *pkt; 1688 { 1689 int i; 1690 1691 if (filt->ff_flow6.fi6_family != AF_INET6) 1692 return (0); 1693 if ((fbmask & FIMB6_FLABEL) && 1694 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1695 return (0); 1696 if ((fbmask & FIMB6_PROTO) && 1697 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1698 return (0); 1699 if ((fbmask & FIMB6_SPORT) && 1700 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1701 return (0); 1702 if ((fbmask & FIMB6_DPORT) && 1703 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1704 return (0); 1705 if (fbmask & FIMB6_SADDR) { 1706 for (i = 0; i < 4; i++) 1707 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1708 (pkt->fi6_src.s6_addr32[i] & 1709 filt->ff_mask6.mask6_src.s6_addr32[i])) 1710 return (0); 1711 } 1712 if (fbmask & FIMB6_DADDR) { 1713 for (i = 0; i < 4; i++) 1714 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1715 (pkt->fi6_dst.s6_addr32[i] & 1716 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1717 return (0); 1718 } 1719 if ((fbmask & FIMB6_TCLASS) && 1720 filt->ff_flow6.fi6_tclass != 1721 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1722 return (0); 1723 if ((fbmask & FIMB6_GPI) && 1724 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1725 return (0); 1726 /* match */ 1727 return (1); 1728 } 1729 #endif /* INET6 */ 1730 1731 /* 1732 * filter handle: 1733 * bit 20-28: index to the filter hash table 1734 * bit 0-19: unique id in the hash bucket. 1735 */ 1736 static u_long 1737 get_filt_handle(classifier, i) 1738 struct acc_classifier *classifier; 1739 int i; 1740 { 1741 static u_long handle_number = 1; 1742 u_long handle; 1743 struct acc_filter *afp; 1744 1745 while (1) { 1746 handle = handle_number++ & 0x000fffff; 1747 1748 if (LIST_EMPTY(&classifier->acc_filters[i])) 1749 break; 1750 1751 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1752 if ((afp->f_handle & 0x000fffff) == handle) 1753 break; 1754 if (afp == NULL) 1755 break; 1756 /* this handle is already used, try again */ 1757 } 1758 1759 return ((i << 20) | handle); 1760 } 1761 1762 /* convert filter handle to filter pointer */ 1763 static struct acc_filter * 1764 filth_to_filtp(classifier, handle) 1765 struct acc_classifier *classifier; 1766 u_long handle; 1767 { 1768 struct acc_filter *afp; 1769 int i; 1770 1771 i = ACC_GET_HINDEX(handle); 1772 1773 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1774 if (afp->f_handle == handle) 1775 return (afp); 1776 1777 return (NULL); 1778 } 1779 1780 /* create flowinfo bitmask */ 1781 static u_int32_t 1782 filt2fibmask(filt) 1783 struct flow_filter *filt; 1784 { 1785 u_int32_t mask = 0; 1786 #ifdef INET6 1787 struct flow_filter6 *filt6; 1788 #endif 1789 1790 switch (filt->ff_flow.fi_family) { 1791 case AF_INET: 1792 if (filt->ff_flow.fi_proto != 0) 1793 mask |= FIMB4_PROTO; 1794 if (filt->ff_flow.fi_tos != 0) 1795 mask |= FIMB4_TOS; 1796 if (filt->ff_flow.fi_dst.s_addr != 0) 1797 mask |= FIMB4_DADDR; 1798 if (filt->ff_flow.fi_src.s_addr != 0) 1799 mask |= FIMB4_SADDR; 1800 if (filt->ff_flow.fi_sport != 0) 1801 mask |= FIMB4_SPORT; 1802 if (filt->ff_flow.fi_dport != 0) 1803 mask |= FIMB4_DPORT; 1804 if (filt->ff_flow.fi_gpi != 0) 1805 mask |= FIMB4_GPI; 1806 break; 1807 #ifdef INET6 1808 case AF_INET6: 1809 filt6 = (struct flow_filter6 *)filt; 1810 1811 if (filt6->ff_flow6.fi6_proto != 0) 1812 mask |= FIMB6_PROTO; 1813 if (filt6->ff_flow6.fi6_tclass != 0) 1814 mask |= FIMB6_TCLASS; 1815 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1816 mask |= FIMB6_DADDR; 1817 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1818 mask |= FIMB6_SADDR; 1819 if (filt6->ff_flow6.fi6_sport != 0) 1820 mask |= FIMB6_SPORT; 1821 if (filt6->ff_flow6.fi6_dport != 0) 1822 mask |= FIMB6_DPORT; 1823 if (filt6->ff_flow6.fi6_gpi != 0) 1824 mask |= FIMB6_GPI; 1825 if (filt6->ff_flow6.fi6_flowlabel != 0) 1826 mask |= FIMB6_FLABEL; 1827 break; 1828 #endif /* INET6 */ 1829 } 1830 return (mask); 1831 } 1832 1833 1834 /* 1835 * helper functions to handle IPv4 fragments. 1836 * currently only in-sequence fragments are handled. 1837 * - fragment info is cached in a LRU list. 1838 * - when a first fragment is found, cache its flow info. 1839 * - when a non-first fragment is found, lookup the cache. 1840 */ 1841 1842 struct ip4_frag { 1843 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1844 char ip4f_valid; 1845 u_short ip4f_id; 1846 struct flowinfo_in ip4f_info; 1847 }; 1848 1849 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1850 1851 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1852 1853 1854 static void 1855 ip4f_cache(ip, fin) 1856 struct ip *ip; 1857 struct flowinfo_in *fin; 1858 { 1859 struct ip4_frag *fp; 1860 1861 if (TAILQ_EMPTY(&ip4f_list)) { 1862 /* first time call, allocate fragment cache entries. */ 1863 if (ip4f_init() < 0) 1864 /* allocation failed! */ 1865 return; 1866 } 1867 1868 fp = ip4f_alloc(); 1869 fp->ip4f_id = ip->ip_id; 1870 fp->ip4f_info.fi_proto = ip->ip_p; 1871 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1872 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1873 1874 /* save port numbers */ 1875 fp->ip4f_info.fi_sport = fin->fi_sport; 1876 fp->ip4f_info.fi_dport = fin->fi_dport; 1877 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1878 } 1879 1880 static int 1881 ip4f_lookup(ip, fin) 1882 struct ip *ip; 1883 struct flowinfo_in *fin; 1884 { 1885 struct ip4_frag *fp; 1886 1887 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1888 fp = TAILQ_NEXT(fp, ip4f_chain)) 1889 if (ip->ip_id == fp->ip4f_id && 1890 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1891 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1892 ip->ip_p == fp->ip4f_info.fi_proto) { 1893 1894 /* found the matching entry */ 1895 fin->fi_sport = fp->ip4f_info.fi_sport; 1896 fin->fi_dport = fp->ip4f_info.fi_dport; 1897 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1898 1899 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1900 /* this is the last fragment, 1901 release the entry. */ 1902 ip4f_free(fp); 1903 1904 return (1); 1905 } 1906 1907 /* no matching entry found */ 1908 return (0); 1909 } 1910 1911 static int 1912 ip4f_init(void) 1913 { 1914 struct ip4_frag *fp; 1915 int i; 1916 1917 TAILQ_INIT(&ip4f_list); 1918 for (i=0; i<IP4F_TABSIZE; i++) { 1919 fp = malloc(sizeof(struct ip4_frag), 1920 M_DEVBUF, M_NOWAIT); 1921 if (fp == NULL) { 1922 printf("ip4f_init: can't alloc %dth entry!\n", i); 1923 if (i == 0) 1924 return (-1); 1925 return (0); 1926 } 1927 fp->ip4f_valid = 0; 1928 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1929 } 1930 return (0); 1931 } 1932 1933 static struct ip4_frag * 1934 ip4f_alloc(void) 1935 { 1936 struct ip4_frag *fp; 1937 1938 /* reclaim an entry at the tail, put it at the head */ 1939 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 1940 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1941 fp->ip4f_valid = 1; 1942 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 1943 return (fp); 1944 } 1945 1946 static void 1947 ip4f_free(fp) 1948 struct ip4_frag *fp; 1949 { 1950 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1951 fp->ip4f_valid = 0; 1952 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1953 } 1954 1955 #endif /* ALTQ3_CLFIER_COMPAT */ 1956