/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
 * $FreeBSD$
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#include <machine/md_var.h>		/* for cpu_feature */
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
		    struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
		    struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

#ifdef ALTQ
SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0,
    "ALTQ packet queuing");

#define	ALTQ_FEATURE(name, desc)					\
	SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name,	\
	    CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1,		\
	    desc, "feature")

#ifdef ALTQ_CBQ
ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline");
#endif
#ifdef ALTQ_CODEL
ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline");
#endif
#ifdef ALTQ_RED
ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline");
#endif
#ifdef ALTQ_RIO
ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline");
#endif
#ifdef ALTQ_HFSC
ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline");
#endif
#ifdef ALTQ_PRIQ
ALTQ_FEATURE(priq, "ALTQ Priority Queuing discipline");
#endif
#ifdef ALTQ_FAIRQ
ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline");
#endif
#endif
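
/*
 * Each feature node above shows up as kern.features.altq.<name>, so a
 * userland consumer can probe for a compiled-in discipline, e.g. with
 * sysctlbyname(3) (illustrative sketch, error handling omitted):
 *
 *	int v;
 *	size_t len = sizeof(v);
 *
 *	if (sysctlbyname("kern.features.altq.hfsc", &v, &len, NULL, 0) == 0)
 *		printf("hfsc is available\n");
 */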

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

	ifq->altq_type = type;
	ifq->altq_disc = discipline;
	ifq->altq_enqueue = enqueue;
	ifq->altq_dequeue = dequeue;
	ifq->altq_request = request;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	ifq->altq_type = ALTQT_NONE;
	ifq->altq_disc = NULL;
	ifq->altq_enqueue = NULL;
	ifq->altq_dequeue = NULL;
	ifq->altq_request = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT) / machclk_freq
 *		= (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
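
/*
 * Worked example, assuming machclk_freq is 1GHz (a plausible TSC rate):
 * a 10Mbps profile yields
 *	tbr_rate = TBR_SCALE(10000000 / 8) / 10^9
 *		 = (1250000 << 29) / 10^9, about 6.7 * 10^5
 * scaled bytes per machclk tick.  Over one second (10^9 ticks) the bucket
 * accumulates roughly TBR_SCALE(1250000), i.e. 1.25MB worth of tokens,
 * which is exactly 10Mbit.
 */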

static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = LLONG_MAX;
	/*
	 * The longest time between tbr_dequeue() calls will be about 1
	 * system tick, as the callout that drives it is scheduled once per
	 * tick.  The refill-time detection logic in tbr_dequeue() can only
	 * properly detect the passage of up to LLONG_MAX machclk ticks.
	 * Therefore, in order for this logic to function properly in the
	 * extreme case, the maximum value of tbr_filluptime should be
	 * LLONG_MAX less one system tick's worth of machclk ticks less
	 * some additional slop factor (here one more system tick's worth
	 * of machclk ticks).
	 */
	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
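
/*
 * tbr_set() expects profile->rate in bits per second and profile->depth
 * in bytes; a zero rate deletes an existing regulator.  A hypothetical
 * caller limiting an interface to 100Mbps with a 12KB burst might do
 * something like:
 *
 *	struct tb_profile p = { 100000000, 12288 };
 *
 *	error = tbr_set(&ifp->if_snd, &p);
 *	...
 *	p.rate = 0;	(* later: tear the regulator down again *)
 *	error = tbr_set(&ifp->if_snd, &p);
 */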

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	struct epoch_tracker et;
	int active;

	active = 0;
	NET_EPOCH_ENTER(et);
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	NET_EPOCH_EXIT(et);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions.  Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline specific functions with regards to
 * malloc with WAITOK; also it is not yet clear which lock to use.
 */
int
altq_add(struct ifnet *ifp, struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_add_altq(ifp, a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is yet unclear what lock to use to protect this operation; the
 * discipline specific functions will determine and grab it
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline specific functions with regards to
 * copyout operations; also it is not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes, version);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
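
/*
 * The diffserv field is the TOS octet in IPv4 and the second octet of
 * the version/traffic-class/flowlabel word in IPv6, hence the
 * (flowlabel >> 20) & 0xff extraction above.  For example, a packet
 * marked with DSCP EF (0x2e) yields ds_field 0xb8 (0x2e << 2); the low
 * two bits are the CU bits, which write_dsfield() below preserves.
 */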
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
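
/*
 * Worked example of the RFC 1624 update above: if the old TOS octet m
 * is 0x00 and the new one m' is 0xb8, the code adds
 *	~m (0xff00 + 0xff) + m' (0xb8)
 * to the recovered one's complement sum, folds the carries, and
 * complements the result; this matches recomputing the header checksum
 * from scratch with the new TOS value, because the byte that shares
 * m's 16-bit word (version/IHL) is unchanged and cancels out.
 */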

/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

static void
init_machclk_setup(void)
{
	callout_init(&tbr_callout, 1);

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
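
/*
 * With MACHCLK_SHIFT 8 the emulated clock runs at
 *	machclk_freq = 1000000 << 8 = 256000000 (256MHz),
 * and read_machclk() above returns microseconds-since-boot scaled by the
 * same factor, so one emulated machclk tick is 1/256 of a microsecond.
 */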

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = IPV6_TRAFFIC_CLASS(ip6);

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};

/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
	}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
		/* get next header and header length */
		struct _opt6 *opt6;

		opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
		proto = opt6->opt6_nxt;
		off += 8 + (opt6->opt6_hlen * 4);
		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
			fin->fi_gpi = opt6->ah_spi;
	}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is the first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
		}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
		}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
		}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	    M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
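
/*
 * The normalization above means, for instance, that a hypothetical IPv4
 * filter with fi_dst = 10.1.2.3 and no mask set is matched as
 * 10.1.2.3/32 (full mask, hashed by destination address), while
 * fi_dst = 0 matches any destination and lands in the
 * ACC_WILDCARD_INDEX bucket.
 */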

int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
		    (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
			    &classifier->acc_filters[ACC_WILDCARD_INDEX],
			    f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
				    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
				    f_chain)
					if (apply_filter4(afp->f_fbmask,
					    &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
				    (struct flow_filter6 *)&afp->f_filter,
				    fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}
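
/*
 * For example, a filter in hash bucket 5 with unique id 3 gets handle
 *	(5 << 20) | 3 = 0x00500003;
 * ACC_GET_HINDEX() below recovers the bucket index 5, and the low 20
 * bits identify the filter within that bucket.
 */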

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}
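
/*
 * E.g. an IPv4 filter specifying protocol TCP, destination port 80 and
 * a destination network would come back from filt2fibmask() as
 * FIMB4_PROTO | FIMB4_DPORT | FIMB4_DADDR, so the classifier only
 * extracts and compares those fields when matching packets.
 */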

/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, look up the cache.
 */

struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;
    char    ip4f_valid;
    u_short ip4f_id;
    struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */

static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {
			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i=0; i<IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		    M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */