/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
 * $FreeBSD$
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#include <machine/md_var.h>		/* for cpu_feature */
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout;

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *,
		    struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
		    struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
		    struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
		    struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag *ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

#ifdef ALTQ
SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0,
    "ALTQ packet queuing");

#define	ALTQ_FEATURE(name, desc)					\
	SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name,	\
	    CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1,		\
	    desc, "feature")

#ifdef ALTQ_CBQ
ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline");
#endif
#ifdef ALTQ_CODEL
ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline");
#endif
#ifdef ALTQ_RED
ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline");
#endif
#ifdef ALTQ_RIO
ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline");
#endif
#ifdef ALTQ_HFSC
ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline");
#endif
#ifdef ALTQ_PRIQ
ALTQ_FEATURE(priq, "ALTQ Priority Queuing discipline");
#endif
#ifdef ALTQ_FAIRQ
ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline");
#endif
#endif

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return (NULL);
}

int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
    struct mbuf *(*dequeue)(struct ifaltq *, int),
    int (*request)(struct ifaltq *, int, void *))
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return (ENXIO);
	}

	ifq->altq_type    = type;
	ifq->altq_disc    = discipline;
	ifq->altq_enqueue = enqueue;
	ifq->altq_dequeue = dequeue;
	ifq->altq_request = request;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	IFQ_UNLOCK(ifq);
	return (0);
}

int
altq_detach(struct ifaltq *ifq)
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return (ENXIO);
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (EBUSY);
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	ifq->altq_type    = ALTQT_NONE;
	ifq->altq_disc    = NULL;
	ifq->altq_enqueue = NULL;
	ifq->altq_dequeue = NULL;
	ifq->altq_request = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return (0);
}

int
altq_enable(struct ifaltq *ifq)
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return (ENXIO);
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	splx(s);

	IFQ_UNLOCK(ifq);
	return (0);
}
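/*
 * Illustrative sketch (not part of this file): a discipline's pfattach
 * routine is expected to wire itself up through altq_attach() and then
 * enable the queue.  With hypothetical cbq_* handlers it would look
 * roughly like:
 *
 *	error = altq_attach(&ifp->if_snd, ALTQT_CBQ, cbqp,
 *	    cbq_enqueue, cbq_dequeue, cbq_request);
 *	if (error == 0)
 *		error = altq_enable(&ifp->if_snd);
 */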
int
altq_disable(struct ifaltq *ifq)
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED);
	splx(s);

	IFQ_UNLOCK(ifq);
	return (0);
}

#ifdef ALTQ_DEBUG
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
	    failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT)  / machclk_freq
 *		= (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
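/*
 * Worked example of the scaling above (illustrative numbers): for a
 * 10 Mbps profile on a machine clock running at 1 GHz,
 *
 *	tbr_rate = TBR_SCALE(10000000 / 8) / 10^9
 *	         = (1250000 << 29) / 10^9 = 671088
 *
 * i.e. about 671088/2^29 ~ 0.00125 bytes of credit per machclk tick,
 * which is 1.25 MB/s once unscaled.
 */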
static struct mbuf *
tbr_dequeue(struct ifaltq *ifq, int op)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = LLONG_MAX;
	/*
	 * The longest time between tbr_dequeue() calls will be about 1
	 * system tick, as the callout that drives it is scheduled once per
	 * tick.  The refill-time detection logic in tbr_dequeue() can only
	 * properly detect the passage of up to LLONG_MAX machclk ticks.
	 * Therefore, in order for this logic to function properly in the
	 * extreme case, the maximum value of tbr_filluptime should be
	 * LLONG_MAX less one system tick's worth of machclk ticks less
	 * some additional slop factor (here one more system tick's worth
	 * of machclk ticks).
	 */
	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}
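/*
 * Usage sketch (hypothetical values): install a 1 Mbps regulator with a
 * 1500-byte bucket on an interface's send queue, then delete it again;
 * per the comment above, a zero rate removes the regulator.
 *
 *	struct tb_profile p = { .rate = 1000000, .depth = 1500 };
 *	error = tbr_set(&ifp->if_snd, &p);
 *	...
 *	p.rate = 0;
 *	error = tbr_set(&ifp->if_snd, &p);
 */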
/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(void *arg)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	struct epoch_tracker et;
	int active;

	active = 0;
	NET_EPOCH_ENTER(et);
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	NET_EPOCH_EXIT(et);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}

/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions.  Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline-specific functions (which may malloc
 * with M_WAITOK); it is not yet clear which lock to use here.
 */
int
altq_add(struct ifnet *ifp, struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(ifp, a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_add_altq(ifp, a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}
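/*
 * Illustrative call order (an assumption about the pf(4) ioctl path,
 * not verified here): the discipline is created first with an empty
 * qname, child queues are added through the same entry point (which
 * dispatches to altq_add_queue() when a->qname is set), and finally
 * altq_pfattach() hooks the discipline into the interface.
 */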
/*
 * remove a discipline or a queue
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is not yet clear which lock to use to protect this operation; the
 * discipline-specific functions will determine and grab it.
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline-specific functions with regard to
 * copyout operations; it is not yet clear which lock to use here.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes, version);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
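/*
 * Layout of the ip6_flow word read above (RFC 2460), for reference:
 *
 *	 31     28 27       20 19                        0
 *	+---------+-----------+--------------------------+
 *	| version |  traffic  |        flow label        |
 *	|   (6)   |   class   |                          |
 *	+---------+-----------+--------------------------+
 *
 * hence (flowlabel >> 28) yields the version and
 * (flowlabel >> 20) & 0xff the 8-bit traffic class used as the
 * diffserv field.
 */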
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);	/* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
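/*
 * Worked example of the incremental checksum update above
 * (illustrative numbers): changing TOS m = 0x00 to m' = 0xb8 with an
 * original header checksum HC = 0xbeef:
 *
 *	sum  = ~0xbeef & 0xffff                = 0x4110
 *	sum += 0xff00 + (~0x00 & 0xff) + 0xb8  -> 0x141c7
 *	fold the carries: 0x141c7              -> 0x41c8
 *	HC'  = ~0x41c8 & 0xffff                = 0xbe37
 */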
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if defined(__amd64__) || defined(__i386__)
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);

static void
init_machclk_setup(void)
{
	callout_init(&tbr_callout, 1);

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
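/*
 * Example of the emulated clock above: with MACHCLK_SHIFT = 8 the
 * clock advances (1 << 8) = 256 ticks per microsecond of wall time, so
 * machclk_freq = 1000000 << 8 = 256000000 (the "emulated 256MHz"), and
 * read_machclk() returns 256000000 ticks for each second since boot.
 */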
#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
    u_int32_t filt_bmask)
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = IPV6_TRAFFIC_CLASS(ip6);

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
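/*
 * Note on the two length encodings handled with this template
 * (worked numbers for illustration): for AH, opt6_hlen counts 32-bit
 * words beyond the fixed 8 bytes, so opt6_hlen == 4 means a
 * 8 + 4*4 = 24 byte header; for hop-by-hop, routing and destination
 * options it counts 8-byte units not including the first, so
 * opt6_hlen == 1 means a (1 + 1) * 8 = 16 byte header.
 */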
/*
 * extract port numbers from an IPv4 packet.
 */
static int
extract_ports4(struct mbuf *m, struct ip *ip, struct flowinfo_in *fin)
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);	/* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
		/* get next header and header length */
		struct _opt6 *opt6;

		opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
		proto = opt6->opt6_nxt;
		off += 8 + (opt6->opt6_hlen * 4);
		if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
			fin->fi_gpi = opt6->ah_spi;
		}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* IPv6 fragmentation is not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */
/*
 * altq common classifier
 */
int
acc_add_filter(struct acc_classifier *classifier, struct flow_filter *filter,
    void *class, u_long *phandle)
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	    M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
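/*
 * Bucket-choice example for acc_add_filter() (illustrative): a filter
 * with dst 10.0.0.1 and a full /32 mask is stored in bucket
 * ACC_GET_HASH_INDEX(10.0.0.1), while one with dst 10.0.0.0/24 (a
 * non-host mask) goes to ACC_WILDCARD_INDEX; acc_classify() therefore
 * scans the hash bucket first and the wildcard bucket second.
 */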
1416 */ 1417 s = splnet(); 1418 prev = NULL; 1419 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 1420 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 1421 prev = tmp; 1422 else 1423 break; 1424 } 1425 if (prev == NULL) 1426 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 1427 else 1428 LIST_INSERT_AFTER(prev, afp, f_chain); 1429 splx(s); 1430 1431 *phandle = afp->f_handle; 1432 return (0); 1433 } 1434 1435 int 1436 acc_delete_filter(classifier, handle) 1437 struct acc_classifier *classifier; 1438 u_long handle; 1439 { 1440 struct acc_filter *afp; 1441 int s; 1442 1443 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 1444 return (EINVAL); 1445 1446 s = splnet(); 1447 LIST_REMOVE(afp, f_chain); 1448 splx(s); 1449 1450 free(afp, M_DEVBUF); 1451 1452 /* todo: update filt_bmask */ 1453 1454 return (0); 1455 } 1456 1457 /* 1458 * delete filters referencing to the specified class. 1459 * if the all flag is not 0, delete all the filters. 1460 */ 1461 int 1462 acc_discard_filters(classifier, class, all) 1463 struct acc_classifier *classifier; 1464 void *class; 1465 int all; 1466 { 1467 struct acc_filter *afp; 1468 int i, s; 1469 1470 s = splnet(); 1471 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1472 do { 1473 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1474 if (all || afp->f_class == class) { 1475 LIST_REMOVE(afp, f_chain); 1476 free(afp, M_DEVBUF); 1477 /* start again from the head */ 1478 break; 1479 } 1480 } while (afp != NULL); 1481 } 1482 splx(s); 1483 1484 if (all) 1485 classifier->acc_fbmask = 0; 1486 1487 return (0); 1488 } 1489 1490 void * 1491 acc_classify(clfier, m, af) 1492 void *clfier; 1493 struct mbuf *m; 1494 int af; 1495 { 1496 struct acc_classifier *classifier; 1497 struct flowinfo flow; 1498 struct acc_filter *afp; 1499 int i; 1500 1501 classifier = (struct acc_classifier *)clfier; 1502 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1503 1504 if (flow.fi_family == AF_INET) { 1505 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1506 1507 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1508 /* only tos is used */ 1509 LIST_FOREACH(afp, 1510 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1511 f_chain) 1512 if (apply_tosfilter4(afp->f_fbmask, 1513 &afp->f_filter, fp)) 1514 /* filter matched */ 1515 return (afp->f_class); 1516 } else if ((classifier->acc_fbmask & 1517 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1518 == 0) { 1519 /* only proto and ports are used */ 1520 LIST_FOREACH(afp, 1521 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1522 f_chain) 1523 if (apply_ppfilter4(afp->f_fbmask, 1524 &afp->f_filter, fp)) 1525 /* filter matched */ 1526 return (afp->f_class); 1527 } else { 1528 /* get the filter hash entry from its dest address */ 1529 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1530 do { 1531 /* 1532 * go through this loop twice. first for dst 1533 * hash, second for wildcards. 1534 */ 1535 LIST_FOREACH(afp, &classifier->acc_filters[i], 1536 f_chain) 1537 if (apply_filter4(afp->f_fbmask, 1538 &afp->f_filter, fp)) 1539 /* filter matched */ 1540 return (afp->f_class); 1541 1542 /* 1543 * check again for filters with a dst addr 1544 * wildcard. 1545 * (daddr == 0 || dmask != 0xffffffff). 
1546 */ 1547 if (i != ACC_WILDCARD_INDEX) 1548 i = ACC_WILDCARD_INDEX; 1549 else 1550 break; 1551 } while (1); 1552 } 1553 } 1554 #ifdef INET6 1555 else if (flow.fi_family == AF_INET6) { 1556 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1557 1558 /* get the filter hash entry from its flow ID */ 1559 if (fp6->fi6_flowlabel != 0) 1560 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1561 else 1562 /* flowlable can be zero */ 1563 i = ACC_WILDCARD_INDEX; 1564 1565 /* go through this loop twice. first for flow hash, second 1566 for wildcards. */ 1567 do { 1568 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1569 if (apply_filter6(afp->f_fbmask, 1570 (struct flow_filter6 *)&afp->f_filter, 1571 fp6)) 1572 /* filter matched */ 1573 return (afp->f_class); 1574 1575 /* 1576 * check again for filters with a wildcard. 1577 */ 1578 if (i != ACC_WILDCARD_INDEX) 1579 i = ACC_WILDCARD_INDEX; 1580 else 1581 break; 1582 } while (1); 1583 } 1584 #endif /* INET6 */ 1585 1586 /* no filter matched */ 1587 return (NULL); 1588 } 1589 1590 static int 1591 apply_filter4(fbmask, filt, pkt) 1592 u_int32_t fbmask; 1593 struct flow_filter *filt; 1594 struct flowinfo_in *pkt; 1595 { 1596 if (filt->ff_flow.fi_family != AF_INET) 1597 return (0); 1598 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1599 return (0); 1600 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1601 return (0); 1602 if ((fbmask & FIMB4_DADDR) && 1603 filt->ff_flow.fi_dst.s_addr != 1604 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1605 return (0); 1606 if ((fbmask & FIMB4_SADDR) && 1607 filt->ff_flow.fi_src.s_addr != 1608 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1609 return (0); 1610 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1611 return (0); 1612 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1613 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1614 return (0); 1615 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1616 return (0); 1617 /* match */ 1618 return (1); 1619 } 1620 1621 /* 1622 * filter matching function optimized for a common case that checks 1623 * only protocol and port numbers 1624 */ 1625 static int 1626 apply_ppfilter4(fbmask, filt, pkt) 1627 u_int32_t fbmask; 1628 struct flow_filter *filt; 1629 struct flowinfo_in *pkt; 1630 { 1631 if (filt->ff_flow.fi_family != AF_INET) 1632 return (0); 1633 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1634 return (0); 1635 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1636 return (0); 1637 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1638 return (0); 1639 /* match */ 1640 return (1); 1641 } 1642 1643 /* 1644 * filter matching function only for tos field. 
1645 */ 1646 static int 1647 apply_tosfilter4(fbmask, filt, pkt) 1648 u_int32_t fbmask; 1649 struct flow_filter *filt; 1650 struct flowinfo_in *pkt; 1651 { 1652 if (filt->ff_flow.fi_family != AF_INET) 1653 return (0); 1654 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1655 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1656 return (0); 1657 /* match */ 1658 return (1); 1659 } 1660 1661 #ifdef INET6 1662 static int 1663 apply_filter6(fbmask, filt, pkt) 1664 u_int32_t fbmask; 1665 struct flow_filter6 *filt; 1666 struct flowinfo_in6 *pkt; 1667 { 1668 int i; 1669 1670 if (filt->ff_flow6.fi6_family != AF_INET6) 1671 return (0); 1672 if ((fbmask & FIMB6_FLABEL) && 1673 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1674 return (0); 1675 if ((fbmask & FIMB6_PROTO) && 1676 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1677 return (0); 1678 if ((fbmask & FIMB6_SPORT) && 1679 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1680 return (0); 1681 if ((fbmask & FIMB6_DPORT) && 1682 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1683 return (0); 1684 if (fbmask & FIMB6_SADDR) { 1685 for (i = 0; i < 4; i++) 1686 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1687 (pkt->fi6_src.s6_addr32[i] & 1688 filt->ff_mask6.mask6_src.s6_addr32[i])) 1689 return (0); 1690 } 1691 if (fbmask & FIMB6_DADDR) { 1692 for (i = 0; i < 4; i++) 1693 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1694 (pkt->fi6_dst.s6_addr32[i] & 1695 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1696 return (0); 1697 } 1698 if ((fbmask & FIMB6_TCLASS) && 1699 filt->ff_flow6.fi6_tclass != 1700 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1701 return (0); 1702 if ((fbmask & FIMB6_GPI) && 1703 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1704 return (0); 1705 /* match */ 1706 return (1); 1707 } 1708 #endif /* INET6 */ 1709 1710 /* 1711 * filter handle: 1712 * bit 20-28: index to the filter hash table 1713 * bit 0-19: unique id in the hash bucket. 
static u_long
get_filt_handle(struct acc_classifier *classifier, int i)
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(struct acc_classifier *classifier, u_long handle)
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(struct flow_filter *filt)
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}

/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char	ip4f_valid;
	u_short	ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
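/*
 * LRU example (illustrative): the cache is fixed at IP4F_TABSIZE = 16
 * entries; ip4f_alloc() always recycles the entry at the tail of
 * ip4f_list and moves it to the head, so a 17th concurrent fragment
 * flow silently evicts the least recently used entry.
 */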
static void
ip4f_cache(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {
			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		    M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(struct ip4_frag *fp)
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */