1 /*- 2 * Copyright (C) 1997-2003 3 * Sony Computer Science Laboratories Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ 27 * $FreeBSD$ 28 */ 29 30 #include "opt_altq.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/systm.h> 38 #include <sys/proc.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 #include <sys/kernel.h> 42 #include <sys/errno.h> 43 #include <sys/syslog.h> 44 #include <sys/sysctl.h> 45 #include <sys/queue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_dl.h> 50 #include <net/if_types.h> 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_systm.h> 55 #include <netinet/ip.h> 56 #ifdef INET6 57 #include <netinet/ip6.h> 58 #endif 59 #include <netinet/tcp.h> 60 #include <netinet/udp.h> 61 62 #include <netpfil/pf/pf.h> 63 #include <netpfil/pf/pf_altq.h> 64 #include <net/altq/altq.h> 65 66 /* machine dependent clock related includes */ 67 #include <sys/bus.h> 68 #include <sys/cpu.h> 69 #include <sys/eventhandler.h> 70 #include <machine/clock.h> 71 #if defined(__amd64__) || defined(__i386__) 72 #include <machine/cpufunc.h> /* for pentium tsc */ 73 #include <machine/specialreg.h> /* for CPUID_TSC */ 74 #include <machine/md_var.h> /* for cpu_feature */ 75 #endif /* __amd64 || __i386__ */ 76 77 /* 78 * internal function prototypes 79 */ 80 static void tbr_timeout(void *); 81 int (*altq_input)(struct mbuf *, int) = NULL; 82 static struct mbuf *tbr_dequeue(struct ifaltq *, int); 83 static int tbr_timer = 0; /* token bucket regulator timer */ 84 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 85 static struct callout tbr_callout = CALLOUT_INITIALIZER; 86 #else 87 static struct callout tbr_callout; 88 #endif 89 90 #ifdef ALTQ3_CLFIER_COMPAT 91 static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 92 #ifdef INET6 93 static int extract_ports6(struct mbuf *, struct ip6_hdr *, 94 struct flowinfo_in6 *); 95 #endif 96 static int apply_filter4(u_int32_t, struct flow_filter *, 97 struct flowinfo_in *); 98 static int apply_ppfilter4(u_int32_t, struct flow_filter *, 99 struct flowinfo_in *); 100 #ifdef INET6 101 static int apply_filter6(u_int32_t, struct flow_filter6 *, 102 struct flowinfo_in6 *); 103 #endif 104 static int apply_tosfilter4(u_int32_t, struct flow_filter *, 105 struct flowinfo_in *); 106 static u_long get_filt_handle(struct acc_classifier *, int); 107 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 108 static u_int32_t filt2fibmask(struct flow_filter *); 109 110 static void ip4f_cache(struct ip *, struct flowinfo_in *); 111 static int ip4f_lookup(struct ip *, struct flowinfo_in *); 112 static int ip4f_init(void); 113 static struct ip4_frag *ip4f_alloc(void); 114 static void ip4f_free(struct ip4_frag *); 115 #endif /* ALTQ3_CLFIER_COMPAT */ 116 117 #ifdef ALTQ 118 SYSCTL_NODE(_kern_features, OID_AUTO, altq, CTLFLAG_RD | CTLFLAG_CAPRD, 0, 119 "ALTQ packet queuing"); 120 121 #define ALTQ_FEATURE(name, desc) \ 122 SYSCTL_INT_WITH_LABEL(_kern_features_altq, OID_AUTO, name, \ 123 CTLFLAG_RD | CTLFLAG_CAPRD, SYSCTL_NULL_INT_PTR, 1, \ 124 desc, "feature") 125 126 #ifdef ALTQ_CBQ 127 ALTQ_FEATURE(cbq, "ALTQ Class Based Queuing discipline"); 128 #endif 129 #ifdef ALTQ_CODEL 130 ALTQ_FEATURE(codel, "ALTQ Controlled Delay discipline"); 131 #endif 132 #ifdef ALTQ_RED 133 ALTQ_FEATURE(red, "ALTQ Random Early Detection discipline"); 134 #endif 135 #ifdef ALTQ_RIO 136 ALTQ_FEATURE(rio, "ALTQ Random Early Drop discipline"); 137 #endif 138 #ifdef ALTQ_HFSC 139 ALTQ_FEATURE(hfsc, "ALTQ Hierarchical Packet Scheduler discipline"); 140 #endif 141 #ifdef ALTQ_PRIQ 142 ALTQ_FEATURE(priq, "ATLQ Priority Queuing discipline"); 143 #endif 144 #ifdef ALTQ_FAIRQ 145 ALTQ_FEATURE(fairq, "ALTQ Fair Queuing discipline"); 146 #endif 147 #endif 148 149 /* 150 * alternate queueing support routines 151 */ 152 153 /* look up the queue state by the interface name and the queueing type. */ 154 void * 155 altq_lookup(name, type) 156 char *name; 157 int type; 158 { 159 struct ifnet *ifp; 160 161 if ((ifp = ifunit(name)) != NULL) { 162 /* read if_snd unlocked */ 163 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 164 return (ifp->if_snd.altq_disc); 165 } 166 167 return NULL; 168 } 169 170 int 171 altq_attach(ifq, type, discipline, enqueue, dequeue, request) 172 struct ifaltq *ifq; 173 int type; 174 void *discipline; 175 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 176 struct mbuf *(*dequeue)(struct ifaltq *, int); 177 int (*request)(struct ifaltq *, int, void *); 178 { 179 IFQ_LOCK(ifq); 180 if (!ALTQ_IS_READY(ifq)) { 181 IFQ_UNLOCK(ifq); 182 return ENXIO; 183 } 184 185 ifq->altq_type = type; 186 ifq->altq_disc = discipline; 187 ifq->altq_enqueue = enqueue; 188 ifq->altq_dequeue = dequeue; 189 ifq->altq_request = request; 190 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); 191 IFQ_UNLOCK(ifq); 192 return 0; 193 } 194 195 int 196 altq_detach(ifq) 197 struct ifaltq *ifq; 198 { 199 IFQ_LOCK(ifq); 200 201 if (!ALTQ_IS_READY(ifq)) { 202 IFQ_UNLOCK(ifq); 203 return ENXIO; 204 } 205 if (ALTQ_IS_ENABLED(ifq)) { 206 IFQ_UNLOCK(ifq); 207 return EBUSY; 208 } 209 if (!ALTQ_IS_ATTACHED(ifq)) { 210 IFQ_UNLOCK(ifq); 211 return (0); 212 } 213 214 ifq->altq_type = ALTQT_NONE; 215 ifq->altq_disc = NULL; 216 ifq->altq_enqueue = NULL; 217 ifq->altq_dequeue = NULL; 218 ifq->altq_request = NULL; 219 ifq->altq_flags &= ALTQF_CANTCHANGE; 220 221 IFQ_UNLOCK(ifq); 222 return 0; 223 } 224 225 int 226 altq_enable(ifq) 227 struct ifaltq *ifq; 228 { 229 int s; 230 231 IFQ_LOCK(ifq); 232 233 if (!ALTQ_IS_READY(ifq)) { 234 IFQ_UNLOCK(ifq); 235 return ENXIO; 236 } 237 if (ALTQ_IS_ENABLED(ifq)) { 238 IFQ_UNLOCK(ifq); 239 return 0; 240 } 241 242 s = splnet(); 243 IFQ_PURGE_NOLOCK(ifq); 244 ASSERT(ifq->ifq_len == 0); 245 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ 246 ifq->altq_flags |= ALTQF_ENABLED; 247 splx(s); 248 249 IFQ_UNLOCK(ifq); 250 return 0; 251 } 252 253 int 254 altq_disable(ifq) 255 struct ifaltq *ifq; 256 { 257 int s; 258 259 IFQ_LOCK(ifq); 260 if (!ALTQ_IS_ENABLED(ifq)) { 261 IFQ_UNLOCK(ifq); 262 return 0; 263 } 264 265 s = splnet(); 266 IFQ_PURGE_NOLOCK(ifq); 267 ASSERT(ifq->ifq_len == 0); 268 ifq->altq_flags &= ~(ALTQF_ENABLED); 269 splx(s); 270 271 IFQ_UNLOCK(ifq); 272 return 0; 273 } 274 275 #ifdef ALTQ_DEBUG 276 void 277 altq_assert(file, line, failedexpr) 278 const char *file, *failedexpr; 279 int line; 280 { 281 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 282 failedexpr, file, line); 283 panic("altq assertion"); 284 /* NOTREACHED */ 285 } 286 #endif 287 288 /* 289 * internal representation of token bucket parameters 290 * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq 291 * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq 292 * depth: byte << TBR_SHIFT 293 * 294 */ 295 #define TBR_SHIFT 29 296 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 297 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 298 299 static struct mbuf * 300 tbr_dequeue(ifq, op) 301 struct ifaltq *ifq; 302 int op; 303 { 304 struct tb_regulator *tbr; 305 struct mbuf *m; 306 int64_t interval; 307 u_int64_t now; 308 309 IFQ_LOCK_ASSERT(ifq); 310 tbr = ifq->altq_tbr; 311 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 312 /* if this is a remove after poll, bypass tbr check */ 313 } else { 314 /* update token only when it is negative */ 315 if (tbr->tbr_token <= 0) { 316 now = read_machclk(); 317 interval = now - tbr->tbr_last; 318 if (interval >= tbr->tbr_filluptime) 319 tbr->tbr_token = tbr->tbr_depth; 320 else { 321 tbr->tbr_token += interval * tbr->tbr_rate; 322 if (tbr->tbr_token > tbr->tbr_depth) 323 tbr->tbr_token = tbr->tbr_depth; 324 } 325 tbr->tbr_last = now; 326 } 327 /* if token is still negative, don't allow dequeue */ 328 if (tbr->tbr_token <= 0) 329 return (NULL); 330 } 331 332 if (ALTQ_IS_ENABLED(ifq)) 333 m = (*ifq->altq_dequeue)(ifq, op); 334 else { 335 if (op == ALTDQ_POLL) 336 _IF_POLL(ifq, m); 337 else 338 _IF_DEQUEUE(ifq, m); 339 } 340 341 if (m != NULL && op == ALTDQ_REMOVE) 342 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 343 tbr->tbr_lastop = op; 344 return (m); 345 } 346 347 /* 348 * set a token bucket regulator. 349 * if the specified rate is zero, the token bucket regulator is deleted. 350 */ 351 int 352 tbr_set(ifq, profile) 353 struct ifaltq *ifq; 354 struct tb_profile *profile; 355 { 356 struct tb_regulator *tbr, *otbr; 357 358 if (tbr_dequeue_ptr == NULL) 359 tbr_dequeue_ptr = tbr_dequeue; 360 361 if (machclk_freq == 0) 362 init_machclk(); 363 if (machclk_freq == 0) { 364 printf("tbr_set: no cpu clock available!\n"); 365 return (ENXIO); 366 } 367 368 IFQ_LOCK(ifq); 369 if (profile->rate == 0) { 370 /* delete this tbr */ 371 if ((tbr = ifq->altq_tbr) == NULL) { 372 IFQ_UNLOCK(ifq); 373 return (ENOENT); 374 } 375 ifq->altq_tbr = NULL; 376 free(tbr, M_DEVBUF); 377 IFQ_UNLOCK(ifq); 378 return (0); 379 } 380 381 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); 382 if (tbr == NULL) { 383 IFQ_UNLOCK(ifq); 384 return (ENOMEM); 385 } 386 387 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 388 tbr->tbr_depth = TBR_SCALE(profile->depth); 389 if (tbr->tbr_rate > 0) 390 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 391 else 392 tbr->tbr_filluptime = LLONG_MAX; 393 /* 394 * The longest time between tbr_dequeue() calls will be about 1 395 * system tick, as the callout that drives it is scheduled once per 396 * tick. The refill-time detection logic in tbr_dequeue() can only 397 * properly detect the passage of up to LLONG_MAX machclk ticks. 398 * Therefore, in order for this logic to function properly in the 399 * extreme case, the maximum value of tbr_filluptime should be 400 * LLONG_MAX less one system tick's worth of machclk ticks less 401 * some additional slop factor (here one more system tick's worth 402 * of machclk ticks). 403 */ 404 if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) 405 tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; 406 tbr->tbr_token = tbr->tbr_depth; 407 tbr->tbr_last = read_machclk(); 408 tbr->tbr_lastop = ALTDQ_REMOVE; 409 410 otbr = ifq->altq_tbr; 411 ifq->altq_tbr = tbr; /* set the new tbr */ 412 413 if (otbr != NULL) 414 free(otbr, M_DEVBUF); 415 else { 416 if (tbr_timer == 0) { 417 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 418 tbr_timer = 1; 419 } 420 } 421 IFQ_UNLOCK(ifq); 422 return (0); 423 } 424 425 /* 426 * tbr_timeout goes through the interface list, and kicks the drivers 427 * if necessary. 428 * 429 * MPSAFE 430 */ 431 static void 432 tbr_timeout(arg) 433 void *arg; 434 { 435 VNET_ITERATOR_DECL(vnet_iter); 436 struct ifnet *ifp; 437 struct epoch_tracker et; 438 int active; 439 440 active = 0; 441 NET_EPOCH_ENTER(et); 442 VNET_LIST_RLOCK_NOSLEEP(); 443 VNET_FOREACH(vnet_iter) { 444 CURVNET_SET(vnet_iter); 445 for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp; 446 ifp = CK_STAILQ_NEXT(ifp, if_link)) { 447 /* read from if_snd unlocked */ 448 if (!TBR_IS_ENABLED(&ifp->if_snd)) 449 continue; 450 active++; 451 if (!IFQ_IS_EMPTY(&ifp->if_snd) && 452 ifp->if_start != NULL) 453 (*ifp->if_start)(ifp); 454 } 455 CURVNET_RESTORE(); 456 } 457 VNET_LIST_RUNLOCK_NOSLEEP(); 458 NET_EPOCH_EXIT(et); 459 if (active > 0) 460 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 461 else 462 tbr_timer = 0; /* don't need tbr_timer anymore */ 463 } 464 465 /* 466 * attach a discipline to the interface. if one already exists, it is 467 * overridden. 468 * Locking is done in the discipline specific attach functions. Basically 469 * they call back to altq_attach which takes care of the attach and locking. 470 */ 471 int 472 altq_pfattach(struct pf_altq *a) 473 { 474 int error = 0; 475 476 switch (a->scheduler) { 477 case ALTQT_NONE: 478 break; 479 #ifdef ALTQ_CBQ 480 case ALTQT_CBQ: 481 error = cbq_pfattach(a); 482 break; 483 #endif 484 #ifdef ALTQ_PRIQ 485 case ALTQT_PRIQ: 486 error = priq_pfattach(a); 487 break; 488 #endif 489 #ifdef ALTQ_HFSC 490 case ALTQT_HFSC: 491 error = hfsc_pfattach(a); 492 break; 493 #endif 494 #ifdef ALTQ_FAIRQ 495 case ALTQT_FAIRQ: 496 error = fairq_pfattach(a); 497 break; 498 #endif 499 #ifdef ALTQ_CODEL 500 case ALTQT_CODEL: 501 error = codel_pfattach(a); 502 break; 503 #endif 504 default: 505 error = ENXIO; 506 } 507 508 return (error); 509 } 510 511 /* 512 * detach a discipline from the interface. 513 * it is possible that the discipline was already overridden by another 514 * discipline. 515 */ 516 int 517 altq_pfdetach(struct pf_altq *a) 518 { 519 struct ifnet *ifp; 520 int s, error = 0; 521 522 if ((ifp = ifunit(a->ifname)) == NULL) 523 return (EINVAL); 524 525 /* if this discipline is no longer referenced, just return */ 526 /* read unlocked from if_snd */ 527 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) 528 return (0); 529 530 s = splnet(); 531 /* read unlocked from if_snd, _disable and _detach take care */ 532 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 533 error = altq_disable(&ifp->if_snd); 534 if (error == 0) 535 error = altq_detach(&ifp->if_snd); 536 splx(s); 537 538 return (error); 539 } 540 541 /* 542 * add a discipline or a queue 543 * Locking is done in the discipline specific functions with regards to 544 * malloc with WAITOK, also it is not yet clear which lock to use. 545 */ 546 int 547 altq_add(struct ifnet *ifp, struct pf_altq *a) 548 { 549 int error = 0; 550 551 if (a->qname[0] != 0) 552 return (altq_add_queue(a)); 553 554 if (machclk_freq == 0) 555 init_machclk(); 556 if (machclk_freq == 0) 557 panic("altq_add: no cpu clock"); 558 559 switch (a->scheduler) { 560 #ifdef ALTQ_CBQ 561 case ALTQT_CBQ: 562 error = cbq_add_altq(ifp, a); 563 break; 564 #endif 565 #ifdef ALTQ_PRIQ 566 case ALTQT_PRIQ: 567 error = priq_add_altq(ifp, a); 568 break; 569 #endif 570 #ifdef ALTQ_HFSC 571 case ALTQT_HFSC: 572 error = hfsc_add_altq(ifp, a); 573 break; 574 #endif 575 #ifdef ALTQ_FAIRQ 576 case ALTQT_FAIRQ: 577 error = fairq_add_altq(ifp, a); 578 break; 579 #endif 580 #ifdef ALTQ_CODEL 581 case ALTQT_CODEL: 582 error = codel_add_altq(ifp, a); 583 break; 584 #endif 585 default: 586 error = ENXIO; 587 } 588 589 return (error); 590 } 591 592 /* 593 * remove a discipline or a queue 594 * It is yet unclear what lock to use to protect this operation, the 595 * discipline specific functions will determine and grab it 596 */ 597 int 598 altq_remove(struct pf_altq *a) 599 { 600 int error = 0; 601 602 if (a->qname[0] != 0) 603 return (altq_remove_queue(a)); 604 605 switch (a->scheduler) { 606 #ifdef ALTQ_CBQ 607 case ALTQT_CBQ: 608 error = cbq_remove_altq(a); 609 break; 610 #endif 611 #ifdef ALTQ_PRIQ 612 case ALTQT_PRIQ: 613 error = priq_remove_altq(a); 614 break; 615 #endif 616 #ifdef ALTQ_HFSC 617 case ALTQT_HFSC: 618 error = hfsc_remove_altq(a); 619 break; 620 #endif 621 #ifdef ALTQ_FAIRQ 622 case ALTQT_FAIRQ: 623 error = fairq_remove_altq(a); 624 break; 625 #endif 626 #ifdef ALTQ_CODEL 627 case ALTQT_CODEL: 628 error = codel_remove_altq(a); 629 break; 630 #endif 631 default: 632 error = ENXIO; 633 } 634 635 return (error); 636 } 637 638 /* 639 * add a queue to the discipline 640 * It is yet unclear what lock to use to protect this operation, the 641 * discipline specific functions will determine and grab it 642 */ 643 int 644 altq_add_queue(struct pf_altq *a) 645 { 646 int error = 0; 647 648 switch (a->scheduler) { 649 #ifdef ALTQ_CBQ 650 case ALTQT_CBQ: 651 error = cbq_add_queue(a); 652 break; 653 #endif 654 #ifdef ALTQ_PRIQ 655 case ALTQT_PRIQ: 656 error = priq_add_queue(a); 657 break; 658 #endif 659 #ifdef ALTQ_HFSC 660 case ALTQT_HFSC: 661 error = hfsc_add_queue(a); 662 break; 663 #endif 664 #ifdef ALTQ_FAIRQ 665 case ALTQT_FAIRQ: 666 error = fairq_add_queue(a); 667 break; 668 #endif 669 default: 670 error = ENXIO; 671 } 672 673 return (error); 674 } 675 676 /* 677 * remove a queue from the discipline 678 * It is yet unclear what lock to use to protect this operation, the 679 * discipline specific functions will determine and grab it 680 */ 681 int 682 altq_remove_queue(struct pf_altq *a) 683 { 684 int error = 0; 685 686 switch (a->scheduler) { 687 #ifdef ALTQ_CBQ 688 case ALTQT_CBQ: 689 error = cbq_remove_queue(a); 690 break; 691 #endif 692 #ifdef ALTQ_PRIQ 693 case ALTQT_PRIQ: 694 error = priq_remove_queue(a); 695 break; 696 #endif 697 #ifdef ALTQ_HFSC 698 case ALTQT_HFSC: 699 error = hfsc_remove_queue(a); 700 break; 701 #endif 702 #ifdef ALTQ_FAIRQ 703 case ALTQT_FAIRQ: 704 error = fairq_remove_queue(a); 705 break; 706 #endif 707 default: 708 error = ENXIO; 709 } 710 711 return (error); 712 } 713 714 /* 715 * get queue statistics 716 * Locking is done in the discipline specific functions with regards to 717 * copyout operations, also it is not yet clear which lock to use. 718 */ 719 int 720 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) 721 { 722 int error = 0; 723 724 switch (a->scheduler) { 725 #ifdef ALTQ_CBQ 726 case ALTQT_CBQ: 727 error = cbq_getqstats(a, ubuf, nbytes, version); 728 break; 729 #endif 730 #ifdef ALTQ_PRIQ 731 case ALTQT_PRIQ: 732 error = priq_getqstats(a, ubuf, nbytes, version); 733 break; 734 #endif 735 #ifdef ALTQ_HFSC 736 case ALTQT_HFSC: 737 error = hfsc_getqstats(a, ubuf, nbytes, version); 738 break; 739 #endif 740 #ifdef ALTQ_FAIRQ 741 case ALTQT_FAIRQ: 742 error = fairq_getqstats(a, ubuf, nbytes, version); 743 break; 744 #endif 745 #ifdef ALTQ_CODEL 746 case ALTQT_CODEL: 747 error = codel_getqstats(a, ubuf, nbytes, version); 748 break; 749 #endif 750 default: 751 error = ENXIO; 752 } 753 754 return (error); 755 } 756 757 /* 758 * read and write diffserv field in IPv4 or IPv6 header 759 */ 760 u_int8_t 761 read_dsfield(m, pktattr) 762 struct mbuf *m; 763 struct altq_pktattr *pktattr; 764 { 765 struct mbuf *m0; 766 u_int8_t ds_field = 0; 767 768 if (pktattr == NULL || 769 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 770 return ((u_int8_t)0); 771 772 /* verify that pattr_hdr is within the mbuf data */ 773 for (m0 = m; m0 != NULL; m0 = m0->m_next) 774 if ((pktattr->pattr_hdr >= m0->m_data) && 775 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 776 break; 777 if (m0 == NULL) { 778 /* ick, pattr_hdr is stale */ 779 pktattr->pattr_af = AF_UNSPEC; 780 #ifdef ALTQ_DEBUG 781 printf("read_dsfield: can't locate header!\n"); 782 #endif 783 return ((u_int8_t)0); 784 } 785 786 if (pktattr->pattr_af == AF_INET) { 787 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 788 789 if (ip->ip_v != 4) 790 return ((u_int8_t)0); /* version mismatch! */ 791 ds_field = ip->ip_tos; 792 } 793 #ifdef INET6 794 else if (pktattr->pattr_af == AF_INET6) { 795 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 796 u_int32_t flowlabel; 797 798 flowlabel = ntohl(ip6->ip6_flow); 799 if ((flowlabel >> 28) != 6) 800 return ((u_int8_t)0); /* version mismatch! */ 801 ds_field = (flowlabel >> 20) & 0xff; 802 } 803 #endif 804 return (ds_field); 805 } 806 807 void 808 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) 809 { 810 struct mbuf *m0; 811 812 if (pktattr == NULL || 813 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 814 return; 815 816 /* verify that pattr_hdr is within the mbuf data */ 817 for (m0 = m; m0 != NULL; m0 = m0->m_next) 818 if ((pktattr->pattr_hdr >= m0->m_data) && 819 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 820 break; 821 if (m0 == NULL) { 822 /* ick, pattr_hdr is stale */ 823 pktattr->pattr_af = AF_UNSPEC; 824 #ifdef ALTQ_DEBUG 825 printf("write_dsfield: can't locate header!\n"); 826 #endif 827 return; 828 } 829 830 if (pktattr->pattr_af == AF_INET) { 831 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 832 u_int8_t old; 833 int32_t sum; 834 835 if (ip->ip_v != 4) 836 return; /* version mismatch! */ 837 old = ip->ip_tos; 838 dsfield |= old & 3; /* leave CU bits */ 839 if (old == dsfield) 840 return; 841 ip->ip_tos = dsfield; 842 /* 843 * update checksum (from RFC1624) 844 * HC' = ~(~HC + ~m + m') 845 */ 846 sum = ~ntohs(ip->ip_sum) & 0xffff; 847 sum += 0xff00 + (~old & 0xff) + dsfield; 848 sum = (sum >> 16) + (sum & 0xffff); 849 sum += (sum >> 16); /* add carry */ 850 851 ip->ip_sum = htons(~sum & 0xffff); 852 } 853 #ifdef INET6 854 else if (pktattr->pattr_af == AF_INET6) { 855 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 856 u_int32_t flowlabel; 857 858 flowlabel = ntohl(ip6->ip6_flow); 859 if ((flowlabel >> 28) != 6) 860 return; /* version mismatch! */ 861 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 862 ip6->ip6_flow = htonl(flowlabel); 863 } 864 #endif 865 return; 866 } 867 868 /* 869 * high resolution clock support taking advantage of a machine dependent 870 * high resolution time counter (e.g., timestamp counter of intel pentium). 871 * we assume 872 * - 64-bit-long monotonically-increasing counter 873 * - frequency range is 100M-4GHz (CPU speed) 874 */ 875 /* if pcc is not available or disabled, emulate 256MHz using microtime() */ 876 #define MACHCLK_SHIFT 8 877 878 int machclk_usepcc; 879 u_int32_t machclk_freq; 880 u_int32_t machclk_per_tick; 881 882 #if defined(__i386__) && defined(__NetBSD__) 883 extern u_int64_t cpu_tsc_freq; 884 #endif 885 886 #if (__FreeBSD_version >= 700035) 887 /* Update TSC freq with the value indicated by the caller. */ 888 static void 889 tsc_freq_changed(void *arg, const struct cf_level *level, int status) 890 { 891 /* If there was an error during the transition, don't do anything. */ 892 if (status != 0) 893 return; 894 895 #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) 896 /* If TSC is P-state invariant, don't do anything. */ 897 if (tsc_is_invariant) 898 return; 899 #endif 900 901 /* Total setting for this level gives the new frequency in MHz. */ 902 init_machclk(); 903 } 904 EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 905 EVENTHANDLER_PRI_LAST); 906 #endif /* __FreeBSD_version >= 700035 */ 907 908 static void 909 init_machclk_setup(void) 910 { 911 #if (__FreeBSD_version >= 600000) 912 callout_init(&tbr_callout, 0); 913 #endif 914 915 machclk_usepcc = 1; 916 917 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) 918 machclk_usepcc = 0; 919 #endif 920 #if defined(__FreeBSD__) && defined(SMP) 921 machclk_usepcc = 0; 922 #endif 923 #if defined(__NetBSD__) && defined(MULTIPROCESSOR) 924 machclk_usepcc = 0; 925 #endif 926 #if defined(__amd64__) || defined(__i386__) 927 /* check if TSC is available */ 928 if ((cpu_feature & CPUID_TSC) == 0 || 929 atomic_load_acq_64(&tsc_freq) == 0) 930 machclk_usepcc = 0; 931 #endif 932 } 933 934 void 935 init_machclk(void) 936 { 937 static int called; 938 939 /* Call one-time initialization function. */ 940 if (!called) { 941 init_machclk_setup(); 942 called = 1; 943 } 944 945 if (machclk_usepcc == 0) { 946 /* emulate 256MHz using microtime() */ 947 machclk_freq = 1000000 << MACHCLK_SHIFT; 948 machclk_per_tick = machclk_freq / hz; 949 #ifdef ALTQ_DEBUG 950 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 951 #endif 952 return; 953 } 954 955 /* 956 * if the clock frequency (of Pentium TSC or Alpha PCC) is 957 * accessible, just use it. 958 */ 959 #if defined(__amd64__) || defined(__i386__) 960 machclk_freq = atomic_load_acq_64(&tsc_freq); 961 #endif 962 963 /* 964 * if we don't know the clock frequency, measure it. 965 */ 966 if (machclk_freq == 0) { 967 static int wait; 968 struct timeval tv_start, tv_end; 969 u_int64_t start, end, diff; 970 int timo; 971 972 microtime(&tv_start); 973 start = read_machclk(); 974 timo = hz; /* 1 sec */ 975 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 976 microtime(&tv_end); 977 end = read_machclk(); 978 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 979 + tv_end.tv_usec - tv_start.tv_usec; 980 if (diff != 0) 981 machclk_freq = (u_int)((end - start) * 1000000 / diff); 982 } 983 984 machclk_per_tick = machclk_freq / hz; 985 986 #ifdef ALTQ_DEBUG 987 printf("altq: CPU clock: %uHz\n", machclk_freq); 988 #endif 989 } 990 991 #if defined(__OpenBSD__) && defined(__i386__) 992 static __inline u_int64_t 993 rdtsc(void) 994 { 995 u_int64_t rv; 996 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 997 return (rv); 998 } 999 #endif /* __OpenBSD__ && __i386__ */ 1000 1001 u_int64_t 1002 read_machclk(void) 1003 { 1004 u_int64_t val; 1005 1006 if (machclk_usepcc) { 1007 #if defined(__amd64__) || defined(__i386__) 1008 val = rdtsc(); 1009 #else 1010 panic("read_machclk"); 1011 #endif 1012 } else { 1013 struct timeval tv, boottime; 1014 1015 microtime(&tv); 1016 getboottime(&boottime); 1017 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 1018 + tv.tv_usec) << MACHCLK_SHIFT); 1019 } 1020 return (val); 1021 } 1022 1023 #ifdef ALTQ3_CLFIER_COMPAT 1024 1025 #ifndef IPPROTO_ESP 1026 #define IPPROTO_ESP 50 /* encapsulating security payload */ 1027 #endif 1028 #ifndef IPPROTO_AH 1029 #define IPPROTO_AH 51 /* authentication header */ 1030 #endif 1031 1032 /* 1033 * extract flow information from a given packet. 1034 * filt_mask shows flowinfo fields required. 1035 * we assume the ip header is in one mbuf, and addresses and ports are 1036 * in network byte order. 1037 */ 1038 int 1039 altq_extractflow(m, af, flow, filt_bmask) 1040 struct mbuf *m; 1041 int af; 1042 struct flowinfo *flow; 1043 u_int32_t filt_bmask; 1044 { 1045 1046 switch (af) { 1047 case PF_INET: { 1048 struct flowinfo_in *fin; 1049 struct ip *ip; 1050 1051 ip = mtod(m, struct ip *); 1052 1053 if (ip->ip_v != 4) 1054 break; 1055 1056 fin = (struct flowinfo_in *)flow; 1057 fin->fi_len = sizeof(struct flowinfo_in); 1058 fin->fi_family = AF_INET; 1059 1060 fin->fi_proto = ip->ip_p; 1061 fin->fi_tos = ip->ip_tos; 1062 1063 fin->fi_src.s_addr = ip->ip_src.s_addr; 1064 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 1065 1066 if (filt_bmask & FIMB4_PORTS) 1067 /* if port info is required, extract port numbers */ 1068 extract_ports4(m, ip, fin); 1069 else { 1070 fin->fi_sport = 0; 1071 fin->fi_dport = 0; 1072 fin->fi_gpi = 0; 1073 } 1074 return (1); 1075 } 1076 1077 #ifdef INET6 1078 case PF_INET6: { 1079 struct flowinfo_in6 *fin6; 1080 struct ip6_hdr *ip6; 1081 1082 ip6 = mtod(m, struct ip6_hdr *); 1083 /* should we check the ip version? */ 1084 1085 fin6 = (struct flowinfo_in6 *)flow; 1086 fin6->fi6_len = sizeof(struct flowinfo_in6); 1087 fin6->fi6_family = AF_INET6; 1088 1089 fin6->fi6_proto = ip6->ip6_nxt; 1090 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1091 1092 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 1093 fin6->fi6_src = ip6->ip6_src; 1094 fin6->fi6_dst = ip6->ip6_dst; 1095 1096 if ((filt_bmask & FIMB6_PORTS) || 1097 ((filt_bmask & FIMB6_PROTO) 1098 && ip6->ip6_nxt > IPPROTO_IPV6)) 1099 /* 1100 * if port info is required, or proto is required 1101 * but there are option headers, extract port 1102 * and protocol numbers. 1103 */ 1104 extract_ports6(m, ip6, fin6); 1105 else { 1106 fin6->fi6_sport = 0; 1107 fin6->fi6_dport = 0; 1108 fin6->fi6_gpi = 0; 1109 } 1110 return (1); 1111 } 1112 #endif /* INET6 */ 1113 1114 default: 1115 break; 1116 } 1117 1118 /* failed */ 1119 flow->fi_len = sizeof(struct flowinfo); 1120 flow->fi_family = AF_UNSPEC; 1121 return (0); 1122 } 1123 1124 /* 1125 * helper routine to extract port numbers 1126 */ 1127 /* structure for ipsec and ipv6 option header template */ 1128 struct _opt6 { 1129 u_int8_t opt6_nxt; /* next header */ 1130 u_int8_t opt6_hlen; /* header extension length */ 1131 u_int16_t _pad; 1132 u_int32_t ah_spi; /* security parameter index 1133 for authentication header */ 1134 }; 1135 1136 /* 1137 * extract port numbers from a ipv4 packet. 1138 */ 1139 static int 1140 extract_ports4(m, ip, fin) 1141 struct mbuf *m; 1142 struct ip *ip; 1143 struct flowinfo_in *fin; 1144 { 1145 struct mbuf *m0; 1146 u_short ip_off; 1147 u_int8_t proto; 1148 int off; 1149 1150 fin->fi_sport = 0; 1151 fin->fi_dport = 0; 1152 fin->fi_gpi = 0; 1153 1154 ip_off = ntohs(ip->ip_off); 1155 /* if it is a fragment, try cached fragment info */ 1156 if (ip_off & IP_OFFMASK) { 1157 ip4f_lookup(ip, fin); 1158 return (1); 1159 } 1160 1161 /* locate the mbuf containing the protocol header */ 1162 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1163 if (((caddr_t)ip >= m0->m_data) && 1164 ((caddr_t)ip < m0->m_data + m0->m_len)) 1165 break; 1166 if (m0 == NULL) { 1167 #ifdef ALTQ_DEBUG 1168 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1169 #endif 1170 return (0); 1171 } 1172 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1173 proto = ip->ip_p; 1174 1175 #ifdef ALTQ_IPSEC 1176 again: 1177 #endif 1178 while (off >= m0->m_len) { 1179 off -= m0->m_len; 1180 m0 = m0->m_next; 1181 if (m0 == NULL) 1182 return (0); /* bogus ip_hl! */ 1183 } 1184 if (m0->m_len < off + 4) 1185 return (0); 1186 1187 switch (proto) { 1188 case IPPROTO_TCP: 1189 case IPPROTO_UDP: { 1190 struct udphdr *udp; 1191 1192 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1193 fin->fi_sport = udp->uh_sport; 1194 fin->fi_dport = udp->uh_dport; 1195 fin->fi_proto = proto; 1196 } 1197 break; 1198 1199 #ifdef ALTQ_IPSEC 1200 case IPPROTO_ESP: 1201 if (fin->fi_gpi == 0){ 1202 u_int32_t *gpi; 1203 1204 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1205 fin->fi_gpi = *gpi; 1206 } 1207 fin->fi_proto = proto; 1208 break; 1209 1210 case IPPROTO_AH: { 1211 /* get next header and header length */ 1212 struct _opt6 *opt6; 1213 1214 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1215 proto = opt6->opt6_nxt; 1216 off += 8 + (opt6->opt6_hlen * 4); 1217 if (fin->fi_gpi == 0 && m0->m_len >= off + 8) 1218 fin->fi_gpi = opt6->ah_spi; 1219 } 1220 /* goto the next header */ 1221 goto again; 1222 #endif /* ALTQ_IPSEC */ 1223 1224 default: 1225 fin->fi_proto = proto; 1226 return (0); 1227 } 1228 1229 /* if this is a first fragment, cache it. */ 1230 if (ip_off & IP_MF) 1231 ip4f_cache(ip, fin); 1232 1233 return (1); 1234 } 1235 1236 #ifdef INET6 1237 static int 1238 extract_ports6(m, ip6, fin6) 1239 struct mbuf *m; 1240 struct ip6_hdr *ip6; 1241 struct flowinfo_in6 *fin6; 1242 { 1243 struct mbuf *m0; 1244 int off; 1245 u_int8_t proto; 1246 1247 fin6->fi6_gpi = 0; 1248 fin6->fi6_sport = 0; 1249 fin6->fi6_dport = 0; 1250 1251 /* locate the mbuf containing the protocol header */ 1252 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1253 if (((caddr_t)ip6 >= m0->m_data) && 1254 ((caddr_t)ip6 < m0->m_data + m0->m_len)) 1255 break; 1256 if (m0 == NULL) { 1257 #ifdef ALTQ_DEBUG 1258 printf("extract_ports6: can't locate header! ip6=%p\n", ip6); 1259 #endif 1260 return (0); 1261 } 1262 off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); 1263 1264 proto = ip6->ip6_nxt; 1265 do { 1266 while (off >= m0->m_len) { 1267 off -= m0->m_len; 1268 m0 = m0->m_next; 1269 if (m0 == NULL) 1270 return (0); 1271 } 1272 if (m0->m_len < off + 4) 1273 return (0); 1274 1275 switch (proto) { 1276 case IPPROTO_TCP: 1277 case IPPROTO_UDP: { 1278 struct udphdr *udp; 1279 1280 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1281 fin6->fi6_sport = udp->uh_sport; 1282 fin6->fi6_dport = udp->uh_dport; 1283 fin6->fi6_proto = proto; 1284 } 1285 return (1); 1286 1287 case IPPROTO_ESP: 1288 if (fin6->fi6_gpi == 0) { 1289 u_int32_t *gpi; 1290 1291 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1292 fin6->fi6_gpi = *gpi; 1293 } 1294 fin6->fi6_proto = proto; 1295 return (1); 1296 1297 case IPPROTO_AH: { 1298 /* get next header and header length */ 1299 struct _opt6 *opt6; 1300 1301 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1302 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) 1303 fin6->fi6_gpi = opt6->ah_spi; 1304 proto = opt6->opt6_nxt; 1305 off += 8 + (opt6->opt6_hlen * 4); 1306 /* goto the next header */ 1307 break; 1308 } 1309 1310 case IPPROTO_HOPOPTS: 1311 case IPPROTO_ROUTING: 1312 case IPPROTO_DSTOPTS: { 1313 /* get next header and header length */ 1314 struct _opt6 *opt6; 1315 1316 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1317 proto = opt6->opt6_nxt; 1318 off += (opt6->opt6_hlen + 1) * 8; 1319 /* goto the next header */ 1320 break; 1321 } 1322 1323 case IPPROTO_FRAGMENT: 1324 /* ipv6 fragmentations are not supported yet */ 1325 default: 1326 fin6->fi6_proto = proto; 1327 return (0); 1328 } 1329 } while (1); 1330 /*NOTREACHED*/ 1331 } 1332 #endif /* INET6 */ 1333 1334 /* 1335 * altq common classifier 1336 */ 1337 int 1338 acc_add_filter(classifier, filter, class, phandle) 1339 struct acc_classifier *classifier; 1340 struct flow_filter *filter; 1341 void *class; 1342 u_long *phandle; 1343 { 1344 struct acc_filter *afp, *prev, *tmp; 1345 int i, s; 1346 1347 #ifdef INET6 1348 if (filter->ff_flow.fi_family != AF_INET && 1349 filter->ff_flow.fi_family != AF_INET6) 1350 return (EINVAL); 1351 #else 1352 if (filter->ff_flow.fi_family != AF_INET) 1353 return (EINVAL); 1354 #endif 1355 1356 afp = malloc(sizeof(struct acc_filter), 1357 M_DEVBUF, M_WAITOK); 1358 if (afp == NULL) 1359 return (ENOMEM); 1360 bzero(afp, sizeof(struct acc_filter)); 1361 1362 afp->f_filter = *filter; 1363 afp->f_class = class; 1364 1365 i = ACC_WILDCARD_INDEX; 1366 if (filter->ff_flow.fi_family == AF_INET) { 1367 struct flow_filter *filter4 = &afp->f_filter; 1368 1369 /* 1370 * if address is 0, it's a wildcard. if address mask 1371 * isn't set, use full mask. 1372 */ 1373 if (filter4->ff_flow.fi_dst.s_addr == 0) 1374 filter4->ff_mask.mask_dst.s_addr = 0; 1375 else if (filter4->ff_mask.mask_dst.s_addr == 0) 1376 filter4->ff_mask.mask_dst.s_addr = 0xffffffff; 1377 if (filter4->ff_flow.fi_src.s_addr == 0) 1378 filter4->ff_mask.mask_src.s_addr = 0; 1379 else if (filter4->ff_mask.mask_src.s_addr == 0) 1380 filter4->ff_mask.mask_src.s_addr = 0xffffffff; 1381 1382 /* clear extra bits in addresses */ 1383 filter4->ff_flow.fi_dst.s_addr &= 1384 filter4->ff_mask.mask_dst.s_addr; 1385 filter4->ff_flow.fi_src.s_addr &= 1386 filter4->ff_mask.mask_src.s_addr; 1387 1388 /* 1389 * if dst address is a wildcard, use hash-entry 1390 * ACC_WILDCARD_INDEX. 1391 */ 1392 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) 1393 i = ACC_WILDCARD_INDEX; 1394 else 1395 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); 1396 } 1397 #ifdef INET6 1398 else if (filter->ff_flow.fi_family == AF_INET6) { 1399 struct flow_filter6 *filter6 = 1400 (struct flow_filter6 *)&afp->f_filter; 1401 #ifndef IN6MASK0 /* taken from kame ipv6 */ 1402 #define IN6MASK0 {{{ 0, 0, 0, 0 }}} 1403 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} 1404 const struct in6_addr in6mask0 = IN6MASK0; 1405 const struct in6_addr in6mask128 = IN6MASK128; 1406 #endif 1407 1408 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) 1409 filter6->ff_mask6.mask6_dst = in6mask0; 1410 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) 1411 filter6->ff_mask6.mask6_dst = in6mask128; 1412 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) 1413 filter6->ff_mask6.mask6_src = in6mask0; 1414 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) 1415 filter6->ff_mask6.mask6_src = in6mask128; 1416 1417 /* clear extra bits in addresses */ 1418 for (i = 0; i < 16; i++) 1419 filter6->ff_flow6.fi6_dst.s6_addr[i] &= 1420 filter6->ff_mask6.mask6_dst.s6_addr[i]; 1421 for (i = 0; i < 16; i++) 1422 filter6->ff_flow6.fi6_src.s6_addr[i] &= 1423 filter6->ff_mask6.mask6_src.s6_addr[i]; 1424 1425 if (filter6->ff_flow6.fi6_flowlabel == 0) 1426 i = ACC_WILDCARD_INDEX; 1427 else 1428 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); 1429 } 1430 #endif /* INET6 */ 1431 1432 afp->f_handle = get_filt_handle(classifier, i); 1433 1434 /* update filter bitmask */ 1435 afp->f_fbmask = filt2fibmask(filter); 1436 classifier->acc_fbmask |= afp->f_fbmask; 1437 1438 /* 1439 * add this filter to the filter list. 1440 * filters are ordered from the highest rule number. 1441 */ 1442 s = splnet(); 1443 prev = NULL; 1444 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 1445 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 1446 prev = tmp; 1447 else 1448 break; 1449 } 1450 if (prev == NULL) 1451 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 1452 else 1453 LIST_INSERT_AFTER(prev, afp, f_chain); 1454 splx(s); 1455 1456 *phandle = afp->f_handle; 1457 return (0); 1458 } 1459 1460 int 1461 acc_delete_filter(classifier, handle) 1462 struct acc_classifier *classifier; 1463 u_long handle; 1464 { 1465 struct acc_filter *afp; 1466 int s; 1467 1468 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 1469 return (EINVAL); 1470 1471 s = splnet(); 1472 LIST_REMOVE(afp, f_chain); 1473 splx(s); 1474 1475 free(afp, M_DEVBUF); 1476 1477 /* todo: update filt_bmask */ 1478 1479 return (0); 1480 } 1481 1482 /* 1483 * delete filters referencing to the specified class. 1484 * if the all flag is not 0, delete all the filters. 1485 */ 1486 int 1487 acc_discard_filters(classifier, class, all) 1488 struct acc_classifier *classifier; 1489 void *class; 1490 int all; 1491 { 1492 struct acc_filter *afp; 1493 int i, s; 1494 1495 s = splnet(); 1496 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1497 do { 1498 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1499 if (all || afp->f_class == class) { 1500 LIST_REMOVE(afp, f_chain); 1501 free(afp, M_DEVBUF); 1502 /* start again from the head */ 1503 break; 1504 } 1505 } while (afp != NULL); 1506 } 1507 splx(s); 1508 1509 if (all) 1510 classifier->acc_fbmask = 0; 1511 1512 return (0); 1513 } 1514 1515 void * 1516 acc_classify(clfier, m, af) 1517 void *clfier; 1518 struct mbuf *m; 1519 int af; 1520 { 1521 struct acc_classifier *classifier; 1522 struct flowinfo flow; 1523 struct acc_filter *afp; 1524 int i; 1525 1526 classifier = (struct acc_classifier *)clfier; 1527 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1528 1529 if (flow.fi_family == AF_INET) { 1530 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1531 1532 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1533 /* only tos is used */ 1534 LIST_FOREACH(afp, 1535 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1536 f_chain) 1537 if (apply_tosfilter4(afp->f_fbmask, 1538 &afp->f_filter, fp)) 1539 /* filter matched */ 1540 return (afp->f_class); 1541 } else if ((classifier->acc_fbmask & 1542 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1543 == 0) { 1544 /* only proto and ports are used */ 1545 LIST_FOREACH(afp, 1546 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1547 f_chain) 1548 if (apply_ppfilter4(afp->f_fbmask, 1549 &afp->f_filter, fp)) 1550 /* filter matched */ 1551 return (afp->f_class); 1552 } else { 1553 /* get the filter hash entry from its dest address */ 1554 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1555 do { 1556 /* 1557 * go through this loop twice. first for dst 1558 * hash, second for wildcards. 1559 */ 1560 LIST_FOREACH(afp, &classifier->acc_filters[i], 1561 f_chain) 1562 if (apply_filter4(afp->f_fbmask, 1563 &afp->f_filter, fp)) 1564 /* filter matched */ 1565 return (afp->f_class); 1566 1567 /* 1568 * check again for filters with a dst addr 1569 * wildcard. 1570 * (daddr == 0 || dmask != 0xffffffff). 1571 */ 1572 if (i != ACC_WILDCARD_INDEX) 1573 i = ACC_WILDCARD_INDEX; 1574 else 1575 break; 1576 } while (1); 1577 } 1578 } 1579 #ifdef INET6 1580 else if (flow.fi_family == AF_INET6) { 1581 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1582 1583 /* get the filter hash entry from its flow ID */ 1584 if (fp6->fi6_flowlabel != 0) 1585 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1586 else 1587 /* flowlable can be zero */ 1588 i = ACC_WILDCARD_INDEX; 1589 1590 /* go through this loop twice. first for flow hash, second 1591 for wildcards. */ 1592 do { 1593 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1594 if (apply_filter6(afp->f_fbmask, 1595 (struct flow_filter6 *)&afp->f_filter, 1596 fp6)) 1597 /* filter matched */ 1598 return (afp->f_class); 1599 1600 /* 1601 * check again for filters with a wildcard. 1602 */ 1603 if (i != ACC_WILDCARD_INDEX) 1604 i = ACC_WILDCARD_INDEX; 1605 else 1606 break; 1607 } while (1); 1608 } 1609 #endif /* INET6 */ 1610 1611 /* no filter matched */ 1612 return (NULL); 1613 } 1614 1615 static int 1616 apply_filter4(fbmask, filt, pkt) 1617 u_int32_t fbmask; 1618 struct flow_filter *filt; 1619 struct flowinfo_in *pkt; 1620 { 1621 if (filt->ff_flow.fi_family != AF_INET) 1622 return (0); 1623 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1624 return (0); 1625 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1626 return (0); 1627 if ((fbmask & FIMB4_DADDR) && 1628 filt->ff_flow.fi_dst.s_addr != 1629 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1630 return (0); 1631 if ((fbmask & FIMB4_SADDR) && 1632 filt->ff_flow.fi_src.s_addr != 1633 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1634 return (0); 1635 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1636 return (0); 1637 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1638 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1639 return (0); 1640 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1641 return (0); 1642 /* match */ 1643 return (1); 1644 } 1645 1646 /* 1647 * filter matching function optimized for a common case that checks 1648 * only protocol and port numbers 1649 */ 1650 static int 1651 apply_ppfilter4(fbmask, filt, pkt) 1652 u_int32_t fbmask; 1653 struct flow_filter *filt; 1654 struct flowinfo_in *pkt; 1655 { 1656 if (filt->ff_flow.fi_family != AF_INET) 1657 return (0); 1658 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1659 return (0); 1660 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1661 return (0); 1662 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1663 return (0); 1664 /* match */ 1665 return (1); 1666 } 1667 1668 /* 1669 * filter matching function only for tos field. 1670 */ 1671 static int 1672 apply_tosfilter4(fbmask, filt, pkt) 1673 u_int32_t fbmask; 1674 struct flow_filter *filt; 1675 struct flowinfo_in *pkt; 1676 { 1677 if (filt->ff_flow.fi_family != AF_INET) 1678 return (0); 1679 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1680 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1681 return (0); 1682 /* match */ 1683 return (1); 1684 } 1685 1686 #ifdef INET6 1687 static int 1688 apply_filter6(fbmask, filt, pkt) 1689 u_int32_t fbmask; 1690 struct flow_filter6 *filt; 1691 struct flowinfo_in6 *pkt; 1692 { 1693 int i; 1694 1695 if (filt->ff_flow6.fi6_family != AF_INET6) 1696 return (0); 1697 if ((fbmask & FIMB6_FLABEL) && 1698 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1699 return (0); 1700 if ((fbmask & FIMB6_PROTO) && 1701 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1702 return (0); 1703 if ((fbmask & FIMB6_SPORT) && 1704 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1705 return (0); 1706 if ((fbmask & FIMB6_DPORT) && 1707 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1708 return (0); 1709 if (fbmask & FIMB6_SADDR) { 1710 for (i = 0; i < 4; i++) 1711 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1712 (pkt->fi6_src.s6_addr32[i] & 1713 filt->ff_mask6.mask6_src.s6_addr32[i])) 1714 return (0); 1715 } 1716 if (fbmask & FIMB6_DADDR) { 1717 for (i = 0; i < 4; i++) 1718 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1719 (pkt->fi6_dst.s6_addr32[i] & 1720 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1721 return (0); 1722 } 1723 if ((fbmask & FIMB6_TCLASS) && 1724 filt->ff_flow6.fi6_tclass != 1725 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1726 return (0); 1727 if ((fbmask & FIMB6_GPI) && 1728 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1729 return (0); 1730 /* match */ 1731 return (1); 1732 } 1733 #endif /* INET6 */ 1734 1735 /* 1736 * filter handle: 1737 * bit 20-28: index to the filter hash table 1738 * bit 0-19: unique id in the hash bucket. 1739 */ 1740 static u_long 1741 get_filt_handle(classifier, i) 1742 struct acc_classifier *classifier; 1743 int i; 1744 { 1745 static u_long handle_number = 1; 1746 u_long handle; 1747 struct acc_filter *afp; 1748 1749 while (1) { 1750 handle = handle_number++ & 0x000fffff; 1751 1752 if (LIST_EMPTY(&classifier->acc_filters[i])) 1753 break; 1754 1755 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1756 if ((afp->f_handle & 0x000fffff) == handle) 1757 break; 1758 if (afp == NULL) 1759 break; 1760 /* this handle is already used, try again */ 1761 } 1762 1763 return ((i << 20) | handle); 1764 } 1765 1766 /* convert filter handle to filter pointer */ 1767 static struct acc_filter * 1768 filth_to_filtp(classifier, handle) 1769 struct acc_classifier *classifier; 1770 u_long handle; 1771 { 1772 struct acc_filter *afp; 1773 int i; 1774 1775 i = ACC_GET_HINDEX(handle); 1776 1777 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1778 if (afp->f_handle == handle) 1779 return (afp); 1780 1781 return (NULL); 1782 } 1783 1784 /* create flowinfo bitmask */ 1785 static u_int32_t 1786 filt2fibmask(filt) 1787 struct flow_filter *filt; 1788 { 1789 u_int32_t mask = 0; 1790 #ifdef INET6 1791 struct flow_filter6 *filt6; 1792 #endif 1793 1794 switch (filt->ff_flow.fi_family) { 1795 case AF_INET: 1796 if (filt->ff_flow.fi_proto != 0) 1797 mask |= FIMB4_PROTO; 1798 if (filt->ff_flow.fi_tos != 0) 1799 mask |= FIMB4_TOS; 1800 if (filt->ff_flow.fi_dst.s_addr != 0) 1801 mask |= FIMB4_DADDR; 1802 if (filt->ff_flow.fi_src.s_addr != 0) 1803 mask |= FIMB4_SADDR; 1804 if (filt->ff_flow.fi_sport != 0) 1805 mask |= FIMB4_SPORT; 1806 if (filt->ff_flow.fi_dport != 0) 1807 mask |= FIMB4_DPORT; 1808 if (filt->ff_flow.fi_gpi != 0) 1809 mask |= FIMB4_GPI; 1810 break; 1811 #ifdef INET6 1812 case AF_INET6: 1813 filt6 = (struct flow_filter6 *)filt; 1814 1815 if (filt6->ff_flow6.fi6_proto != 0) 1816 mask |= FIMB6_PROTO; 1817 if (filt6->ff_flow6.fi6_tclass != 0) 1818 mask |= FIMB6_TCLASS; 1819 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1820 mask |= FIMB6_DADDR; 1821 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1822 mask |= FIMB6_SADDR; 1823 if (filt6->ff_flow6.fi6_sport != 0) 1824 mask |= FIMB6_SPORT; 1825 if (filt6->ff_flow6.fi6_dport != 0) 1826 mask |= FIMB6_DPORT; 1827 if (filt6->ff_flow6.fi6_gpi != 0) 1828 mask |= FIMB6_GPI; 1829 if (filt6->ff_flow6.fi6_flowlabel != 0) 1830 mask |= FIMB6_FLABEL; 1831 break; 1832 #endif /* INET6 */ 1833 } 1834 return (mask); 1835 } 1836 1837 /* 1838 * helper functions to handle IPv4 fragments. 1839 * currently only in-sequence fragments are handled. 1840 * - fragment info is cached in a LRU list. 1841 * - when a first fragment is found, cache its flow info. 1842 * - when a non-first fragment is found, lookup the cache. 1843 */ 1844 1845 struct ip4_frag { 1846 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1847 char ip4f_valid; 1848 u_short ip4f_id; 1849 struct flowinfo_in ip4f_info; 1850 }; 1851 1852 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1853 1854 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1855 1856 static void 1857 ip4f_cache(ip, fin) 1858 struct ip *ip; 1859 struct flowinfo_in *fin; 1860 { 1861 struct ip4_frag *fp; 1862 1863 if (TAILQ_EMPTY(&ip4f_list)) { 1864 /* first time call, allocate fragment cache entries. */ 1865 if (ip4f_init() < 0) 1866 /* allocation failed! */ 1867 return; 1868 } 1869 1870 fp = ip4f_alloc(); 1871 fp->ip4f_id = ip->ip_id; 1872 fp->ip4f_info.fi_proto = ip->ip_p; 1873 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1874 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1875 1876 /* save port numbers */ 1877 fp->ip4f_info.fi_sport = fin->fi_sport; 1878 fp->ip4f_info.fi_dport = fin->fi_dport; 1879 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1880 } 1881 1882 static int 1883 ip4f_lookup(ip, fin) 1884 struct ip *ip; 1885 struct flowinfo_in *fin; 1886 { 1887 struct ip4_frag *fp; 1888 1889 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1890 fp = TAILQ_NEXT(fp, ip4f_chain)) 1891 if (ip->ip_id == fp->ip4f_id && 1892 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1893 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1894 ip->ip_p == fp->ip4f_info.fi_proto) { 1895 /* found the matching entry */ 1896 fin->fi_sport = fp->ip4f_info.fi_sport; 1897 fin->fi_dport = fp->ip4f_info.fi_dport; 1898 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1899 1900 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1901 /* this is the last fragment, 1902 release the entry. */ 1903 ip4f_free(fp); 1904 1905 return (1); 1906 } 1907 1908 /* no matching entry found */ 1909 return (0); 1910 } 1911 1912 static int 1913 ip4f_init(void) 1914 { 1915 struct ip4_frag *fp; 1916 int i; 1917 1918 TAILQ_INIT(&ip4f_list); 1919 for (i=0; i<IP4F_TABSIZE; i++) { 1920 fp = malloc(sizeof(struct ip4_frag), 1921 M_DEVBUF, M_NOWAIT); 1922 if (fp == NULL) { 1923 printf("ip4f_init: can't alloc %dth entry!\n", i); 1924 if (i == 0) 1925 return (-1); 1926 return (0); 1927 } 1928 fp->ip4f_valid = 0; 1929 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1930 } 1931 return (0); 1932 } 1933 1934 static struct ip4_frag * 1935 ip4f_alloc(void) 1936 { 1937 struct ip4_frag *fp; 1938 1939 /* reclaim an entry at the tail, put it at the head */ 1940 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 1941 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1942 fp->ip4f_valid = 1; 1943 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 1944 return (fp); 1945 } 1946 1947 static void 1948 ip4f_free(fp) 1949 struct ip4_frag *fp; 1950 { 1951 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1952 fp->ip4f_valid = 0; 1953 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1954 } 1955 1956 #endif /* ALTQ3_CLFIER_COMPAT */ 1957