/*-
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $
 * $FreeBSD$
 */

#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#include <netpfil/pf/pf.h>
#include <netpfil/pf/pf_altq.h>
#include <net/altq/altq.h>

/* machine dependent clock related includes */
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <machine/clock.h>
#if defined(__amd64__) || defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#include <machine/md_var.h>		/* for cpu_feature */
#endif /* __amd64 || __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static struct mbuf *tbr_dequeue(struct ifaltq *, int);
static int tbr_timer = 0;	/* token bucket regulator timer */
#if !defined(__FreeBSD__) || (__FreeBSD_version < 600000)
static struct callout tbr_callout = CALLOUT_INITIALIZER;
#else
static struct callout tbr_callout;
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag	*ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(name, type)
	char *name;
	int type;
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		/* read if_snd unlocked */
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify)
	struct ifaltq *ifq;
	int type;
	void *discipline;
	int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
	struct mbuf *(*dequeue)(struct ifaltq *, int);
	int (*request)(struct ifaltq *, int, void *);
	void *clfier;
	void *(*classify)(void *, struct mbuf *, int);
{
	IFQ_LOCK(ifq);
	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}

	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_detach(ifq)
	struct ifaltq *ifq;
{
	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return EBUSY;
	}
	if (!ALTQ_IS_ATTACHED(ifq)) {
		IFQ_UNLOCK(ifq);
		return (0);
	}

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_enable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);

	if (!ALTQ_IS_READY(ifq)) {
		IFQ_UNLOCK(ifq);
		return ENXIO;
	}
	if (ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->ifq_drv_maxlen = 0;	/* disable bulk dequeue */
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}

int
altq_disable(ifq)
	struct ifaltq *ifq;
{
	int s;

	IFQ_LOCK(ifq);
	if (!ALTQ_IS_ENABLED(ifq)) {
		IFQ_UNLOCK(ifq);
		return 0;
	}

	s = splnet();
	IFQ_PURGE_NOLOCK(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);

	IFQ_UNLOCK(ifq);
	return 0;
}
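/*
 * Illustrative sketch of how a discipline registers itself through
 * altq_attach(), modeled loosely on the *_pfattach() routines further
 * below.  "foo", ALTQT_FOO and the foo_* callbacks are hypothetical
 * names, not part of this file; the fragment is kept under #if 0 so it
 * is never compiled.
 */
#if 0
static int
foo_attach(struct ifaltq *ifq, struct foo_state *st)
{
	/*
	 * the callbacks become the ifq's altq_enqueue/altq_dequeue/
	 * altq_request hooks; the classifier pair may be NULL when
	 * classification is done elsewhere (e.g., by pf).
	 */
	return (altq_attach(ifq, ALTQT_FOO, st,
	    foo_enqueue, foo_dequeue, foo_request, NULL, NULL));
}
#endif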
#ifdef ALTQ_DEBUG
void
altq_assert(file, line, failedexpr)
	const char *file, *failedexpr;
	int line;
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	(byte_per_unittime << TBR_SHIFT)  / machclk_freq
 *		(((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq
 *	depth:	byte << TBR_SHIFT
 *
 */
#define	TBR_SHIFT	29
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
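/*
 * Worked example of the scaling above (illustrative figures, not taken
 * from this file): with machclk_freq = 10^9 (a 1GHz clock) and a
 * profile rate of 100Mbps, byte_per_unittime is 12,500,000, so
 *
 *	rate = (12500000 << 29) / 10^9 =~ 6,710,886
 *
 * scaled bytes are credited per machclk tick.  A depth of 1500 bytes
 * scales to 1500 << 29, and an empty bucket refills in
 * depth / rate =~ 120,000 machclk ticks, i.e. 120us at 1GHz, which
 * matches 1500 bytes at 12.5 Mbyte/s.
 */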
static struct mbuf *
tbr_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	IFQ_LOCK_ASSERT(ifq);
	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			_IF_POLL(ifq, m);
		else
			_IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(ifq, profile)
	struct ifaltq *ifq;
	struct tb_profile *profile;
{
	struct tb_regulator *tbr, *otbr;

	if (tbr_dequeue_ptr == NULL)
		tbr_dequeue_ptr = tbr_dequeue;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no cpu clock available!\n");
		return (ENXIO);
	}

	IFQ_LOCK(ifq);
	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL) {
			IFQ_UNLOCK(ifq);
			return (ENOENT);
		}
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		IFQ_UNLOCK(ifq);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (tbr == NULL) {
		IFQ_UNLOCK(ifq);
		return (ENOMEM);
	}

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = LLONG_MAX;
	/*
	 * The longest time between tbr_dequeue() calls will be about 1
	 * system tick, as the callout that drives it is scheduled once per
	 * tick.  The refill-time detection logic in tbr_dequeue() can only
	 * properly detect the passage of up to LLONG_MAX machclk ticks.
	 * Therefore, in order for this logic to function properly in the
	 * extreme case, the maximum value of tbr_filluptime should be
	 * LLONG_MAX less one system tick's worth of machclk ticks less
	 * some additional slop factor (here one more system tick's worth
	 * of machclk ticks).
	 */
	if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick))
		tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	IFQ_UNLOCK(ifq);
	return (0);
}

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 *
 * MPSAFE
 */
static void
tbr_timeout(arg)
	void *arg;
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	IFNET_RLOCK_NOSLEEP();
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp;
		    ifp = CK_STAILQ_NEXT(ifp, if_link)) {
			/* read from if_snd unlocked */
			if (!TBR_IS_ENABLED(&ifp->if_snd))
				continue;
			active++;
			if (!IFQ_IS_EMPTY(&ifp->if_snd) &&
			    ifp->if_start != NULL)
				(*ifp->if_start)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	IFNET_RUNLOCK_NOSLEEP();
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
}
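/*
 * Illustrative sketch of driving tbr_set() (hypothetical values; kept
 * under #if 0 so it is never compiled).  tb_profile carries the rate
 * in bits per second and the depth in bytes, and, as noted above, a
 * zero rate deletes an installed regulator.
 */
#if 0
static int
example_tbr_install(struct ifnet *ifp)
{
	struct tb_profile profile;
	int error;

	profile.rate = 10000000;	/* 10Mbps */
	profile.depth = 10000;		/* 10KB burst */
	error = tbr_set(&ifp->if_snd, &profile);
	if (error != 0)
		return (error);

	profile.rate = 0;		/* a zero rate removes the tbr */
	return (tbr_set(&ifp->if_snd, &profile));
}
#endif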
/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 * Locking is done in the discipline specific attach functions. Basically
 * they call back to altq_attach which takes care of the attach and locking.
 */
int
altq_pfattach(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_pfattach(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	/* read unlocked from if_snd */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	/* read unlocked from if_snd, _disable and _detach take care */
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 * Locking is done in the discipline specific functions with regards to
 * malloc with WAITOK, also it is not yet clear which lock to use.
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no cpu clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 * It is yet unclear what lock to use to protect this operation, the
 * discipline specific functions will determine and grab it
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 * Locking is done in the discipline specific functions with regards to
 * copyout operations, also it is not yet clear which lock to use.
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_FAIRQ
	case ALTQT_FAIRQ:
		error = fairq_getqstats(a, ubuf, nbytes, version);
		break;
#endif
#ifdef ALTQ_CODEL
	case ALTQT_CODEL:
		error = codel_getqstats(a, ubuf, nbytes, version);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(m, pktattr)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}
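/*
 * For reference, the ip6_flow word read above packs three fields
 * (RFC 2460); after ntohl():
 *
 *	bits 31-28: version (6)
 *	bits 27-20: traffic class (the diffserv octet returned above)
 *	bits 19- 0: flow label
 *
 * hence the ">> 28" version test and the "(flowlabel >> 20) & 0xff"
 * extraction.
 */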
void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);	/* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}
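/*
 * Worked example of the RFC 1624 incremental update in write_dsfield()
 * above (illustrative numbers): replacing old tos m = 0x00 by
 * m' = 0xb8 in a header whose stored checksum is HC = 0xb1e6:
 *
 *	sum  = ~0xb1e6 & 0xffff			-> 0x4e19
 *	sum += 0xff00 + (~0x00 & 0xff) + 0xb8	-> 0x14ed0
 *	fold the carry: 0x1 + 0x4ed0		-> 0x4ed1
 *	HC'  = ~0x4ed1 & 0xffff			-> 0xb12e
 *
 * which is what recomputing the checksum from scratch over the
 * modified header would also yield.
 */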
/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8

int machclk_usepcc;
u_int32_t machclk_freq;
u_int32_t machclk_per_tick;

#if defined(__i386__) && defined(__NetBSD__)
extern u_int64_t cpu_tsc_freq;
#endif

#if (__FreeBSD_version >= 700035)
/* Update TSC freq with the value indicated by the caller. */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{
	/* If there was an error during the transition, don't do anything. */
	if (status != 0)
		return;

#if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__))
	/* If TSC is P-state invariant, don't do anything. */
	if (tsc_is_invariant)
		return;
#endif

	/* Total setting for this level gives the new frequency in MHz. */
	init_machclk();
}
EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_LAST);
#endif /* __FreeBSD_version >= 700035 */

static void
init_machclk_setup(void)
{
#if (__FreeBSD_version >= 600000)
	callout_init(&tbr_callout, 0);
#endif

	machclk_usepcc = 1;

#if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#if defined(__amd64__) || defined(__i386__)
	/* check if TSC is available */
	if ((cpu_feature & CPUID_TSC) == 0 ||
	    atomic_load_acq_64(&tsc_freq) == 0)
		machclk_usepcc = 0;
#endif
}

void
init_machclk(void)
{
	static int called;

	/* Call one-time initialization function. */
	if (!called) {
		init_machclk_setup();
		called = 1;
	}

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz cpu clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#if defined(__amd64__) || defined(__i386__)
	machclk_freq = atomic_load_acq_64(&tsc_freq);
#endif

	/*
	 * if we don't know the clock frequency, measure it.
	 */
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__amd64__) || defined(__i386__)
		val = rdtsc();
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv, boottime;

		microtime(&tv);
		getboottime(&boottime);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}
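/*
 * Note on the emulated clock above: with MACHCLK_SHIFT = 8 the
 * microtime() fallback reports machclk_freq = 1000000 << 8 =
 * 256,000,000 ("256MHz"), and read_machclk() returns microseconds
 * since boot shifted left by 8, so one emulated machclk tick is
 * 1/256 of a microsecond.
 */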
#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50	/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51	/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(m, af, flow, filt_bmask)
	struct mbuf *m;
	int af;
	struct flowinfo *flow;
	u_int32_t	filt_bmask;
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
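/*
 * Note that _opt6 serves as a template for headers with two different
 * length conventions: for AH, opt6_hlen counts 32-bit words minus two,
 * so the header spans 8 + opt6_hlen * 4 bytes; for the hop-by-hop,
 * routing and destination option headers it counts 8-byte units minus
 * one, so they span (opt6_hlen + 1) * 8 bytes.  The walkers below
 * apply the formula matching each case.
 */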
/*
 * extract port numbers from an ipv4 packet.
 */
static int
extract_ports4(m, ip, fin)
	struct mbuf *m;
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);	/* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* go to the next header */
		goto again;
#endif /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(m, ip6, fin6)
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct flowinfo_in6 *fin6;
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */
/*
 * altq common classifier
 */
int
acc_add_filter(classifier, filter, class, phandle)
	struct acc_classifier *classifier;
	struct flow_filter *filter;
	void	*class;
	u_long	*phandle;
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter),
	       M_DEVBUF, M_WAITOK);
	if (afp == NULL)
		return (ENOMEM);
	bzero(afp, sizeof(struct acc_filter));

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}
int
acc_delete_filter(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(classifier, class, all)
	struct acc_classifier *classifier;
	void	*class;
	int	all;
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(clfier, m, af)
	void *clfier;
	struct mbuf *m;
	int af;
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}
static int
apply_filter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}
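/*
 * Worked example of the mask-and-compare tests above (illustrative
 * addresses): a filter with fi_dst = 192.168.0.0 and mask_dst =
 * 255.255.0.0 matches a packet addressed to 192.168.10.5, since
 * 192.168.10.5 & 255.255.0.0 == 192.168.0.0.  Only the packet side
 * needs masking here because acc_add_filter() already cleared the
 * extra bits on the filter side.
 */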
/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter *filt;
	struct flowinfo_in *pkt;
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(fbmask, filt, pkt)
	u_int32_t	fbmask;
	struct flow_filter6 *filt;
	struct flowinfo_in6 *pkt;
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
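/*
 * Encoding example (illustrative values): hash index i = 5 and
 * per-bucket id 0x00123 pack into the handle
 * (5 << 20) | 0x00123 = 0x00500123; ACC_GET_HINDEX() recovers the
 * bucket index from the upper bits.
 */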
static u_long
get_filt_handle(classifier, i)
	struct acc_classifier *classifier;
	int	i;
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(classifier, handle)
	struct acc_classifier *classifier;
	u_long handle;
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(filt)
	struct flow_filter *filt;
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}

/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
    TAILQ_ENTRY(ip4_frag) ip4f_chain;
    char    ip4f_valid;
    u_short ip4f_id;
    struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
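/*
 * The cache below is a fixed pool of IP4F_TABSIZE entries kept in LRU
 * order: ip4f_alloc() recycles the tail entry and moves it to the
 * head, ip4f_free() invalidates an entry and parks it at the tail, and
 * ip4f_lookup() can stop at the first invalid entry because all valid
 * entries stay in front of invalid ones.
 */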
static void
ip4f_cache(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi = fin->fi_gpi;
}

static int
ip4f_lookup(ip, fin)
	struct ip *ip;
	struct flowinfo_in *fin;
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag),
		      M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(fp)
	struct ip4_frag *fp;
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */