1 /*- 2 * Copyright (C) 1997-2003 3 * Sony Computer Science Laboratories Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $KAME: altq_subr.c,v 1.21 2003/11/06 06:32:53 kjc Exp $ 27 * $FreeBSD$ 28 */ 29 30 #include "opt_altq.h" 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/systm.h> 38 #include <sys/proc.h> 39 #include <sys/socket.h> 40 #include <sys/socketvar.h> 41 #include <sys/kernel.h> 42 #include <sys/errno.h> 43 #include <sys/syslog.h> 44 #include <sys/sysctl.h> 45 #include <sys/queue.h> 46 47 #include <net/if.h> 48 #include <net/if_var.h> 49 #include <net/if_dl.h> 50 #include <net/if_types.h> 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_systm.h> 55 #include <netinet/ip.h> 56 #ifdef INET6 57 #include <netinet/ip6.h> 58 #endif 59 #include <netinet/tcp.h> 60 #include <netinet/udp.h> 61 62 #include <netpfil/pf/pf.h> 63 #include <netpfil/pf/pf_altq.h> 64 #include <net/altq/altq.h> 65 66 /* machine dependent clock related includes */ 67 #include <sys/bus.h> 68 #include <sys/cpu.h> 69 #include <sys/eventhandler.h> 70 #include <machine/clock.h> 71 #if defined(__amd64__) || defined(__i386__) 72 #include <machine/cpufunc.h> /* for pentium tsc */ 73 #include <machine/specialreg.h> /* for CPUID_TSC */ 74 #include <machine/md_var.h> /* for cpu_feature */ 75 #endif /* __amd64 || __i386__ */ 76 77 /* 78 * internal function prototypes 79 */ 80 static void tbr_timeout(void *); 81 int (*altq_input)(struct mbuf *, int) = NULL; 82 static struct mbuf *tbr_dequeue(struct ifaltq *, int); 83 static int tbr_timer = 0; /* token bucket regulator timer */ 84 #if !defined(__FreeBSD__) || (__FreeBSD_version < 600000) 85 static struct callout tbr_callout = CALLOUT_INITIALIZER; 86 #else 87 static struct callout tbr_callout; 88 #endif 89 90 #ifdef ALTQ3_CLFIER_COMPAT 91 static int extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *); 92 #ifdef INET6 93 static int extract_ports6(struct mbuf *, struct ip6_hdr *, 94 struct flowinfo_in6 *); 95 #endif 96 static int apply_filter4(u_int32_t, struct flow_filter *, 97 struct flowinfo_in *); 98 static int apply_ppfilter4(u_int32_t, struct flow_filter *, 99 struct flowinfo_in *); 100 #ifdef INET6 101 static int apply_filter6(u_int32_t, struct flow_filter6 *, 102 struct flowinfo_in6 *); 103 #endif 104 static int apply_tosfilter4(u_int32_t, struct flow_filter *, 105 struct flowinfo_in *); 106 static u_long get_filt_handle(struct acc_classifier *, int); 107 static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long); 108 static u_int32_t filt2fibmask(struct flow_filter *); 109 110 static void ip4f_cache(struct ip *, struct flowinfo_in *); 111 static int ip4f_lookup(struct ip *, struct flowinfo_in *); 112 static int ip4f_init(void); 113 static struct ip4_frag *ip4f_alloc(void); 114 static void ip4f_free(struct ip4_frag *); 115 #endif /* ALTQ3_CLFIER_COMPAT */ 116 117 /* 118 * alternate queueing support routines 119 */ 120 121 /* look up the queue state by the interface name and the queueing type. */ 122 void * 123 altq_lookup(name, type) 124 char *name; 125 int type; 126 { 127 struct ifnet *ifp; 128 129 if ((ifp = ifunit(name)) != NULL) { 130 /* read if_snd unlocked */ 131 if (type != ALTQT_NONE && ifp->if_snd.altq_type == type) 132 return (ifp->if_snd.altq_disc); 133 } 134 135 return NULL; 136 } 137 138 int 139 altq_attach(ifq, type, discipline, enqueue, dequeue, request, clfier, classify) 140 struct ifaltq *ifq; 141 int type; 142 void *discipline; 143 int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 144 struct mbuf *(*dequeue)(struct ifaltq *, int); 145 int (*request)(struct ifaltq *, int, void *); 146 void *clfier; 147 void *(*classify)(void *, struct mbuf *, int); 148 { 149 IFQ_LOCK(ifq); 150 if (!ALTQ_IS_READY(ifq)) { 151 IFQ_UNLOCK(ifq); 152 return ENXIO; 153 } 154 155 ifq->altq_type = type; 156 ifq->altq_disc = discipline; 157 ifq->altq_enqueue = enqueue; 158 ifq->altq_dequeue = dequeue; 159 ifq->altq_request = request; 160 ifq->altq_clfier = clfier; 161 ifq->altq_classify = classify; 162 ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED); 163 IFQ_UNLOCK(ifq); 164 return 0; 165 } 166 167 int 168 altq_detach(ifq) 169 struct ifaltq *ifq; 170 { 171 IFQ_LOCK(ifq); 172 173 if (!ALTQ_IS_READY(ifq)) { 174 IFQ_UNLOCK(ifq); 175 return ENXIO; 176 } 177 if (ALTQ_IS_ENABLED(ifq)) { 178 IFQ_UNLOCK(ifq); 179 return EBUSY; 180 } 181 if (!ALTQ_IS_ATTACHED(ifq)) { 182 IFQ_UNLOCK(ifq); 183 return (0); 184 } 185 186 ifq->altq_type = ALTQT_NONE; 187 ifq->altq_disc = NULL; 188 ifq->altq_enqueue = NULL; 189 ifq->altq_dequeue = NULL; 190 ifq->altq_request = NULL; 191 ifq->altq_clfier = NULL; 192 ifq->altq_classify = NULL; 193 ifq->altq_flags &= ALTQF_CANTCHANGE; 194 195 IFQ_UNLOCK(ifq); 196 return 0; 197 } 198 199 int 200 altq_enable(ifq) 201 struct ifaltq *ifq; 202 { 203 int s; 204 205 IFQ_LOCK(ifq); 206 207 if (!ALTQ_IS_READY(ifq)) { 208 IFQ_UNLOCK(ifq); 209 return ENXIO; 210 } 211 if (ALTQ_IS_ENABLED(ifq)) { 212 IFQ_UNLOCK(ifq); 213 return 0; 214 } 215 216 s = splnet(); 217 IFQ_PURGE_NOLOCK(ifq); 218 ASSERT(ifq->ifq_len == 0); 219 ifq->ifq_drv_maxlen = 0; /* disable bulk dequeue */ 220 ifq->altq_flags |= ALTQF_ENABLED; 221 if (ifq->altq_clfier != NULL) 222 ifq->altq_flags |= ALTQF_CLASSIFY; 223 splx(s); 224 225 IFQ_UNLOCK(ifq); 226 return 0; 227 } 228 229 int 230 altq_disable(ifq) 231 struct ifaltq *ifq; 232 { 233 int s; 234 235 IFQ_LOCK(ifq); 236 if (!ALTQ_IS_ENABLED(ifq)) { 237 IFQ_UNLOCK(ifq); 238 return 0; 239 } 240 241 s = splnet(); 242 IFQ_PURGE_NOLOCK(ifq); 243 ASSERT(ifq->ifq_len == 0); 244 ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY); 245 splx(s); 246 247 IFQ_UNLOCK(ifq); 248 return 0; 249 } 250 251 #ifdef ALTQ_DEBUG 252 void 253 altq_assert(file, line, failedexpr) 254 const char *file, *failedexpr; 255 int line; 256 { 257 (void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n", 258 failedexpr, file, line); 259 panic("altq assertion"); 260 /* NOTREACHED */ 261 } 262 #endif 263 264 /* 265 * internal representation of token bucket parameters 266 * rate: (byte_per_unittime << TBR_SHIFT) / machclk_freq 267 * (((bits_per_sec) / 8) << TBR_SHIFT) / machclk_freq 268 * depth: byte << TBR_SHIFT 269 * 270 */ 271 #define TBR_SHIFT 29 272 #define TBR_SCALE(x) ((int64_t)(x) << TBR_SHIFT) 273 #define TBR_UNSCALE(x) ((x) >> TBR_SHIFT) 274 275 static struct mbuf * 276 tbr_dequeue(ifq, op) 277 struct ifaltq *ifq; 278 int op; 279 { 280 struct tb_regulator *tbr; 281 struct mbuf *m; 282 int64_t interval; 283 u_int64_t now; 284 285 IFQ_LOCK_ASSERT(ifq); 286 tbr = ifq->altq_tbr; 287 if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) { 288 /* if this is a remove after poll, bypass tbr check */ 289 } else { 290 /* update token only when it is negative */ 291 if (tbr->tbr_token <= 0) { 292 now = read_machclk(); 293 interval = now - tbr->tbr_last; 294 if (interval >= tbr->tbr_filluptime) 295 tbr->tbr_token = tbr->tbr_depth; 296 else { 297 tbr->tbr_token += interval * tbr->tbr_rate; 298 if (tbr->tbr_token > tbr->tbr_depth) 299 tbr->tbr_token = tbr->tbr_depth; 300 } 301 tbr->tbr_last = now; 302 } 303 /* if token is still negative, don't allow dequeue */ 304 if (tbr->tbr_token <= 0) 305 return (NULL); 306 } 307 308 if (ALTQ_IS_ENABLED(ifq)) 309 m = (*ifq->altq_dequeue)(ifq, op); 310 else { 311 if (op == ALTDQ_POLL) 312 _IF_POLL(ifq, m); 313 else 314 _IF_DEQUEUE(ifq, m); 315 } 316 317 if (m != NULL && op == ALTDQ_REMOVE) 318 tbr->tbr_token -= TBR_SCALE(m_pktlen(m)); 319 tbr->tbr_lastop = op; 320 return (m); 321 } 322 323 /* 324 * set a token bucket regulator. 325 * if the specified rate is zero, the token bucket regulator is deleted. 326 */ 327 int 328 tbr_set(ifq, profile) 329 struct ifaltq *ifq; 330 struct tb_profile *profile; 331 { 332 struct tb_regulator *tbr, *otbr; 333 334 if (tbr_dequeue_ptr == NULL) 335 tbr_dequeue_ptr = tbr_dequeue; 336 337 if (machclk_freq == 0) 338 init_machclk(); 339 if (machclk_freq == 0) { 340 printf("tbr_set: no cpu clock available!\n"); 341 return (ENXIO); 342 } 343 344 IFQ_LOCK(ifq); 345 if (profile->rate == 0) { 346 /* delete this tbr */ 347 if ((tbr = ifq->altq_tbr) == NULL) { 348 IFQ_UNLOCK(ifq); 349 return (ENOENT); 350 } 351 ifq->altq_tbr = NULL; 352 free(tbr, M_DEVBUF); 353 IFQ_UNLOCK(ifq); 354 return (0); 355 } 356 357 tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_NOWAIT | M_ZERO); 358 if (tbr == NULL) { 359 IFQ_UNLOCK(ifq); 360 return (ENOMEM); 361 } 362 363 tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq; 364 tbr->tbr_depth = TBR_SCALE(profile->depth); 365 if (tbr->tbr_rate > 0) 366 tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate; 367 else 368 tbr->tbr_filluptime = LLONG_MAX; 369 /* 370 * The longest time between tbr_dequeue() calls will be about 1 371 * system tick, as the callout that drives it is scheduled once per 372 * tick. The refill-time detection logic in tbr_dequeue() can only 373 * properly detect the passage of up to LLONG_MAX machclk ticks. 374 * Therefore, in order for this logic to function properly in the 375 * extreme case, the maximum value of tbr_filluptime should be 376 * LLONG_MAX less one system tick's worth of machclk ticks less 377 * some additional slop factor (here one more system tick's worth 378 * of machclk ticks). 379 */ 380 if (tbr->tbr_filluptime > (LLONG_MAX - 2 * machclk_per_tick)) 381 tbr->tbr_filluptime = LLONG_MAX - 2 * machclk_per_tick; 382 tbr->tbr_token = tbr->tbr_depth; 383 tbr->tbr_last = read_machclk(); 384 tbr->tbr_lastop = ALTDQ_REMOVE; 385 386 otbr = ifq->altq_tbr; 387 ifq->altq_tbr = tbr; /* set the new tbr */ 388 389 if (otbr != NULL) 390 free(otbr, M_DEVBUF); 391 else { 392 if (tbr_timer == 0) { 393 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 394 tbr_timer = 1; 395 } 396 } 397 IFQ_UNLOCK(ifq); 398 return (0); 399 } 400 401 /* 402 * tbr_timeout goes through the interface list, and kicks the drivers 403 * if necessary. 404 * 405 * MPSAFE 406 */ 407 static void 408 tbr_timeout(arg) 409 void *arg; 410 { 411 VNET_ITERATOR_DECL(vnet_iter); 412 struct ifnet *ifp; 413 struct epoch_tracker et; 414 int active; 415 416 active = 0; 417 NET_EPOCH_ENTER(et); 418 VNET_LIST_RLOCK_NOSLEEP(); 419 VNET_FOREACH(vnet_iter) { 420 CURVNET_SET(vnet_iter); 421 for (ifp = CK_STAILQ_FIRST(&V_ifnet); ifp; 422 ifp = CK_STAILQ_NEXT(ifp, if_link)) { 423 /* read from if_snd unlocked */ 424 if (!TBR_IS_ENABLED(&ifp->if_snd)) 425 continue; 426 active++; 427 if (!IFQ_IS_EMPTY(&ifp->if_snd) && 428 ifp->if_start != NULL) 429 (*ifp->if_start)(ifp); 430 } 431 CURVNET_RESTORE(); 432 } 433 VNET_LIST_RUNLOCK_NOSLEEP(); 434 NET_EPOCH_EXIT(et); 435 if (active > 0) 436 CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0); 437 else 438 tbr_timer = 0; /* don't need tbr_timer anymore */ 439 } 440 441 /* 442 * attach a discipline to the interface. if one already exists, it is 443 * overridden. 444 * Locking is done in the discipline specific attach functions. Basically 445 * they call back to altq_attach which takes care of the attach and locking. 446 */ 447 int 448 altq_pfattach(struct pf_altq *a) 449 { 450 int error = 0; 451 452 switch (a->scheduler) { 453 case ALTQT_NONE: 454 break; 455 #ifdef ALTQ_CBQ 456 case ALTQT_CBQ: 457 error = cbq_pfattach(a); 458 break; 459 #endif 460 #ifdef ALTQ_PRIQ 461 case ALTQT_PRIQ: 462 error = priq_pfattach(a); 463 break; 464 #endif 465 #ifdef ALTQ_HFSC 466 case ALTQT_HFSC: 467 error = hfsc_pfattach(a); 468 break; 469 #endif 470 #ifdef ALTQ_FAIRQ 471 case ALTQT_FAIRQ: 472 error = fairq_pfattach(a); 473 break; 474 #endif 475 #ifdef ALTQ_CODEL 476 case ALTQT_CODEL: 477 error = codel_pfattach(a); 478 break; 479 #endif 480 default: 481 error = ENXIO; 482 } 483 484 return (error); 485 } 486 487 /* 488 * detach a discipline from the interface. 489 * it is possible that the discipline was already overridden by another 490 * discipline. 491 */ 492 int 493 altq_pfdetach(struct pf_altq *a) 494 { 495 struct ifnet *ifp; 496 int s, error = 0; 497 498 if ((ifp = ifunit(a->ifname)) == NULL) 499 return (EINVAL); 500 501 /* if this discipline is no longer referenced, just return */ 502 /* read unlocked from if_snd */ 503 if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc) 504 return (0); 505 506 s = splnet(); 507 /* read unlocked from if_snd, _disable and _detach take care */ 508 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 509 error = altq_disable(&ifp->if_snd); 510 if (error == 0) 511 error = altq_detach(&ifp->if_snd); 512 splx(s); 513 514 return (error); 515 } 516 517 /* 518 * add a discipline or a queue 519 * Locking is done in the discipline specific functions with regards to 520 * malloc with WAITOK, also it is not yet clear which lock to use. 521 */ 522 int 523 altq_add(struct ifnet *ifp, struct pf_altq *a) 524 { 525 int error = 0; 526 527 if (a->qname[0] != 0) 528 return (altq_add_queue(a)); 529 530 if (machclk_freq == 0) 531 init_machclk(); 532 if (machclk_freq == 0) 533 panic("altq_add: no cpu clock"); 534 535 switch (a->scheduler) { 536 #ifdef ALTQ_CBQ 537 case ALTQT_CBQ: 538 error = cbq_add_altq(ifp, a); 539 break; 540 #endif 541 #ifdef ALTQ_PRIQ 542 case ALTQT_PRIQ: 543 error = priq_add_altq(ifp, a); 544 break; 545 #endif 546 #ifdef ALTQ_HFSC 547 case ALTQT_HFSC: 548 error = hfsc_add_altq(ifp, a); 549 break; 550 #endif 551 #ifdef ALTQ_FAIRQ 552 case ALTQT_FAIRQ: 553 error = fairq_add_altq(ifp, a); 554 break; 555 #endif 556 #ifdef ALTQ_CODEL 557 case ALTQT_CODEL: 558 error = codel_add_altq(ifp, a); 559 break; 560 #endif 561 default: 562 error = ENXIO; 563 } 564 565 return (error); 566 } 567 568 /* 569 * remove a discipline or a queue 570 * It is yet unclear what lock to use to protect this operation, the 571 * discipline specific functions will determine and grab it 572 */ 573 int 574 altq_remove(struct pf_altq *a) 575 { 576 int error = 0; 577 578 if (a->qname[0] != 0) 579 return (altq_remove_queue(a)); 580 581 switch (a->scheduler) { 582 #ifdef ALTQ_CBQ 583 case ALTQT_CBQ: 584 error = cbq_remove_altq(a); 585 break; 586 #endif 587 #ifdef ALTQ_PRIQ 588 case ALTQT_PRIQ: 589 error = priq_remove_altq(a); 590 break; 591 #endif 592 #ifdef ALTQ_HFSC 593 case ALTQT_HFSC: 594 error = hfsc_remove_altq(a); 595 break; 596 #endif 597 #ifdef ALTQ_FAIRQ 598 case ALTQT_FAIRQ: 599 error = fairq_remove_altq(a); 600 break; 601 #endif 602 #ifdef ALTQ_CODEL 603 case ALTQT_CODEL: 604 error = codel_remove_altq(a); 605 break; 606 #endif 607 default: 608 error = ENXIO; 609 } 610 611 return (error); 612 } 613 614 /* 615 * add a queue to the discipline 616 * It is yet unclear what lock to use to protect this operation, the 617 * discipline specific functions will determine and grab it 618 */ 619 int 620 altq_add_queue(struct pf_altq *a) 621 { 622 int error = 0; 623 624 switch (a->scheduler) { 625 #ifdef ALTQ_CBQ 626 case ALTQT_CBQ: 627 error = cbq_add_queue(a); 628 break; 629 #endif 630 #ifdef ALTQ_PRIQ 631 case ALTQT_PRIQ: 632 error = priq_add_queue(a); 633 break; 634 #endif 635 #ifdef ALTQ_HFSC 636 case ALTQT_HFSC: 637 error = hfsc_add_queue(a); 638 break; 639 #endif 640 #ifdef ALTQ_FAIRQ 641 case ALTQT_FAIRQ: 642 error = fairq_add_queue(a); 643 break; 644 #endif 645 default: 646 error = ENXIO; 647 } 648 649 return (error); 650 } 651 652 /* 653 * remove a queue from the discipline 654 * It is yet unclear what lock to use to protect this operation, the 655 * discipline specific functions will determine and grab it 656 */ 657 int 658 altq_remove_queue(struct pf_altq *a) 659 { 660 int error = 0; 661 662 switch (a->scheduler) { 663 #ifdef ALTQ_CBQ 664 case ALTQT_CBQ: 665 error = cbq_remove_queue(a); 666 break; 667 #endif 668 #ifdef ALTQ_PRIQ 669 case ALTQT_PRIQ: 670 error = priq_remove_queue(a); 671 break; 672 #endif 673 #ifdef ALTQ_HFSC 674 case ALTQT_HFSC: 675 error = hfsc_remove_queue(a); 676 break; 677 #endif 678 #ifdef ALTQ_FAIRQ 679 case ALTQT_FAIRQ: 680 error = fairq_remove_queue(a); 681 break; 682 #endif 683 default: 684 error = ENXIO; 685 } 686 687 return (error); 688 } 689 690 /* 691 * get queue statistics 692 * Locking is done in the discipline specific functions with regards to 693 * copyout operations, also it is not yet clear which lock to use. 694 */ 695 int 696 altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes, int version) 697 { 698 int error = 0; 699 700 switch (a->scheduler) { 701 #ifdef ALTQ_CBQ 702 case ALTQT_CBQ: 703 error = cbq_getqstats(a, ubuf, nbytes, version); 704 break; 705 #endif 706 #ifdef ALTQ_PRIQ 707 case ALTQT_PRIQ: 708 error = priq_getqstats(a, ubuf, nbytes, version); 709 break; 710 #endif 711 #ifdef ALTQ_HFSC 712 case ALTQT_HFSC: 713 error = hfsc_getqstats(a, ubuf, nbytes, version); 714 break; 715 #endif 716 #ifdef ALTQ_FAIRQ 717 case ALTQT_FAIRQ: 718 error = fairq_getqstats(a, ubuf, nbytes, version); 719 break; 720 #endif 721 #ifdef ALTQ_CODEL 722 case ALTQT_CODEL: 723 error = codel_getqstats(a, ubuf, nbytes, version); 724 break; 725 #endif 726 default: 727 error = ENXIO; 728 } 729 730 return (error); 731 } 732 733 /* 734 * read and write diffserv field in IPv4 or IPv6 header 735 */ 736 u_int8_t 737 read_dsfield(m, pktattr) 738 struct mbuf *m; 739 struct altq_pktattr *pktattr; 740 { 741 struct mbuf *m0; 742 u_int8_t ds_field = 0; 743 744 if (pktattr == NULL || 745 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 746 return ((u_int8_t)0); 747 748 /* verify that pattr_hdr is within the mbuf data */ 749 for (m0 = m; m0 != NULL; m0 = m0->m_next) 750 if ((pktattr->pattr_hdr >= m0->m_data) && 751 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 752 break; 753 if (m0 == NULL) { 754 /* ick, pattr_hdr is stale */ 755 pktattr->pattr_af = AF_UNSPEC; 756 #ifdef ALTQ_DEBUG 757 printf("read_dsfield: can't locate header!\n"); 758 #endif 759 return ((u_int8_t)0); 760 } 761 762 if (pktattr->pattr_af == AF_INET) { 763 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 764 765 if (ip->ip_v != 4) 766 return ((u_int8_t)0); /* version mismatch! */ 767 ds_field = ip->ip_tos; 768 } 769 #ifdef INET6 770 else if (pktattr->pattr_af == AF_INET6) { 771 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 772 u_int32_t flowlabel; 773 774 flowlabel = ntohl(ip6->ip6_flow); 775 if ((flowlabel >> 28) != 6) 776 return ((u_int8_t)0); /* version mismatch! */ 777 ds_field = (flowlabel >> 20) & 0xff; 778 } 779 #endif 780 return (ds_field); 781 } 782 783 void 784 write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield) 785 { 786 struct mbuf *m0; 787 788 if (pktattr == NULL || 789 (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6)) 790 return; 791 792 /* verify that pattr_hdr is within the mbuf data */ 793 for (m0 = m; m0 != NULL; m0 = m0->m_next) 794 if ((pktattr->pattr_hdr >= m0->m_data) && 795 (pktattr->pattr_hdr < m0->m_data + m0->m_len)) 796 break; 797 if (m0 == NULL) { 798 /* ick, pattr_hdr is stale */ 799 pktattr->pattr_af = AF_UNSPEC; 800 #ifdef ALTQ_DEBUG 801 printf("write_dsfield: can't locate header!\n"); 802 #endif 803 return; 804 } 805 806 if (pktattr->pattr_af == AF_INET) { 807 struct ip *ip = (struct ip *)pktattr->pattr_hdr; 808 u_int8_t old; 809 int32_t sum; 810 811 if (ip->ip_v != 4) 812 return; /* version mismatch! */ 813 old = ip->ip_tos; 814 dsfield |= old & 3; /* leave CU bits */ 815 if (old == dsfield) 816 return; 817 ip->ip_tos = dsfield; 818 /* 819 * update checksum (from RFC1624) 820 * HC' = ~(~HC + ~m + m') 821 */ 822 sum = ~ntohs(ip->ip_sum) & 0xffff; 823 sum += 0xff00 + (~old & 0xff) + dsfield; 824 sum = (sum >> 16) + (sum & 0xffff); 825 sum += (sum >> 16); /* add carry */ 826 827 ip->ip_sum = htons(~sum & 0xffff); 828 } 829 #ifdef INET6 830 else if (pktattr->pattr_af == AF_INET6) { 831 struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 832 u_int32_t flowlabel; 833 834 flowlabel = ntohl(ip6->ip6_flow); 835 if ((flowlabel >> 28) != 6) 836 return; /* version mismatch! */ 837 flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20); 838 ip6->ip6_flow = htonl(flowlabel); 839 } 840 #endif 841 return; 842 } 843 844 845 /* 846 * high resolution clock support taking advantage of a machine dependent 847 * high resolution time counter (e.g., timestamp counter of intel pentium). 848 * we assume 849 * - 64-bit-long monotonically-increasing counter 850 * - frequency range is 100M-4GHz (CPU speed) 851 */ 852 /* if pcc is not available or disabled, emulate 256MHz using microtime() */ 853 #define MACHCLK_SHIFT 8 854 855 int machclk_usepcc; 856 u_int32_t machclk_freq; 857 u_int32_t machclk_per_tick; 858 859 #if defined(__i386__) && defined(__NetBSD__) 860 extern u_int64_t cpu_tsc_freq; 861 #endif 862 863 #if (__FreeBSD_version >= 700035) 864 /* Update TSC freq with the value indicated by the caller. */ 865 static void 866 tsc_freq_changed(void *arg, const struct cf_level *level, int status) 867 { 868 /* If there was an error during the transition, don't do anything. */ 869 if (status != 0) 870 return; 871 872 #if (__FreeBSD_version >= 701102) && (defined(__amd64__) || defined(__i386__)) 873 /* If TSC is P-state invariant, don't do anything. */ 874 if (tsc_is_invariant) 875 return; 876 #endif 877 878 /* Total setting for this level gives the new frequency in MHz. */ 879 init_machclk(); 880 } 881 EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, 882 EVENTHANDLER_PRI_LAST); 883 #endif /* __FreeBSD_version >= 700035 */ 884 885 static void 886 init_machclk_setup(void) 887 { 888 #if (__FreeBSD_version >= 600000) 889 callout_init(&tbr_callout, 0); 890 #endif 891 892 machclk_usepcc = 1; 893 894 #if (!defined(__amd64__) && !defined(__i386__)) || defined(ALTQ_NOPCC) 895 machclk_usepcc = 0; 896 #endif 897 #if defined(__FreeBSD__) && defined(SMP) 898 machclk_usepcc = 0; 899 #endif 900 #if defined(__NetBSD__) && defined(MULTIPROCESSOR) 901 machclk_usepcc = 0; 902 #endif 903 #if defined(__amd64__) || defined(__i386__) 904 /* check if TSC is available */ 905 if ((cpu_feature & CPUID_TSC) == 0 || 906 atomic_load_acq_64(&tsc_freq) == 0) 907 machclk_usepcc = 0; 908 #endif 909 } 910 911 void 912 init_machclk(void) 913 { 914 static int called; 915 916 /* Call one-time initialization function. */ 917 if (!called) { 918 init_machclk_setup(); 919 called = 1; 920 } 921 922 if (machclk_usepcc == 0) { 923 /* emulate 256MHz using microtime() */ 924 machclk_freq = 1000000 << MACHCLK_SHIFT; 925 machclk_per_tick = machclk_freq / hz; 926 #ifdef ALTQ_DEBUG 927 printf("altq: emulate %uHz cpu clock\n", machclk_freq); 928 #endif 929 return; 930 } 931 932 /* 933 * if the clock frequency (of Pentium TSC or Alpha PCC) is 934 * accessible, just use it. 935 */ 936 #if defined(__amd64__) || defined(__i386__) 937 machclk_freq = atomic_load_acq_64(&tsc_freq); 938 #endif 939 940 /* 941 * if we don't know the clock frequency, measure it. 942 */ 943 if (machclk_freq == 0) { 944 static int wait; 945 struct timeval tv_start, tv_end; 946 u_int64_t start, end, diff; 947 int timo; 948 949 microtime(&tv_start); 950 start = read_machclk(); 951 timo = hz; /* 1 sec */ 952 (void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo); 953 microtime(&tv_end); 954 end = read_machclk(); 955 diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000 956 + tv_end.tv_usec - tv_start.tv_usec; 957 if (diff != 0) 958 machclk_freq = (u_int)((end - start) * 1000000 / diff); 959 } 960 961 machclk_per_tick = machclk_freq / hz; 962 963 #ifdef ALTQ_DEBUG 964 printf("altq: CPU clock: %uHz\n", machclk_freq); 965 #endif 966 } 967 968 #if defined(__OpenBSD__) && defined(__i386__) 969 static __inline u_int64_t 970 rdtsc(void) 971 { 972 u_int64_t rv; 973 __asm __volatile(".byte 0x0f, 0x31" : "=A" (rv)); 974 return (rv); 975 } 976 #endif /* __OpenBSD__ && __i386__ */ 977 978 u_int64_t 979 read_machclk(void) 980 { 981 u_int64_t val; 982 983 if (machclk_usepcc) { 984 #if defined(__amd64__) || defined(__i386__) 985 val = rdtsc(); 986 #else 987 panic("read_machclk"); 988 #endif 989 } else { 990 struct timeval tv, boottime; 991 992 microtime(&tv); 993 getboottime(&boottime); 994 val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000 995 + tv.tv_usec) << MACHCLK_SHIFT); 996 } 997 return (val); 998 } 999 1000 #ifdef ALTQ3_CLFIER_COMPAT 1001 1002 #ifndef IPPROTO_ESP 1003 #define IPPROTO_ESP 50 /* encapsulating security payload */ 1004 #endif 1005 #ifndef IPPROTO_AH 1006 #define IPPROTO_AH 51 /* authentication header */ 1007 #endif 1008 1009 /* 1010 * extract flow information from a given packet. 1011 * filt_mask shows flowinfo fields required. 1012 * we assume the ip header is in one mbuf, and addresses and ports are 1013 * in network byte order. 1014 */ 1015 int 1016 altq_extractflow(m, af, flow, filt_bmask) 1017 struct mbuf *m; 1018 int af; 1019 struct flowinfo *flow; 1020 u_int32_t filt_bmask; 1021 { 1022 1023 switch (af) { 1024 case PF_INET: { 1025 struct flowinfo_in *fin; 1026 struct ip *ip; 1027 1028 ip = mtod(m, struct ip *); 1029 1030 if (ip->ip_v != 4) 1031 break; 1032 1033 fin = (struct flowinfo_in *)flow; 1034 fin->fi_len = sizeof(struct flowinfo_in); 1035 fin->fi_family = AF_INET; 1036 1037 fin->fi_proto = ip->ip_p; 1038 fin->fi_tos = ip->ip_tos; 1039 1040 fin->fi_src.s_addr = ip->ip_src.s_addr; 1041 fin->fi_dst.s_addr = ip->ip_dst.s_addr; 1042 1043 if (filt_bmask & FIMB4_PORTS) 1044 /* if port info is required, extract port numbers */ 1045 extract_ports4(m, ip, fin); 1046 else { 1047 fin->fi_sport = 0; 1048 fin->fi_dport = 0; 1049 fin->fi_gpi = 0; 1050 } 1051 return (1); 1052 } 1053 1054 #ifdef INET6 1055 case PF_INET6: { 1056 struct flowinfo_in6 *fin6; 1057 struct ip6_hdr *ip6; 1058 1059 ip6 = mtod(m, struct ip6_hdr *); 1060 /* should we check the ip version? */ 1061 1062 fin6 = (struct flowinfo_in6 *)flow; 1063 fin6->fi6_len = sizeof(struct flowinfo_in6); 1064 fin6->fi6_family = AF_INET6; 1065 1066 fin6->fi6_proto = ip6->ip6_nxt; 1067 fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff; 1068 1069 fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff); 1070 fin6->fi6_src = ip6->ip6_src; 1071 fin6->fi6_dst = ip6->ip6_dst; 1072 1073 if ((filt_bmask & FIMB6_PORTS) || 1074 ((filt_bmask & FIMB6_PROTO) 1075 && ip6->ip6_nxt > IPPROTO_IPV6)) 1076 /* 1077 * if port info is required, or proto is required 1078 * but there are option headers, extract port 1079 * and protocol numbers. 1080 */ 1081 extract_ports6(m, ip6, fin6); 1082 else { 1083 fin6->fi6_sport = 0; 1084 fin6->fi6_dport = 0; 1085 fin6->fi6_gpi = 0; 1086 } 1087 return (1); 1088 } 1089 #endif /* INET6 */ 1090 1091 default: 1092 break; 1093 } 1094 1095 /* failed */ 1096 flow->fi_len = sizeof(struct flowinfo); 1097 flow->fi_family = AF_UNSPEC; 1098 return (0); 1099 } 1100 1101 /* 1102 * helper routine to extract port numbers 1103 */ 1104 /* structure for ipsec and ipv6 option header template */ 1105 struct _opt6 { 1106 u_int8_t opt6_nxt; /* next header */ 1107 u_int8_t opt6_hlen; /* header extension length */ 1108 u_int16_t _pad; 1109 u_int32_t ah_spi; /* security parameter index 1110 for authentication header */ 1111 }; 1112 1113 /* 1114 * extract port numbers from a ipv4 packet. 1115 */ 1116 static int 1117 extract_ports4(m, ip, fin) 1118 struct mbuf *m; 1119 struct ip *ip; 1120 struct flowinfo_in *fin; 1121 { 1122 struct mbuf *m0; 1123 u_short ip_off; 1124 u_int8_t proto; 1125 int off; 1126 1127 fin->fi_sport = 0; 1128 fin->fi_dport = 0; 1129 fin->fi_gpi = 0; 1130 1131 ip_off = ntohs(ip->ip_off); 1132 /* if it is a fragment, try cached fragment info */ 1133 if (ip_off & IP_OFFMASK) { 1134 ip4f_lookup(ip, fin); 1135 return (1); 1136 } 1137 1138 /* locate the mbuf containing the protocol header */ 1139 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1140 if (((caddr_t)ip >= m0->m_data) && 1141 ((caddr_t)ip < m0->m_data + m0->m_len)) 1142 break; 1143 if (m0 == NULL) { 1144 #ifdef ALTQ_DEBUG 1145 printf("extract_ports4: can't locate header! ip=%p\n", ip); 1146 #endif 1147 return (0); 1148 } 1149 off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2); 1150 proto = ip->ip_p; 1151 1152 #ifdef ALTQ_IPSEC 1153 again: 1154 #endif 1155 while (off >= m0->m_len) { 1156 off -= m0->m_len; 1157 m0 = m0->m_next; 1158 if (m0 == NULL) 1159 return (0); /* bogus ip_hl! */ 1160 } 1161 if (m0->m_len < off + 4) 1162 return (0); 1163 1164 switch (proto) { 1165 case IPPROTO_TCP: 1166 case IPPROTO_UDP: { 1167 struct udphdr *udp; 1168 1169 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1170 fin->fi_sport = udp->uh_sport; 1171 fin->fi_dport = udp->uh_dport; 1172 fin->fi_proto = proto; 1173 } 1174 break; 1175 1176 #ifdef ALTQ_IPSEC 1177 case IPPROTO_ESP: 1178 if (fin->fi_gpi == 0){ 1179 u_int32_t *gpi; 1180 1181 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1182 fin->fi_gpi = *gpi; 1183 } 1184 fin->fi_proto = proto; 1185 break; 1186 1187 case IPPROTO_AH: { 1188 /* get next header and header length */ 1189 struct _opt6 *opt6; 1190 1191 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1192 proto = opt6->opt6_nxt; 1193 off += 8 + (opt6->opt6_hlen * 4); 1194 if (fin->fi_gpi == 0 && m0->m_len >= off + 8) 1195 fin->fi_gpi = opt6->ah_spi; 1196 } 1197 /* goto the next header */ 1198 goto again; 1199 #endif /* ALTQ_IPSEC */ 1200 1201 default: 1202 fin->fi_proto = proto; 1203 return (0); 1204 } 1205 1206 /* if this is a first fragment, cache it. */ 1207 if (ip_off & IP_MF) 1208 ip4f_cache(ip, fin); 1209 1210 return (1); 1211 } 1212 1213 #ifdef INET6 1214 static int 1215 extract_ports6(m, ip6, fin6) 1216 struct mbuf *m; 1217 struct ip6_hdr *ip6; 1218 struct flowinfo_in6 *fin6; 1219 { 1220 struct mbuf *m0; 1221 int off; 1222 u_int8_t proto; 1223 1224 fin6->fi6_gpi = 0; 1225 fin6->fi6_sport = 0; 1226 fin6->fi6_dport = 0; 1227 1228 /* locate the mbuf containing the protocol header */ 1229 for (m0 = m; m0 != NULL; m0 = m0->m_next) 1230 if (((caddr_t)ip6 >= m0->m_data) && 1231 ((caddr_t)ip6 < m0->m_data + m0->m_len)) 1232 break; 1233 if (m0 == NULL) { 1234 #ifdef ALTQ_DEBUG 1235 printf("extract_ports6: can't locate header! ip6=%p\n", ip6); 1236 #endif 1237 return (0); 1238 } 1239 off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr); 1240 1241 proto = ip6->ip6_nxt; 1242 do { 1243 while (off >= m0->m_len) { 1244 off -= m0->m_len; 1245 m0 = m0->m_next; 1246 if (m0 == NULL) 1247 return (0); 1248 } 1249 if (m0->m_len < off + 4) 1250 return (0); 1251 1252 switch (proto) { 1253 case IPPROTO_TCP: 1254 case IPPROTO_UDP: { 1255 struct udphdr *udp; 1256 1257 udp = (struct udphdr *)(mtod(m0, caddr_t) + off); 1258 fin6->fi6_sport = udp->uh_sport; 1259 fin6->fi6_dport = udp->uh_dport; 1260 fin6->fi6_proto = proto; 1261 } 1262 return (1); 1263 1264 case IPPROTO_ESP: 1265 if (fin6->fi6_gpi == 0) { 1266 u_int32_t *gpi; 1267 1268 gpi = (u_int32_t *)(mtod(m0, caddr_t) + off); 1269 fin6->fi6_gpi = *gpi; 1270 } 1271 fin6->fi6_proto = proto; 1272 return (1); 1273 1274 case IPPROTO_AH: { 1275 /* get next header and header length */ 1276 struct _opt6 *opt6; 1277 1278 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1279 if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8) 1280 fin6->fi6_gpi = opt6->ah_spi; 1281 proto = opt6->opt6_nxt; 1282 off += 8 + (opt6->opt6_hlen * 4); 1283 /* goto the next header */ 1284 break; 1285 } 1286 1287 case IPPROTO_HOPOPTS: 1288 case IPPROTO_ROUTING: 1289 case IPPROTO_DSTOPTS: { 1290 /* get next header and header length */ 1291 struct _opt6 *opt6; 1292 1293 opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off); 1294 proto = opt6->opt6_nxt; 1295 off += (opt6->opt6_hlen + 1) * 8; 1296 /* goto the next header */ 1297 break; 1298 } 1299 1300 case IPPROTO_FRAGMENT: 1301 /* ipv6 fragmentations are not supported yet */ 1302 default: 1303 fin6->fi6_proto = proto; 1304 return (0); 1305 } 1306 } while (1); 1307 /*NOTREACHED*/ 1308 } 1309 #endif /* INET6 */ 1310 1311 /* 1312 * altq common classifier 1313 */ 1314 int 1315 acc_add_filter(classifier, filter, class, phandle) 1316 struct acc_classifier *classifier; 1317 struct flow_filter *filter; 1318 void *class; 1319 u_long *phandle; 1320 { 1321 struct acc_filter *afp, *prev, *tmp; 1322 int i, s; 1323 1324 #ifdef INET6 1325 if (filter->ff_flow.fi_family != AF_INET && 1326 filter->ff_flow.fi_family != AF_INET6) 1327 return (EINVAL); 1328 #else 1329 if (filter->ff_flow.fi_family != AF_INET) 1330 return (EINVAL); 1331 #endif 1332 1333 afp = malloc(sizeof(struct acc_filter), 1334 M_DEVBUF, M_WAITOK); 1335 if (afp == NULL) 1336 return (ENOMEM); 1337 bzero(afp, sizeof(struct acc_filter)); 1338 1339 afp->f_filter = *filter; 1340 afp->f_class = class; 1341 1342 i = ACC_WILDCARD_INDEX; 1343 if (filter->ff_flow.fi_family == AF_INET) { 1344 struct flow_filter *filter4 = &afp->f_filter; 1345 1346 /* 1347 * if address is 0, it's a wildcard. if address mask 1348 * isn't set, use full mask. 1349 */ 1350 if (filter4->ff_flow.fi_dst.s_addr == 0) 1351 filter4->ff_mask.mask_dst.s_addr = 0; 1352 else if (filter4->ff_mask.mask_dst.s_addr == 0) 1353 filter4->ff_mask.mask_dst.s_addr = 0xffffffff; 1354 if (filter4->ff_flow.fi_src.s_addr == 0) 1355 filter4->ff_mask.mask_src.s_addr = 0; 1356 else if (filter4->ff_mask.mask_src.s_addr == 0) 1357 filter4->ff_mask.mask_src.s_addr = 0xffffffff; 1358 1359 /* clear extra bits in addresses */ 1360 filter4->ff_flow.fi_dst.s_addr &= 1361 filter4->ff_mask.mask_dst.s_addr; 1362 filter4->ff_flow.fi_src.s_addr &= 1363 filter4->ff_mask.mask_src.s_addr; 1364 1365 /* 1366 * if dst address is a wildcard, use hash-entry 1367 * ACC_WILDCARD_INDEX. 1368 */ 1369 if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff) 1370 i = ACC_WILDCARD_INDEX; 1371 else 1372 i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr); 1373 } 1374 #ifdef INET6 1375 else if (filter->ff_flow.fi_family == AF_INET6) { 1376 struct flow_filter6 *filter6 = 1377 (struct flow_filter6 *)&afp->f_filter; 1378 #ifndef IN6MASK0 /* taken from kame ipv6 */ 1379 #define IN6MASK0 {{{ 0, 0, 0, 0 }}} 1380 #define IN6MASK128 {{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}} 1381 const struct in6_addr in6mask0 = IN6MASK0; 1382 const struct in6_addr in6mask128 = IN6MASK128; 1383 #endif 1384 1385 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst)) 1386 filter6->ff_mask6.mask6_dst = in6mask0; 1387 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst)) 1388 filter6->ff_mask6.mask6_dst = in6mask128; 1389 if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src)) 1390 filter6->ff_mask6.mask6_src = in6mask0; 1391 else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src)) 1392 filter6->ff_mask6.mask6_src = in6mask128; 1393 1394 /* clear extra bits in addresses */ 1395 for (i = 0; i < 16; i++) 1396 filter6->ff_flow6.fi6_dst.s6_addr[i] &= 1397 filter6->ff_mask6.mask6_dst.s6_addr[i]; 1398 for (i = 0; i < 16; i++) 1399 filter6->ff_flow6.fi6_src.s6_addr[i] &= 1400 filter6->ff_mask6.mask6_src.s6_addr[i]; 1401 1402 if (filter6->ff_flow6.fi6_flowlabel == 0) 1403 i = ACC_WILDCARD_INDEX; 1404 else 1405 i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel); 1406 } 1407 #endif /* INET6 */ 1408 1409 afp->f_handle = get_filt_handle(classifier, i); 1410 1411 /* update filter bitmask */ 1412 afp->f_fbmask = filt2fibmask(filter); 1413 classifier->acc_fbmask |= afp->f_fbmask; 1414 1415 /* 1416 * add this filter to the filter list. 1417 * filters are ordered from the highest rule number. 1418 */ 1419 s = splnet(); 1420 prev = NULL; 1421 LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) { 1422 if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno) 1423 prev = tmp; 1424 else 1425 break; 1426 } 1427 if (prev == NULL) 1428 LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain); 1429 else 1430 LIST_INSERT_AFTER(prev, afp, f_chain); 1431 splx(s); 1432 1433 *phandle = afp->f_handle; 1434 return (0); 1435 } 1436 1437 int 1438 acc_delete_filter(classifier, handle) 1439 struct acc_classifier *classifier; 1440 u_long handle; 1441 { 1442 struct acc_filter *afp; 1443 int s; 1444 1445 if ((afp = filth_to_filtp(classifier, handle)) == NULL) 1446 return (EINVAL); 1447 1448 s = splnet(); 1449 LIST_REMOVE(afp, f_chain); 1450 splx(s); 1451 1452 free(afp, M_DEVBUF); 1453 1454 /* todo: update filt_bmask */ 1455 1456 return (0); 1457 } 1458 1459 /* 1460 * delete filters referencing to the specified class. 1461 * if the all flag is not 0, delete all the filters. 1462 */ 1463 int 1464 acc_discard_filters(classifier, class, all) 1465 struct acc_classifier *classifier; 1466 void *class; 1467 int all; 1468 { 1469 struct acc_filter *afp; 1470 int i, s; 1471 1472 s = splnet(); 1473 for (i = 0; i < ACC_FILTER_TABLESIZE; i++) { 1474 do { 1475 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1476 if (all || afp->f_class == class) { 1477 LIST_REMOVE(afp, f_chain); 1478 free(afp, M_DEVBUF); 1479 /* start again from the head */ 1480 break; 1481 } 1482 } while (afp != NULL); 1483 } 1484 splx(s); 1485 1486 if (all) 1487 classifier->acc_fbmask = 0; 1488 1489 return (0); 1490 } 1491 1492 void * 1493 acc_classify(clfier, m, af) 1494 void *clfier; 1495 struct mbuf *m; 1496 int af; 1497 { 1498 struct acc_classifier *classifier; 1499 struct flowinfo flow; 1500 struct acc_filter *afp; 1501 int i; 1502 1503 classifier = (struct acc_classifier *)clfier; 1504 altq_extractflow(m, af, &flow, classifier->acc_fbmask); 1505 1506 if (flow.fi_family == AF_INET) { 1507 struct flowinfo_in *fp = (struct flowinfo_in *)&flow; 1508 1509 if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) { 1510 /* only tos is used */ 1511 LIST_FOREACH(afp, 1512 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1513 f_chain) 1514 if (apply_tosfilter4(afp->f_fbmask, 1515 &afp->f_filter, fp)) 1516 /* filter matched */ 1517 return (afp->f_class); 1518 } else if ((classifier->acc_fbmask & 1519 (~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL)) 1520 == 0) { 1521 /* only proto and ports are used */ 1522 LIST_FOREACH(afp, 1523 &classifier->acc_filters[ACC_WILDCARD_INDEX], 1524 f_chain) 1525 if (apply_ppfilter4(afp->f_fbmask, 1526 &afp->f_filter, fp)) 1527 /* filter matched */ 1528 return (afp->f_class); 1529 } else { 1530 /* get the filter hash entry from its dest address */ 1531 i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr); 1532 do { 1533 /* 1534 * go through this loop twice. first for dst 1535 * hash, second for wildcards. 1536 */ 1537 LIST_FOREACH(afp, &classifier->acc_filters[i], 1538 f_chain) 1539 if (apply_filter4(afp->f_fbmask, 1540 &afp->f_filter, fp)) 1541 /* filter matched */ 1542 return (afp->f_class); 1543 1544 /* 1545 * check again for filters with a dst addr 1546 * wildcard. 1547 * (daddr == 0 || dmask != 0xffffffff). 1548 */ 1549 if (i != ACC_WILDCARD_INDEX) 1550 i = ACC_WILDCARD_INDEX; 1551 else 1552 break; 1553 } while (1); 1554 } 1555 } 1556 #ifdef INET6 1557 else if (flow.fi_family == AF_INET6) { 1558 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow; 1559 1560 /* get the filter hash entry from its flow ID */ 1561 if (fp6->fi6_flowlabel != 0) 1562 i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel); 1563 else 1564 /* flowlable can be zero */ 1565 i = ACC_WILDCARD_INDEX; 1566 1567 /* go through this loop twice. first for flow hash, second 1568 for wildcards. */ 1569 do { 1570 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1571 if (apply_filter6(afp->f_fbmask, 1572 (struct flow_filter6 *)&afp->f_filter, 1573 fp6)) 1574 /* filter matched */ 1575 return (afp->f_class); 1576 1577 /* 1578 * check again for filters with a wildcard. 1579 */ 1580 if (i != ACC_WILDCARD_INDEX) 1581 i = ACC_WILDCARD_INDEX; 1582 else 1583 break; 1584 } while (1); 1585 } 1586 #endif /* INET6 */ 1587 1588 /* no filter matched */ 1589 return (NULL); 1590 } 1591 1592 static int 1593 apply_filter4(fbmask, filt, pkt) 1594 u_int32_t fbmask; 1595 struct flow_filter *filt; 1596 struct flowinfo_in *pkt; 1597 { 1598 if (filt->ff_flow.fi_family != AF_INET) 1599 return (0); 1600 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1601 return (0); 1602 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1603 return (0); 1604 if ((fbmask & FIMB4_DADDR) && 1605 filt->ff_flow.fi_dst.s_addr != 1606 (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr)) 1607 return (0); 1608 if ((fbmask & FIMB4_SADDR) && 1609 filt->ff_flow.fi_src.s_addr != 1610 (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr)) 1611 return (0); 1612 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1613 return (0); 1614 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1615 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1616 return (0); 1617 if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi)) 1618 return (0); 1619 /* match */ 1620 return (1); 1621 } 1622 1623 /* 1624 * filter matching function optimized for a common case that checks 1625 * only protocol and port numbers 1626 */ 1627 static int 1628 apply_ppfilter4(fbmask, filt, pkt) 1629 u_int32_t fbmask; 1630 struct flow_filter *filt; 1631 struct flowinfo_in *pkt; 1632 { 1633 if (filt->ff_flow.fi_family != AF_INET) 1634 return (0); 1635 if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport) 1636 return (0); 1637 if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport) 1638 return (0); 1639 if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto) 1640 return (0); 1641 /* match */ 1642 return (1); 1643 } 1644 1645 /* 1646 * filter matching function only for tos field. 1647 */ 1648 static int 1649 apply_tosfilter4(fbmask, filt, pkt) 1650 u_int32_t fbmask; 1651 struct flow_filter *filt; 1652 struct flowinfo_in *pkt; 1653 { 1654 if (filt->ff_flow.fi_family != AF_INET) 1655 return (0); 1656 if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos != 1657 (pkt->fi_tos & filt->ff_mask.mask_tos)) 1658 return (0); 1659 /* match */ 1660 return (1); 1661 } 1662 1663 #ifdef INET6 1664 static int 1665 apply_filter6(fbmask, filt, pkt) 1666 u_int32_t fbmask; 1667 struct flow_filter6 *filt; 1668 struct flowinfo_in6 *pkt; 1669 { 1670 int i; 1671 1672 if (filt->ff_flow6.fi6_family != AF_INET6) 1673 return (0); 1674 if ((fbmask & FIMB6_FLABEL) && 1675 filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel) 1676 return (0); 1677 if ((fbmask & FIMB6_PROTO) && 1678 filt->ff_flow6.fi6_proto != pkt->fi6_proto) 1679 return (0); 1680 if ((fbmask & FIMB6_SPORT) && 1681 filt->ff_flow6.fi6_sport != pkt->fi6_sport) 1682 return (0); 1683 if ((fbmask & FIMB6_DPORT) && 1684 filt->ff_flow6.fi6_dport != pkt->fi6_dport) 1685 return (0); 1686 if (fbmask & FIMB6_SADDR) { 1687 for (i = 0; i < 4; i++) 1688 if (filt->ff_flow6.fi6_src.s6_addr32[i] != 1689 (pkt->fi6_src.s6_addr32[i] & 1690 filt->ff_mask6.mask6_src.s6_addr32[i])) 1691 return (0); 1692 } 1693 if (fbmask & FIMB6_DADDR) { 1694 for (i = 0; i < 4; i++) 1695 if (filt->ff_flow6.fi6_dst.s6_addr32[i] != 1696 (pkt->fi6_dst.s6_addr32[i] & 1697 filt->ff_mask6.mask6_dst.s6_addr32[i])) 1698 return (0); 1699 } 1700 if ((fbmask & FIMB6_TCLASS) && 1701 filt->ff_flow6.fi6_tclass != 1702 (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass)) 1703 return (0); 1704 if ((fbmask & FIMB6_GPI) && 1705 filt->ff_flow6.fi6_gpi != pkt->fi6_gpi) 1706 return (0); 1707 /* match */ 1708 return (1); 1709 } 1710 #endif /* INET6 */ 1711 1712 /* 1713 * filter handle: 1714 * bit 20-28: index to the filter hash table 1715 * bit 0-19: unique id in the hash bucket. 1716 */ 1717 static u_long 1718 get_filt_handle(classifier, i) 1719 struct acc_classifier *classifier; 1720 int i; 1721 { 1722 static u_long handle_number = 1; 1723 u_long handle; 1724 struct acc_filter *afp; 1725 1726 while (1) { 1727 handle = handle_number++ & 0x000fffff; 1728 1729 if (LIST_EMPTY(&classifier->acc_filters[i])) 1730 break; 1731 1732 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1733 if ((afp->f_handle & 0x000fffff) == handle) 1734 break; 1735 if (afp == NULL) 1736 break; 1737 /* this handle is already used, try again */ 1738 } 1739 1740 return ((i << 20) | handle); 1741 } 1742 1743 /* convert filter handle to filter pointer */ 1744 static struct acc_filter * 1745 filth_to_filtp(classifier, handle) 1746 struct acc_classifier *classifier; 1747 u_long handle; 1748 { 1749 struct acc_filter *afp; 1750 int i; 1751 1752 i = ACC_GET_HINDEX(handle); 1753 1754 LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain) 1755 if (afp->f_handle == handle) 1756 return (afp); 1757 1758 return (NULL); 1759 } 1760 1761 /* create flowinfo bitmask */ 1762 static u_int32_t 1763 filt2fibmask(filt) 1764 struct flow_filter *filt; 1765 { 1766 u_int32_t mask = 0; 1767 #ifdef INET6 1768 struct flow_filter6 *filt6; 1769 #endif 1770 1771 switch (filt->ff_flow.fi_family) { 1772 case AF_INET: 1773 if (filt->ff_flow.fi_proto != 0) 1774 mask |= FIMB4_PROTO; 1775 if (filt->ff_flow.fi_tos != 0) 1776 mask |= FIMB4_TOS; 1777 if (filt->ff_flow.fi_dst.s_addr != 0) 1778 mask |= FIMB4_DADDR; 1779 if (filt->ff_flow.fi_src.s_addr != 0) 1780 mask |= FIMB4_SADDR; 1781 if (filt->ff_flow.fi_sport != 0) 1782 mask |= FIMB4_SPORT; 1783 if (filt->ff_flow.fi_dport != 0) 1784 mask |= FIMB4_DPORT; 1785 if (filt->ff_flow.fi_gpi != 0) 1786 mask |= FIMB4_GPI; 1787 break; 1788 #ifdef INET6 1789 case AF_INET6: 1790 filt6 = (struct flow_filter6 *)filt; 1791 1792 if (filt6->ff_flow6.fi6_proto != 0) 1793 mask |= FIMB6_PROTO; 1794 if (filt6->ff_flow6.fi6_tclass != 0) 1795 mask |= FIMB6_TCLASS; 1796 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst)) 1797 mask |= FIMB6_DADDR; 1798 if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src)) 1799 mask |= FIMB6_SADDR; 1800 if (filt6->ff_flow6.fi6_sport != 0) 1801 mask |= FIMB6_SPORT; 1802 if (filt6->ff_flow6.fi6_dport != 0) 1803 mask |= FIMB6_DPORT; 1804 if (filt6->ff_flow6.fi6_gpi != 0) 1805 mask |= FIMB6_GPI; 1806 if (filt6->ff_flow6.fi6_flowlabel != 0) 1807 mask |= FIMB6_FLABEL; 1808 break; 1809 #endif /* INET6 */ 1810 } 1811 return (mask); 1812 } 1813 1814 1815 /* 1816 * helper functions to handle IPv4 fragments. 1817 * currently only in-sequence fragments are handled. 1818 * - fragment info is cached in a LRU list. 1819 * - when a first fragment is found, cache its flow info. 1820 * - when a non-first fragment is found, lookup the cache. 1821 */ 1822 1823 struct ip4_frag { 1824 TAILQ_ENTRY(ip4_frag) ip4f_chain; 1825 char ip4f_valid; 1826 u_short ip4f_id; 1827 struct flowinfo_in ip4f_info; 1828 }; 1829 1830 static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */ 1831 1832 #define IP4F_TABSIZE 16 /* IPv4 fragment cache size */ 1833 1834 1835 static void 1836 ip4f_cache(ip, fin) 1837 struct ip *ip; 1838 struct flowinfo_in *fin; 1839 { 1840 struct ip4_frag *fp; 1841 1842 if (TAILQ_EMPTY(&ip4f_list)) { 1843 /* first time call, allocate fragment cache entries. */ 1844 if (ip4f_init() < 0) 1845 /* allocation failed! */ 1846 return; 1847 } 1848 1849 fp = ip4f_alloc(); 1850 fp->ip4f_id = ip->ip_id; 1851 fp->ip4f_info.fi_proto = ip->ip_p; 1852 fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr; 1853 fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr; 1854 1855 /* save port numbers */ 1856 fp->ip4f_info.fi_sport = fin->fi_sport; 1857 fp->ip4f_info.fi_dport = fin->fi_dport; 1858 fp->ip4f_info.fi_gpi = fin->fi_gpi; 1859 } 1860 1861 static int 1862 ip4f_lookup(ip, fin) 1863 struct ip *ip; 1864 struct flowinfo_in *fin; 1865 { 1866 struct ip4_frag *fp; 1867 1868 for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid; 1869 fp = TAILQ_NEXT(fp, ip4f_chain)) 1870 if (ip->ip_id == fp->ip4f_id && 1871 ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr && 1872 ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr && 1873 ip->ip_p == fp->ip4f_info.fi_proto) { 1874 1875 /* found the matching entry */ 1876 fin->fi_sport = fp->ip4f_info.fi_sport; 1877 fin->fi_dport = fp->ip4f_info.fi_dport; 1878 fin->fi_gpi = fp->ip4f_info.fi_gpi; 1879 1880 if ((ntohs(ip->ip_off) & IP_MF) == 0) 1881 /* this is the last fragment, 1882 release the entry. */ 1883 ip4f_free(fp); 1884 1885 return (1); 1886 } 1887 1888 /* no matching entry found */ 1889 return (0); 1890 } 1891 1892 static int 1893 ip4f_init(void) 1894 { 1895 struct ip4_frag *fp; 1896 int i; 1897 1898 TAILQ_INIT(&ip4f_list); 1899 for (i=0; i<IP4F_TABSIZE; i++) { 1900 fp = malloc(sizeof(struct ip4_frag), 1901 M_DEVBUF, M_NOWAIT); 1902 if (fp == NULL) { 1903 printf("ip4f_init: can't alloc %dth entry!\n", i); 1904 if (i == 0) 1905 return (-1); 1906 return (0); 1907 } 1908 fp->ip4f_valid = 0; 1909 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1910 } 1911 return (0); 1912 } 1913 1914 static struct ip4_frag * 1915 ip4f_alloc(void) 1916 { 1917 struct ip4_frag *fp; 1918 1919 /* reclaim an entry at the tail, put it at the head */ 1920 fp = TAILQ_LAST(&ip4f_list, ip4f_list); 1921 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1922 fp->ip4f_valid = 1; 1923 TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain); 1924 return (fp); 1925 } 1926 1927 static void 1928 ip4f_free(fp) 1929 struct ip4_frag *fp; 1930 { 1931 TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain); 1932 fp->ip4f_valid = 0; 1933 TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain); 1934 } 1935 1936 #endif /* ALTQ3_CLFIER_COMPAT */ 1937