1 /*- 2 * Copyright (C) 1997-2003 3 * Sony Computer Science Laboratories Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 */ 27 /*- 28 * Copyright (c) 1990-1994 Regents of the University of California. 29 * All rights reserved. 30 * 31 * Redistribution and use in source and binary forms, with or without 32 * modification, are permitted provided that the following conditions 33 * are met: 34 * 1. Redistributions of source code must retain the above copyright 35 * notice, this list of conditions and the following disclaimer. 36 * 2. Redistributions in binary form must reproduce the above copyright 37 * notice, this list of conditions and the following disclaimer in the 38 * documentation and/or other materials provided with the distribution. 39 * 3. All advertising materials mentioning features or use of this software 40 * must display the following acknowledgement: 41 * This product includes software developed by the Computer Systems 42 * Engineering Group at Lawrence Berkeley Laboratory. 43 * 4. Neither the name of the University nor of the Laboratory may be used 44 * to endorse or promote products derived from this software without 45 * specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ 60 * $FreeBSD$ 61 */ 62 63 #include "opt_altq.h" 64 #include "opt_inet.h" 65 #include "opt_inet6.h" 66 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 67 68 #include <sys/param.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/socket.h> 72 #include <sys/systm.h> 73 #include <sys/errno.h> 74 #if 1 /* ALTQ3_COMPAT */ 75 #include <sys/sockio.h> 76 #include <sys/proc.h> 77 #include <sys/kernel.h> 78 #ifdef ALTQ_FLOWVALVE 79 #include <sys/queue.h> 80 #include <sys/time.h> 81 #endif 82 #endif /* ALTQ3_COMPAT */ 83 84 #include <net/if.h> 85 #include <net/if_var.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_systm.h> 89 #include <netinet/ip.h> 90 #ifdef INET6 91 #include <netinet/ip6.h> 92 #endif 93 94 #include <netpfil/pf/pf.h> 95 #include <netpfil/pf/pf_altq.h> 96 #include <netpfil/pf/pf_mtag.h> 97 #include <net/altq/altq.h> 98 #include <net/altq/altq_red.h> 99 #ifdef ALTQ3_COMPAT 100 #include <net/altq/altq_conf.h> 101 #ifdef ALTQ_FLOWVALVE 102 #include <net/altq/altq_flowvalve.h> 103 #endif 104 #endif 105 106 /* 107 * ALTQ/RED (Random Early Detection) implementation using 32-bit 108 * fixed-point calculation. 109 * 110 * written by kjc using the ns code as a reference. 111 * you can learn more about red and ns from Sally's home page at 112 * http://www-nrg.ee.lbl.gov/floyd/ 113 * 114 * most of the red parameter values are fixed in this implementation 115 * to prevent fixed-point overflow/underflow. 116 * if you change the parameters, watch out for overflow/underflow! 117 * 118 * the parameters used are recommended values by Sally. 119 * the corresponding ns config looks: 120 * q_weight=0.00195 121 * minthresh=5 maxthresh=15 queue-size=60 122 * linterm=30 123 * dropmech=drop-tail 124 * bytes=false (can't be handled by 32-bit fixed-point) 125 * doubleq=false dqthresh=false 126 * wait=true 127 */ 128 /* 129 * alternative red parameters for a slow link. 130 * 131 * assume the queue length becomes from zero to L and keeps L, it takes 132 * N packets for q_avg to reach 63% of L. 133 * when q_weight is 0.002, N is about 500 packets. 134 * for a slow link like dial-up, 500 packets takes more than 1 minute! 135 * when q_weight is 0.008, N is about 127 packets. 136 * when q_weight is 0.016, N is about 63 packets. 137 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 138 * are allowed for 0.016. 139 * see Sally's paper for more details. 140 */ 141 /* normal red parameters */ 142 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 143 /* q_weight = 0.00195 */ 144 145 /* red parameters for a slow link */ 146 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 147 /* q_weight = 0.0078125 */ 148 149 /* red parameters for a very slow link (e.g., dialup) */ 150 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 151 /* q_weight = 0.015625 */ 152 153 /* fixed-point uses 12-bit decimal places */ 154 #define FP_SHIFT 12 /* fixed-point shift */ 155 156 /* red parameters for drop probability */ 157 #define INV_P_MAX 10 /* inverse of max drop probability */ 158 #define TH_MIN 5 /* min threshold */ 159 #define TH_MAX 15 /* max threshold */ 160 161 #define RED_LIMIT 60 /* default max queue length */ 162 #define RED_STATS /* collect statistics */ 163 164 /* 165 * our default policy for forced-drop is drop-tail. 166 * (in altq-1.1.2 or earlier, the default was random-drop. 167 * but it makes more sense to punish the cause of the surge.) 168 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 169 */ 170 171 #ifdef ALTQ3_COMPAT 172 #ifdef ALTQ_FLOWVALVE 173 /* 174 * flow-valve is an extension to protect red from unresponsive flows 175 * and to promote end-to-end congestion control. 176 * flow-valve observes the average drop rates of the flows that have 177 * experienced packet drops in the recent past. 178 * when the average drop rate exceeds the threshold, the flow is 179 * blocked by the flow-valve. the trapped flow should back off 180 * exponentially to escape from the flow-valve. 181 */ 182 #ifdef RED_RANDOM_DROP 183 #error "random-drop can't be used with flow-valve!" 184 #endif 185 #endif /* ALTQ_FLOWVALVE */ 186 187 /* red_list keeps all red_queue_t's allocated. */ 188 static red_queue_t *red_list = NULL; 189 190 #endif /* ALTQ3_COMPAT */ 191 192 /* default red parameter values */ 193 static int default_th_min = TH_MIN; 194 static int default_th_max = TH_MAX; 195 static int default_inv_pmax = INV_P_MAX; 196 197 #ifdef ALTQ3_COMPAT 198 /* internal function prototypes */ 199 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 200 static struct mbuf *red_dequeue(struct ifaltq *, int); 201 static int red_request(struct ifaltq *, int, void *); 202 static void red_purgeq(red_queue_t *); 203 static int red_detach(red_queue_t *); 204 #ifdef ALTQ_FLOWVALVE 205 static __inline struct fve *flowlist_lookup(struct flowvalve *, 206 struct altq_pktattr *, struct timeval *); 207 static __inline struct fve *flowlist_reclaim(struct flowvalve *, 208 struct altq_pktattr *); 209 static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 210 static __inline int fv_p2f(struct flowvalve *, int); 211 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 212 static struct flowvalve *fv_alloc(struct red *); 213 #endif 214 static void fv_destroy(struct flowvalve *); 215 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 216 struct fve **); 217 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 218 struct fve *); 219 #endif 220 #endif /* ALTQ3_COMPAT */ 221 222 /* 223 * red support routines 224 */ 225 red_t * 226 red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 227 int pkttime) 228 { 229 red_t *rp; 230 int w, i; 231 int npkts_per_sec; 232 233 rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO); 234 if (rp == NULL) 235 return (NULL); 236 237 if (weight == 0) 238 rp->red_weight = W_WEIGHT; 239 else 240 rp->red_weight = weight; 241 242 /* allocate weight table */ 243 rp->red_wtab = wtab_alloc(rp->red_weight); 244 if (rp->red_wtab == NULL) { 245 free(rp, M_DEVBUF); 246 return (NULL); 247 } 248 249 rp->red_avg = 0; 250 rp->red_idle = 1; 251 252 if (inv_pmax == 0) 253 rp->red_inv_pmax = default_inv_pmax; 254 else 255 rp->red_inv_pmax = inv_pmax; 256 if (th_min == 0) 257 rp->red_thmin = default_th_min; 258 else 259 rp->red_thmin = th_min; 260 if (th_max == 0) 261 rp->red_thmax = default_th_max; 262 else 263 rp->red_thmax = th_max; 264 265 rp->red_flags = flags; 266 267 if (pkttime == 0) 268 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 269 rp->red_pkttime = 800; 270 else 271 rp->red_pkttime = pkttime; 272 273 if (weight == 0) { 274 /* when the link is very slow, adjust red parameters */ 275 npkts_per_sec = 1000000 / rp->red_pkttime; 276 if (npkts_per_sec < 50) { 277 /* up to about 400Kbps */ 278 rp->red_weight = W_WEIGHT_2; 279 } else if (npkts_per_sec < 300) { 280 /* up to about 2.4Mbps */ 281 rp->red_weight = W_WEIGHT_1; 282 } 283 } 284 285 /* calculate wshift. weight must be power of 2 */ 286 w = rp->red_weight; 287 for (i = 0; w > 1; i++) 288 w = w >> 1; 289 rp->red_wshift = i; 290 w = 1 << rp->red_wshift; 291 if (w != rp->red_weight) { 292 printf("invalid weight value %d for red! use %d\n", 293 rp->red_weight, w); 294 rp->red_weight = w; 295 } 296 297 /* 298 * thmin_s and thmax_s are scaled versions of th_min and th_max 299 * to be compared with avg. 300 */ 301 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 302 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 303 304 /* 305 * precompute probability denominator 306 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 307 */ 308 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 309 * rp->red_inv_pmax) << FP_SHIFT; 310 311 microtime(&rp->red_last); 312 return (rp); 313 } 314 315 void 316 red_destroy(red_t *rp) 317 { 318 #ifdef ALTQ3_COMPAT 319 #ifdef ALTQ_FLOWVALVE 320 if (rp->red_flowvalve != NULL) 321 fv_destroy(rp->red_flowvalve); 322 #endif 323 #endif /* ALTQ3_COMPAT */ 324 wtab_destroy(rp->red_wtab); 325 free(rp, M_DEVBUF); 326 } 327 328 void 329 red_getstats(red_t *rp, struct redstats *sp) 330 { 331 sp->q_avg = rp->red_avg >> rp->red_wshift; 332 sp->xmit_cnt = rp->red_stats.xmit_cnt; 333 sp->drop_cnt = rp->red_stats.drop_cnt; 334 sp->drop_forced = rp->red_stats.drop_forced; 335 sp->drop_unforced = rp->red_stats.drop_unforced; 336 sp->marked_packets = rp->red_stats.marked_packets; 337 } 338 339 int 340 red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 341 struct altq_pktattr *pktattr) 342 { 343 int avg, droptype; 344 int n; 345 #ifdef ALTQ3_COMPAT 346 #ifdef ALTQ_FLOWVALVE 347 struct fve *fve = NULL; 348 349 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 350 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 351 m_freem(m); 352 return (-1); 353 } 354 #endif 355 #endif /* ALTQ3_COMPAT */ 356 357 avg = rp->red_avg; 358 359 /* 360 * if we were idle, we pretend that n packets arrived during 361 * the idle period. 362 */ 363 if (rp->red_idle) { 364 struct timeval now; 365 int t; 366 367 rp->red_idle = 0; 368 microtime(&now); 369 t = (now.tv_sec - rp->red_last.tv_sec); 370 if (t > 60) { 371 /* 372 * being idle for more than 1 minute, set avg to zero. 373 * this prevents t from overflow. 374 */ 375 avg = 0; 376 } else { 377 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 378 n = t / rp->red_pkttime - 1; 379 380 /* the following line does (avg = (1 - Wq)^n * avg) */ 381 if (n > 0) 382 avg = (avg >> FP_SHIFT) * 383 pow_w(rp->red_wtab, n); 384 } 385 } 386 387 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 388 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 389 rp->red_avg = avg; /* save the new value */ 390 391 /* 392 * red_count keeps a tally of arriving traffic that has not 393 * been dropped. 394 */ 395 rp->red_count++; 396 397 /* see if we drop early */ 398 droptype = DTYPE_NODROP; 399 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 400 if (avg >= rp->red_thmax_s) { 401 /* avg >= th_max: forced drop */ 402 droptype = DTYPE_FORCED; 403 } else if (rp->red_old == 0) { 404 /* first exceeds th_min */ 405 rp->red_count = 1; 406 rp->red_old = 1; 407 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 408 rp->red_probd, rp->red_count)) { 409 /* mark or drop by red */ 410 if ((rp->red_flags & REDF_ECN) && 411 mark_ecn(m, pktattr, rp->red_flags)) { 412 /* successfully marked. do not drop. */ 413 rp->red_count = 0; 414 #ifdef RED_STATS 415 rp->red_stats.marked_packets++; 416 #endif 417 } else { 418 /* unforced drop by red */ 419 droptype = DTYPE_EARLY; 420 } 421 } 422 } else { 423 /* avg < th_min */ 424 rp->red_old = 0; 425 } 426 427 /* 428 * if the queue length hits the hard limit, it's a forced drop. 429 */ 430 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 431 droptype = DTYPE_FORCED; 432 433 #ifdef RED_RANDOM_DROP 434 /* if successful or forced drop, enqueue this packet. */ 435 if (droptype != DTYPE_EARLY) 436 _addq(q, m); 437 #else 438 /* if successful, enqueue this packet. */ 439 if (droptype == DTYPE_NODROP) 440 _addq(q, m); 441 #endif 442 if (droptype != DTYPE_NODROP) { 443 if (droptype == DTYPE_EARLY) { 444 /* drop the incoming packet */ 445 #ifdef RED_STATS 446 rp->red_stats.drop_unforced++; 447 #endif 448 } else { 449 /* forced drop, select a victim packet in the queue. */ 450 #ifdef RED_RANDOM_DROP 451 m = _getq_random(q); 452 #endif 453 #ifdef RED_STATS 454 rp->red_stats.drop_forced++; 455 #endif 456 } 457 #ifdef RED_STATS 458 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 459 #endif 460 rp->red_count = 0; 461 #ifdef ALTQ3_COMPAT 462 #ifdef ALTQ_FLOWVALVE 463 if (rp->red_flowvalve != NULL) 464 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 465 #endif 466 #endif /* ALTQ3_COMPAT */ 467 m_freem(m); 468 return (-1); 469 } 470 /* successfully queued */ 471 #ifdef RED_STATS 472 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 473 #endif 474 return (0); 475 } 476 477 /* 478 * early-drop probability is calculated as follows: 479 * prob = p_max * (avg - th_min) / (th_max - th_min) 480 * prob_a = prob / (2 - count*prob) 481 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 482 * here prob_a increases as successive undrop count increases. 483 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 484 * becomes 1 when (count >= (2 / prob))). 485 */ 486 int 487 drop_early(int fp_len, int fp_probd, int count) 488 { 489 int d; /* denominator of drop-probability */ 490 491 d = fp_probd - count * fp_len; 492 if (d <= 0) 493 /* count exceeds the hard limit: drop or mark */ 494 return (1); 495 496 /* 497 * now the range of d is [1..600] in fixed-point. (when 498 * th_max-th_min=10 and p_max=1/30) 499 * drop probability = (avg - TH_MIN) / d 500 */ 501 502 if ((arc4random() % d) < fp_len) { 503 /* drop or mark */ 504 return (1); 505 } 506 /* no drop/mark */ 507 return (0); 508 } 509 510 /* 511 * try to mark CE bit to the packet. 512 * returns 1 if successfully marked, 0 otherwise. 513 */ 514 int 515 mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 516 { 517 struct mbuf *m0; 518 struct pf_mtag *at; 519 void *hdr; 520 521 at = pf_find_mtag(m); 522 if (at != NULL) { 523 hdr = at->hdr; 524 #ifdef ALTQ3_COMPAT 525 } else if (pktattr != NULL) { 526 af = pktattr->pattr_af; 527 hdr = pktattr->pattr_hdr; 528 #endif /* ALTQ3_COMPAT */ 529 } else 530 return (0); 531 532 /* verify that pattr_hdr is within the mbuf data */ 533 for (m0 = m; m0 != NULL; m0 = m0->m_next) 534 if (((caddr_t)hdr >= m0->m_data) && 535 ((caddr_t)hdr < m0->m_data + m0->m_len)) 536 break; 537 if (m0 == NULL) { 538 /* ick, tag info is stale */ 539 return (0); 540 } 541 542 switch (((struct ip *)hdr)->ip_v) { 543 case IPVERSION: 544 if (flags & REDF_ECN4) { 545 struct ip *ip = hdr; 546 u_int8_t otos; 547 int sum; 548 549 if (ip->ip_v != 4) 550 return (0); /* version mismatch! */ 551 552 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 553 return (0); /* not-ECT */ 554 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 555 return (1); /* already marked */ 556 557 /* 558 * ecn-capable but not marked, 559 * mark CE and update checksum 560 */ 561 otos = ip->ip_tos; 562 ip->ip_tos |= IPTOS_ECN_CE; 563 /* 564 * update checksum (from RFC1624) 565 * HC' = ~(~HC + ~m + m') 566 */ 567 sum = ~ntohs(ip->ip_sum) & 0xffff; 568 sum += (~otos & 0xffff) + ip->ip_tos; 569 sum = (sum >> 16) + (sum & 0xffff); 570 sum += (sum >> 16); /* add carry */ 571 ip->ip_sum = htons(~sum & 0xffff); 572 return (1); 573 } 574 break; 575 #ifdef INET6 576 case (IPV6_VERSION >> 4): 577 if (flags & REDF_ECN6) { 578 struct ip6_hdr *ip6 = hdr; 579 u_int32_t flowlabel; 580 581 flowlabel = ntohl(ip6->ip6_flow); 582 if ((flowlabel >> 28) != 6) 583 return (0); /* version mismatch! */ 584 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 585 (IPTOS_ECN_NOTECT << 20)) 586 return (0); /* not-ECT */ 587 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 588 (IPTOS_ECN_CE << 20)) 589 return (1); /* already marked */ 590 /* 591 * ecn-capable but not marked, mark CE 592 */ 593 flowlabel |= (IPTOS_ECN_CE << 20); 594 ip6->ip6_flow = htonl(flowlabel); 595 return (1); 596 } 597 break; 598 #endif /* INET6 */ 599 } 600 601 /* not marked */ 602 return (0); 603 } 604 605 struct mbuf * 606 red_getq(rp, q) 607 red_t *rp; 608 class_queue_t *q; 609 { 610 struct mbuf *m; 611 612 if ((m = _getq(q)) == NULL) { 613 if (rp->red_idle == 0) { 614 rp->red_idle = 1; 615 microtime(&rp->red_last); 616 } 617 return NULL; 618 } 619 620 rp->red_idle = 0; 621 return (m); 622 } 623 624 /* 625 * helper routine to calibrate avg during idle. 626 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 627 * here Wq = 1/weight and the code assumes Wq is close to zero. 628 * 629 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 630 */ 631 static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 632 633 struct wtab * 634 wtab_alloc(int weight) 635 { 636 struct wtab *w; 637 int i; 638 639 for (w = wtab_list; w != NULL; w = w->w_next) 640 if (w->w_weight == weight) { 641 w->w_refcount++; 642 return (w); 643 } 644 645 w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO); 646 if (w == NULL) 647 return (NULL); 648 w->w_weight = weight; 649 w->w_refcount = 1; 650 w->w_next = wtab_list; 651 wtab_list = w; 652 653 /* initialize the weight table */ 654 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 655 for (i = 1; i < 32; i++) { 656 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 657 if (w->w_tab[i] == 0 && w->w_param_max == 0) 658 w->w_param_max = 1 << i; 659 } 660 661 return (w); 662 } 663 664 int 665 wtab_destroy(struct wtab *w) 666 { 667 struct wtab *prev; 668 669 if (--w->w_refcount > 0) 670 return (0); 671 672 if (wtab_list == w) 673 wtab_list = w->w_next; 674 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 675 if (prev->w_next == w) { 676 prev->w_next = w->w_next; 677 break; 678 } 679 680 free(w, M_DEVBUF); 681 return (0); 682 } 683 684 int32_t 685 pow_w(struct wtab *w, int n) 686 { 687 int i, bit; 688 int32_t val; 689 690 if (n >= w->w_param_max) 691 return (0); 692 693 val = 1 << FP_SHIFT; 694 if (n <= 0) 695 return (val); 696 697 bit = 1; 698 i = 0; 699 while (n) { 700 if (n & bit) { 701 val = (val * w->w_tab[i]) >> FP_SHIFT; 702 n &= ~bit; 703 } 704 i++; 705 bit <<= 1; 706 } 707 return (val); 708 } 709 710 #ifdef ALTQ3_COMPAT 711 /* 712 * red device interface 713 */ 714 altqdev_decl(red); 715 716 int 717 redopen(dev, flag, fmt, p) 718 dev_t dev; 719 int flag, fmt; 720 #if (__FreeBSD_version > 500000) 721 struct thread *p; 722 #else 723 struct proc *p; 724 #endif 725 { 726 /* everything will be done when the queueing scheme is attached. */ 727 return 0; 728 } 729 730 int 731 redclose(dev, flag, fmt, p) 732 dev_t dev; 733 int flag, fmt; 734 #if (__FreeBSD_version > 500000) 735 struct thread *p; 736 #else 737 struct proc *p; 738 #endif 739 { 740 red_queue_t *rqp; 741 int err, error = 0; 742 743 while ((rqp = red_list) != NULL) { 744 /* destroy all */ 745 err = red_detach(rqp); 746 if (err != 0 && error == 0) 747 error = err; 748 } 749 750 return error; 751 } 752 753 int 754 redioctl(dev, cmd, addr, flag, p) 755 dev_t dev; 756 ioctlcmd_t cmd; 757 caddr_t addr; 758 int flag; 759 #if (__FreeBSD_version > 500000) 760 struct thread *p; 761 #else 762 struct proc *p; 763 #endif 764 { 765 red_queue_t *rqp; 766 struct red_interface *ifacep; 767 struct ifnet *ifp; 768 int error = 0; 769 770 /* check super-user privilege */ 771 switch (cmd) { 772 case RED_GETSTATS: 773 break; 774 default: 775 #if (__FreeBSD_version > 700000) 776 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 777 #elsif (__FreeBSD_version > 400000) 778 if ((error = suser(p)) != 0) 779 #else 780 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 781 #endif 782 return (error); 783 break; 784 } 785 786 switch (cmd) { 787 788 case RED_ENABLE: 789 ifacep = (struct red_interface *)addr; 790 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 791 error = EBADF; 792 break; 793 } 794 error = altq_enable(rqp->rq_ifq); 795 break; 796 797 case RED_DISABLE: 798 ifacep = (struct red_interface *)addr; 799 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 800 error = EBADF; 801 break; 802 } 803 error = altq_disable(rqp->rq_ifq); 804 break; 805 806 case RED_IF_ATTACH: 807 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 808 if (ifp == NULL) { 809 error = ENXIO; 810 break; 811 } 812 813 /* allocate and initialize red_queue_t */ 814 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 815 if (rqp == NULL) { 816 error = ENOMEM; 817 break; 818 } 819 bzero(rqp, sizeof(red_queue_t)); 820 821 rqp->rq_q = malloc(sizeof(class_queue_t), 822 M_DEVBUF, M_WAITOK); 823 if (rqp->rq_q == NULL) { 824 free(rqp, M_DEVBUF); 825 error = ENOMEM; 826 break; 827 } 828 bzero(rqp->rq_q, sizeof(class_queue_t)); 829 830 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 831 if (rqp->rq_red == NULL) { 832 free(rqp->rq_q, M_DEVBUF); 833 free(rqp, M_DEVBUF); 834 error = ENOMEM; 835 break; 836 } 837 838 rqp->rq_ifq = &ifp->if_snd; 839 qtail(rqp->rq_q) = NULL; 840 qlen(rqp->rq_q) = 0; 841 qlimit(rqp->rq_q) = RED_LIMIT; 842 qtype(rqp->rq_q) = Q_RED; 843 844 /* 845 * set RED to this ifnet structure. 846 */ 847 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 848 red_enqueue, red_dequeue, red_request, 849 NULL, NULL); 850 if (error) { 851 red_destroy(rqp->rq_red); 852 free(rqp->rq_q, M_DEVBUF); 853 free(rqp, M_DEVBUF); 854 break; 855 } 856 857 /* add this state to the red list */ 858 rqp->rq_next = red_list; 859 red_list = rqp; 860 break; 861 862 case RED_IF_DETACH: 863 ifacep = (struct red_interface *)addr; 864 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 865 error = EBADF; 866 break; 867 } 868 error = red_detach(rqp); 869 break; 870 871 case RED_GETSTATS: 872 do { 873 struct red_stats *q_stats; 874 red_t *rp; 875 876 q_stats = (struct red_stats *)addr; 877 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 878 ALTQT_RED)) == NULL) { 879 error = EBADF; 880 break; 881 } 882 883 q_stats->q_len = qlen(rqp->rq_q); 884 q_stats->q_limit = qlimit(rqp->rq_q); 885 886 rp = rqp->rq_red; 887 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 888 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 889 q_stats->drop_cnt = rp->red_stats.drop_cnt; 890 q_stats->drop_forced = rp->red_stats.drop_forced; 891 q_stats->drop_unforced = rp->red_stats.drop_unforced; 892 q_stats->marked_packets = rp->red_stats.marked_packets; 893 894 q_stats->weight = rp->red_weight; 895 q_stats->inv_pmax = rp->red_inv_pmax; 896 q_stats->th_min = rp->red_thmin; 897 q_stats->th_max = rp->red_thmax; 898 899 #ifdef ALTQ_FLOWVALVE 900 if (rp->red_flowvalve != NULL) { 901 struct flowvalve *fv = rp->red_flowvalve; 902 q_stats->fv_flows = fv->fv_flows; 903 q_stats->fv_pass = fv->fv_stats.pass; 904 q_stats->fv_predrop = fv->fv_stats.predrop; 905 q_stats->fv_alloc = fv->fv_stats.alloc; 906 q_stats->fv_escape = fv->fv_stats.escape; 907 } else { 908 #endif /* ALTQ_FLOWVALVE */ 909 q_stats->fv_flows = 0; 910 q_stats->fv_pass = 0; 911 q_stats->fv_predrop = 0; 912 q_stats->fv_alloc = 0; 913 q_stats->fv_escape = 0; 914 #ifdef ALTQ_FLOWVALVE 915 } 916 #endif /* ALTQ_FLOWVALVE */ 917 } while (/*CONSTCOND*/ 0); 918 break; 919 920 case RED_CONFIG: 921 do { 922 struct red_conf *fc; 923 red_t *new; 924 int s, limit; 925 926 fc = (struct red_conf *)addr; 927 if ((rqp = altq_lookup(fc->iface.red_ifname, 928 ALTQT_RED)) == NULL) { 929 error = EBADF; 930 break; 931 } 932 new = red_alloc(fc->red_weight, 933 fc->red_inv_pmax, 934 fc->red_thmin, 935 fc->red_thmax, 936 fc->red_flags, 937 fc->red_pkttime); 938 if (new == NULL) { 939 error = ENOMEM; 940 break; 941 } 942 943 s = splnet(); 944 red_purgeq(rqp); 945 limit = fc->red_limit; 946 if (limit < fc->red_thmax) 947 limit = fc->red_thmax; 948 qlimit(rqp->rq_q) = limit; 949 fc->red_limit = limit; /* write back the new value */ 950 951 red_destroy(rqp->rq_red); 952 rqp->rq_red = new; 953 954 splx(s); 955 956 /* write back new values */ 957 fc->red_limit = limit; 958 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 959 fc->red_thmin = rqp->rq_red->red_thmin; 960 fc->red_thmax = rqp->rq_red->red_thmax; 961 962 } while (/*CONSTCOND*/ 0); 963 break; 964 965 case RED_SETDEFAULTS: 966 do { 967 struct redparams *rp; 968 969 rp = (struct redparams *)addr; 970 971 default_th_min = rp->th_min; 972 default_th_max = rp->th_max; 973 default_inv_pmax = rp->inv_pmax; 974 } while (/*CONSTCOND*/ 0); 975 break; 976 977 default: 978 error = EINVAL; 979 break; 980 } 981 return error; 982 } 983 984 static int 985 red_detach(rqp) 986 red_queue_t *rqp; 987 { 988 red_queue_t *tmp; 989 int error = 0; 990 991 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 992 altq_disable(rqp->rq_ifq); 993 994 if ((error = altq_detach(rqp->rq_ifq))) 995 return (error); 996 997 if (red_list == rqp) 998 red_list = rqp->rq_next; 999 else { 1000 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 1001 if (tmp->rq_next == rqp) { 1002 tmp->rq_next = rqp->rq_next; 1003 break; 1004 } 1005 if (tmp == NULL) 1006 printf("red_detach: no state found in red_list!\n"); 1007 } 1008 1009 red_destroy(rqp->rq_red); 1010 free(rqp->rq_q, M_DEVBUF); 1011 free(rqp, M_DEVBUF); 1012 return (error); 1013 } 1014 1015 /* 1016 * enqueue routine: 1017 * 1018 * returns: 0 when successfully queued. 1019 * ENOBUFS when drop occurs. 1020 */ 1021 static int 1022 red_enqueue(ifq, m, pktattr) 1023 struct ifaltq *ifq; 1024 struct mbuf *m; 1025 struct altq_pktattr *pktattr; 1026 { 1027 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1028 1029 IFQ_LOCK_ASSERT(ifq); 1030 1031 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 1032 return ENOBUFS; 1033 ifq->ifq_len++; 1034 return 0; 1035 } 1036 1037 /* 1038 * dequeue routine: 1039 * must be called in splimp. 1040 * 1041 * returns: mbuf dequeued. 1042 * NULL when no packet is available in the queue. 1043 */ 1044 1045 static struct mbuf * 1046 red_dequeue(ifq, op) 1047 struct ifaltq *ifq; 1048 int op; 1049 { 1050 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1051 struct mbuf *m; 1052 1053 IFQ_LOCK_ASSERT(ifq); 1054 1055 if (op == ALTDQ_POLL) 1056 return qhead(rqp->rq_q); 1057 1058 /* op == ALTDQ_REMOVE */ 1059 m = red_getq(rqp->rq_red, rqp->rq_q); 1060 if (m != NULL) 1061 ifq->ifq_len--; 1062 return (m); 1063 } 1064 1065 static int 1066 red_request(ifq, req, arg) 1067 struct ifaltq *ifq; 1068 int req; 1069 void *arg; 1070 { 1071 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1072 1073 IFQ_LOCK_ASSERT(ifq); 1074 1075 switch (req) { 1076 case ALTRQ_PURGE: 1077 red_purgeq(rqp); 1078 break; 1079 } 1080 return (0); 1081 } 1082 1083 static void 1084 red_purgeq(rqp) 1085 red_queue_t *rqp; 1086 { 1087 _flushq(rqp->rq_q); 1088 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1089 rqp->rq_ifq->ifq_len = 0; 1090 } 1091 1092 #ifdef ALTQ_FLOWVALVE 1093 1094 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1095 #define FV_PSCALE(x) ((x) << FV_PSHIFT) 1096 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1097 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1098 #define FV_FSCALE(x) ((x) << FV_FSHIFT) 1099 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1100 1101 #define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1102 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1103 1104 #define FV_N 10 /* update fve_f every FV_N packets */ 1105 1106 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1107 #define FV_TTHRESH 3 /* time threshold to delete fve */ 1108 #define FV_ALPHA 5 /* extra packet count */ 1109 1110 #define FV_STATS 1111 1112 #if (__FreeBSD_version > 300000) 1113 #define FV_TIMESTAMP(tp) getmicrotime(tp) 1114 #else 1115 #define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1116 #endif 1117 1118 /* 1119 * Brtt table: 127 entry table to convert drop rate (p) to 1120 * the corresponding bandwidth fraction (f) 1121 * the following equation is implemented to use scaled values, 1122 * fve_p and fve_f, in the fixed point format. 1123 * 1124 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) 1125 * f = Brtt(p) / (max_th + alpha) 1126 */ 1127 #define BRTT_SIZE 128 1128 #define BRTT_SHIFT 12 1129 #define BRTT_MASK 0x0007f000 1130 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) 1131 1132 const int brtt_tab[BRTT_SIZE] = { 1133 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 1134 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 1135 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 1136 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 1137 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 1138 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 1139 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 1140 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 1141 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 1142 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 1143 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 1144 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 1145 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 1146 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 1147 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 1148 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 1149 }; 1150 1151 static __inline struct fve * 1152 flowlist_lookup(fv, pktattr, now) 1153 struct flowvalve *fv; 1154 struct altq_pktattr *pktattr; 1155 struct timeval *now; 1156 { 1157 struct fve *fve; 1158 int flows; 1159 struct ip *ip; 1160 #ifdef INET6 1161 struct ip6_hdr *ip6; 1162 #endif 1163 struct timeval tthresh; 1164 1165 if (pktattr == NULL) 1166 return (NULL); 1167 1168 tthresh.tv_sec = now->tv_sec - FV_TTHRESH; 1169 flows = 0; 1170 /* 1171 * search the flow list 1172 */ 1173 switch (pktattr->pattr_af) { 1174 case AF_INET: 1175 ip = (struct ip *)pktattr->pattr_hdr; 1176 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1177 if (fve->fve_lastdrop.tv_sec == 0) 1178 break; 1179 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1180 fve->fve_lastdrop.tv_sec = 0; 1181 break; 1182 } 1183 if (fve->fve_flow.flow_af == AF_INET && 1184 fve->fve_flow.flow_ip.ip_src.s_addr == 1185 ip->ip_src.s_addr && 1186 fve->fve_flow.flow_ip.ip_dst.s_addr == 1187 ip->ip_dst.s_addr) 1188 return (fve); 1189 flows++; 1190 } 1191 break; 1192 #ifdef INET6 1193 case AF_INET6: 1194 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1195 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1196 if (fve->fve_lastdrop.tv_sec == 0) 1197 break; 1198 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1199 fve->fve_lastdrop.tv_sec = 0; 1200 break; 1201 } 1202 if (fve->fve_flow.flow_af == AF_INET6 && 1203 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, 1204 &ip6->ip6_src) && 1205 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, 1206 &ip6->ip6_dst)) 1207 return (fve); 1208 flows++; 1209 } 1210 break; 1211 #endif /* INET6 */ 1212 1213 default: 1214 /* unknown protocol. no drop. */ 1215 return (NULL); 1216 } 1217 fv->fv_flows = flows; /* save the number of active fve's */ 1218 return (NULL); 1219 } 1220 1221 static __inline struct fve * 1222 flowlist_reclaim(fv, pktattr) 1223 struct flowvalve *fv; 1224 struct altq_pktattr *pktattr; 1225 { 1226 struct fve *fve; 1227 struct ip *ip; 1228 #ifdef INET6 1229 struct ip6_hdr *ip6; 1230 #endif 1231 1232 /* 1233 * get an entry from the tail of the LRU list. 1234 */ 1235 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1236 1237 switch (pktattr->pattr_af) { 1238 case AF_INET: 1239 ip = (struct ip *)pktattr->pattr_hdr; 1240 fve->fve_flow.flow_af = AF_INET; 1241 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1242 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1243 break; 1244 #ifdef INET6 1245 case AF_INET6: 1246 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1247 fve->fve_flow.flow_af = AF_INET6; 1248 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1249 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1250 break; 1251 #endif 1252 } 1253 1254 fve->fve_state = Green; 1255 fve->fve_p = 0.0; 1256 fve->fve_f = 0.0; 1257 fve->fve_ifseq = fv->fv_ifseq - 1; 1258 fve->fve_count = 0; 1259 1260 fv->fv_flows++; 1261 #ifdef FV_STATS 1262 fv->fv_stats.alloc++; 1263 #endif 1264 return (fve); 1265 } 1266 1267 static __inline void 1268 flowlist_move_to_head(fv, fve) 1269 struct flowvalve *fv; 1270 struct fve *fve; 1271 { 1272 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1273 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1274 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1275 } 1276 } 1277 1278 #if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 1279 /* 1280 * allocate flowvalve structure 1281 */ 1282 static struct flowvalve * 1283 fv_alloc(rp) 1284 struct red *rp; 1285 { 1286 struct flowvalve *fv; 1287 struct fve *fve; 1288 int i, num; 1289 1290 num = FV_FLOWLISTSIZE; 1291 fv = malloc(sizeof(struct flowvalve), 1292 M_DEVBUF, M_WAITOK); 1293 if (fv == NULL) 1294 return (NULL); 1295 bzero(fv, sizeof(struct flowvalve)); 1296 1297 fv->fv_fves = malloc(sizeof(struct fve) * num, 1298 M_DEVBUF, M_WAITOK); 1299 if (fv->fv_fves == NULL) { 1300 free(fv, M_DEVBUF); 1301 return (NULL); 1302 } 1303 bzero(fv->fv_fves, sizeof(struct fve) * num); 1304 1305 fv->fv_flows = 0; 1306 TAILQ_INIT(&fv->fv_flowlist); 1307 for (i = 0; i < num; i++) { 1308 fve = &fv->fv_fves[i]; 1309 fve->fve_lastdrop.tv_sec = 0; 1310 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1311 } 1312 1313 /* initialize drop rate threshold in scaled fixed-point */ 1314 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1315 1316 /* initialize drop rate to fraction table */ 1317 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, 1318 M_DEVBUF, M_WAITOK); 1319 if (fv->fv_p2ftab == NULL) { 1320 free(fv->fv_fves, M_DEVBUF); 1321 free(fv, M_DEVBUF); 1322 return (NULL); 1323 } 1324 /* 1325 * create the p2f table. 1326 * (shift is used to keep the precision) 1327 */ 1328 for (i = 1; i < BRTT_SIZE; i++) { 1329 int f; 1330 1331 f = brtt_tab[i] << 8; 1332 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1333 } 1334 1335 return (fv); 1336 } 1337 #endif 1338 1339 static void fv_destroy(fv) 1340 struct flowvalve *fv; 1341 { 1342 free(fv->fv_p2ftab, M_DEVBUF); 1343 free(fv->fv_fves, M_DEVBUF); 1344 free(fv, M_DEVBUF); 1345 } 1346 1347 static __inline int 1348 fv_p2f(fv, p) 1349 struct flowvalve *fv; 1350 int p; 1351 { 1352 int val, f; 1353 1354 if (p >= BRTT_PMAX) 1355 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1356 else if ((val = (p & BRTT_MASK))) 1357 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1358 else 1359 f = fv->fv_p2ftab[1]; 1360 return (f); 1361 } 1362 1363 /* 1364 * check if an arriving packet should be pre-dropped. 1365 * called from red_addq() when a packet arrives. 1366 * returns 1 when the packet should be pre-dropped. 1367 * should be called in splimp. 1368 */ 1369 static int 1370 fv_checkflow(fv, pktattr, fcache) 1371 struct flowvalve *fv; 1372 struct altq_pktattr *pktattr; 1373 struct fve **fcache; 1374 { 1375 struct fve *fve; 1376 struct timeval now; 1377 1378 fv->fv_ifseq++; 1379 FV_TIMESTAMP(&now); 1380 1381 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1382 /* no matching entry in the flowlist */ 1383 return (0); 1384 1385 *fcache = fve; 1386 1387 /* update fraction f for every FV_N packets */ 1388 if (++fve->fve_count == FV_N) { 1389 /* 1390 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f 1391 */ 1392 fve->fve_f = 1393 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) 1394 + fve->fve_f - FV_FUNSCALE(fve->fve_f); 1395 fve->fve_ifseq = fv->fv_ifseq; 1396 fve->fve_count = 0; 1397 } 1398 1399 /* 1400 * overpumping test 1401 */ 1402 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { 1403 int fthresh; 1404 1405 /* calculate a threshold */ 1406 fthresh = fv_p2f(fv, fve->fve_p); 1407 if (fve->fve_f > fthresh) 1408 fve->fve_state = Red; 1409 } 1410 1411 if (fve->fve_state == Red) { 1412 /* 1413 * backoff test 1414 */ 1415 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { 1416 /* no drop for at least FV_BACKOFFTHRESH sec */ 1417 fve->fve_p = 0; 1418 fve->fve_state = Green; 1419 #ifdef FV_STATS 1420 fv->fv_stats.escape++; 1421 #endif 1422 } else { 1423 /* block this flow */ 1424 flowlist_move_to_head(fv, fve); 1425 fve->fve_lastdrop = now; 1426 #ifdef FV_STATS 1427 fv->fv_stats.predrop++; 1428 #endif 1429 return (1); 1430 } 1431 } 1432 1433 /* 1434 * p = (1 - Wp) * p 1435 */ 1436 fve->fve_p -= FV_PUNSCALE(fve->fve_p); 1437 if (fve->fve_p < 0) 1438 fve->fve_p = 0; 1439 #ifdef FV_STATS 1440 fv->fv_stats.pass++; 1441 #endif 1442 return (0); 1443 } 1444 1445 /* 1446 * called from red_addq when a packet is dropped by red. 1447 * should be called in splimp. 1448 */ 1449 static void fv_dropbyred(fv, pktattr, fcache) 1450 struct flowvalve *fv; 1451 struct altq_pktattr *pktattr; 1452 struct fve *fcache; 1453 { 1454 struct fve *fve; 1455 struct timeval now; 1456 1457 if (pktattr == NULL) 1458 return; 1459 FV_TIMESTAMP(&now); 1460 1461 if (fcache != NULL) 1462 /* the fve of this packet is already cached */ 1463 fve = fcache; 1464 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1465 fve = flowlist_reclaim(fv, pktattr); 1466 1467 flowlist_move_to_head(fv, fve); 1468 1469 /* 1470 * update p: the following line cancels the update 1471 * in fv_checkflow() and calculate 1472 * p = Wp + (1 - Wp) * p 1473 */ 1474 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; 1475 1476 fve->fve_lastdrop = now; 1477 } 1478 1479 #endif /* ALTQ_FLOWVALVE */ 1480 1481 #ifdef KLD_MODULE 1482 1483 static struct altqsw red_sw = 1484 {"red", redopen, redclose, redioctl}; 1485 1486 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); 1487 MODULE_VERSION(altq_red, 1); 1488 1489 #endif /* KLD_MODULE */ 1490 #endif /* ALTQ3_COMPAT */ 1491 1492 #endif /* ALTQ_RED */ 1493