/*-
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 *
 * @(#)rm_class.c  1.48  97/12/05 SMI
 * $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $
 * $FreeBSD$
 */
#include "opt_altq.h"
#include "opt_inet.h"
#include "opt_inet6.h"
#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#ifdef ALTQ3_COMPAT
#include <sys/kernel.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#ifdef ALTQ3_COMPAT
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif

#include <net/altq/if_altq.h>
#include <net/altq/altq.h>
#include <net/altq/altq_rmclass.h>
#include <net/altq/altq_rmclass_debug.h>
#include <net/altq/altq_red.h>
#include <net/altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int	_rmc_addq(rm_class_t *, mbuf_t *);
static void	_rmc_dropq(rm_class_t *);
static mbuf_t	*_rmc_getq(rm_class_t *);
static mbuf_t	*_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * now the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
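 *
 *		(for instance: on a 10Mb/s ethernet, nsPerByte = 800, so
 *		a class allocated f = 0.25 of the link would be created
 *		with nsecPerByte = 800 / 0.25 = 3200.)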
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class	*cl;
	struct rm_class	*peer;
	int		 s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl == NULL)
		return (NULL);
	CALLOUT_INIT(&cl->callout_);
	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (cl->q_ == NULL) {
		free(cl, M_DEVBUF);
		return (NULL);
	}

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
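	 * The per-priority class count (num_) and aggregate allotment
	 * (alloc_) accumulated here are the inputs rmc_wrr_set_weights()
	 * turns into each class's w_allotment_, which
	 * _rmc_wrr_dequeue_next() later spends as a byte budget.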
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
	return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize)
{
	struct rm_ifdat	*ifd;
	u_int		 old_allotment;
	int		 s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round robin weights for the CBQ weighted round
 *	robin algorithm.
 *
 *	Returns: NONE
 */

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int		 i;
	struct rm_class	*cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
			    (ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t	*t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
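 *	A leaf has depth 0 and a parent's depth is one more than the
 *	deepest of its children, so removing a deepest child can shrink
 *	every ancestor's depth; the walk below stops at the first
 *	ancestor whose depth is unchanged.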
 *
 *	Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t	*p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 *	Returns: NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class	*p, *head, *previous;
	int		 s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	 re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	IFQ_UNLOCK(ifd->ifq_);
	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}


/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 *	Returns:	NONE
 */

void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int	i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit.  if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
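			 * (heuristic 2 from the ADJUST_CUTOFF notes
			 * above; the not-overlimit branch just before
			 * this implements heuristic 1.)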
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied.  0, otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 *	packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
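	 * (heuristic 3 from the ADJUST_CUTOFF notes above.)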
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
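	 * in effect this trades strict link-sharing for work-conserving
	 * behavior: rather than leave the link idle, the borrowing
	 * candidate remembered in 'first' gets to send.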
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if the value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
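	 * (i.e., each packet is charged to its class using the dequeue
	 * timestamp plus the transmission time implied by ns_per_byte_,
	 * rather than an actual hardware completion time.)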
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
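	 * borrowed_[qo_] was recorded by rmc_under_limit() when this
	 * packet was dequeued; it names the ancestor the class was
	 * borrowing bandwidth from, if any.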
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns: NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto(struct timeval *);

static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of the borrow class being
 *	overlimit, then delay for the offtime of the borrowing class that
 *	is overlimit.
 *
 *	Returns: NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
		    (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	s = splnet();
	IFQ_LOCK(ifd->ifq_);
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *
 *	Returns: NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 * effects on the code whether queues are red queues or droptail
 * queues.
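 *
 * Each wrapper below checks the queue type: RED and RIO queues are
 * handed to red_addq()/red_getq() or rio_addq()/rio_getq(), and
 * everything else falls through to the plain tail-queue routines
 * (_addq(), _getq(), ...) at the end of this file.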
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t	*m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void cbqtrace_dump(int counter)
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}
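
/*
 * The queues above are circular, singly-linked lists threaded through
 * m_nextpkt: qtail(q) points at the last packet, and the last packet's
 * m_nextpkt wraps around to the head.  Keeping only a tail pointer lets
 * _addq() append and _getq() pop the head in O(1), while _getq_tail()
 * and _removeq() below must walk the ring to find the predecessor of
 * the packet they unlink.
 */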

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = arc4random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */