// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
	Author: devik@cdi.cz
	========================================================================
	HTB is like TBF with multiple classes. It is also similar to CBQ because
	it allows assigning a priority to each class in the hierarchy.
	In fact it is another implementation of Floyd's formal sharing.

	Levels:
	Each class is assigned a level. Leaves ALWAYS have level 0, and root
	classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
	one less than their parent.
*/

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must match the version number supplied by TC */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* whether htb classes get a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When a class changes from state 1->2 and disconnects from its
	 * parent's feed, we lose the ptr value and would start from the
	 * first child again. Here we store the classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;	/* token bucket depth/rate */
	s64			mbuffer;		/* max wait time */
	u32			prio;		/* these two are used only by leaves...
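 * (prio selects one of the TC_HTB_NUMPRIO bands in the feed trees; quantum
 * is the per-round DRR byte allowance consumed in htb_dequeue_tree())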
*/ 101 int quantum; /* but stored for parent-to-leaf return */ 102 103 struct tcf_proto __rcu *filter_list; /* class attached filters */ 104 struct tcf_block *block; 105 int filter_cnt; 106 107 int level; /* our level (see above) */ 108 unsigned int children; 109 struct htb_class *parent; /* parent class */ 110 111 struct net_rate_estimator __rcu *rate_est; 112 113 /* 114 * Written often fields 115 */ 116 struct gnet_stats_basic_sync bstats; 117 struct gnet_stats_basic_sync bstats_bias; 118 struct tc_htb_xstats xstats; /* our special stats */ 119 120 /* token bucket parameters */ 121 s64 tokens, ctokens;/* current number of tokens */ 122 s64 t_c; /* checkpoint time */ 123 124 union { 125 struct htb_class_leaf { 126 int deficit[TC_HTB_MAXDEPTH]; 127 struct Qdisc *q; 128 struct netdev_queue *offload_queue; 129 } leaf; 130 struct htb_class_inner { 131 struct htb_prio clprio[TC_HTB_NUMPRIO]; 132 } inner; 133 }; 134 s64 pq_key; 135 136 int prio_activity; /* for which prios are we active */ 137 enum htb_cmode cmode; /* current mode of the class */ 138 struct rb_node pq_node; /* node for event queue */ 139 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ 140 141 unsigned int drops ____cacheline_aligned_in_smp; 142 unsigned int overlimits; 143 }; 144 145 struct htb_level { 146 struct rb_root wait_pq; 147 struct htb_prio hprio[TC_HTB_NUMPRIO]; 148 }; 149 150 struct htb_sched { 151 struct Qdisc_class_hash clhash; 152 int defcls; /* class where unclassified flows go to */ 153 int rate2quantum; /* quant = rate / rate2quantum */ 154 155 /* filters for qdisc itself */ 156 struct tcf_proto __rcu *filter_list; 157 struct tcf_block *block; 158 159 #define HTB_WARN_TOOMANYEVENTS 0x1 160 unsigned int warned; /* only one warning */ 161 int direct_qlen; 162 struct work_struct work; 163 164 /* non shaped skbs; let them go directly thru */ 165 struct qdisc_skb_head direct_queue; 166 u32 direct_pkts; 167 u32 overlimits; 168 169 struct qdisc_watchdog watchdog; 170 171 s64 now; /* cached dequeue time */ 172 173 /* time of nearest event per level (row) */ 174 s64 near_ev_cache[TC_HTB_MAXDEPTH]; 175 176 int row_mask[TC_HTB_MAXDEPTH]; 177 178 struct htb_level hlevel[TC_HTB_MAXDEPTH]; 179 180 struct Qdisc **direct_qdiscs; 181 unsigned int num_direct_qdiscs; 182 183 bool offload; 184 }; 185 186 /* find class in global hash table using given handle */ 187 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) 188 { 189 struct htb_sched *q = qdisc_priv(sch); 190 struct Qdisc_class_common *clc; 191 192 clc = qdisc_class_find(&q->clhash, handle); 193 if (clc == NULL) 194 return NULL; 195 return container_of(clc, struct htb_class, common); 196 } 197 198 static unsigned long htb_search(struct Qdisc *sch, u32 handle) 199 { 200 return (unsigned long)htb_find(handle, sch); 201 } 202 203 #define HTB_DIRECT ((struct htb_class *)-1L) 204 205 /** 206 * htb_classify - classify a packet into class 207 * @skb: the socket buffer 208 * @sch: the active queue discipline 209 * @qerr: pointer for returned status code 210 * 211 * It returns NULL if the packet should be dropped or -1 if the packet 212 * should be passed directly thru. In all other cases leaf class is returned. 213 * We allow direct class selection by classid in priority. The we examine 214 * filters in qdisc and in inner nodes (if higher filter points to the inner 215 * node). If we end up with classid MAJOR:0 we enqueue the skb into special 216 * internal fifo (direct). These packets then go directly thru. 
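 * (e.g. with qdisc handle 1:, a priority or filter verdict of 1:0 picks the
 * direct queue, while a classid such as 1:10 is looked up as a class)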
If we still 217 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful 218 * then finish and return direct queue. 219 */ 220 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 221 int *qerr) 222 { 223 struct htb_sched *q = qdisc_priv(sch); 224 struct htb_class *cl; 225 struct tcf_result res; 226 struct tcf_proto *tcf; 227 int result; 228 229 /* allow to select class by setting skb->priority to valid classid; 230 * note that nfmark can be used too by attaching filter fw with no 231 * rules in it 232 */ 233 if (skb->priority == sch->handle) 234 return HTB_DIRECT; /* X:0 (direct flow) selected */ 235 cl = htb_find(skb->priority, sch); 236 if (cl) { 237 if (cl->level == 0) 238 return cl; 239 /* Start with inner filter chain if a non-leaf class is selected */ 240 tcf = rcu_dereference_bh(cl->filter_list); 241 } else { 242 tcf = rcu_dereference_bh(q->filter_list); 243 } 244 245 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 246 while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { 247 #ifdef CONFIG_NET_CLS_ACT 248 switch (result) { 249 case TC_ACT_QUEUED: 250 case TC_ACT_STOLEN: 251 case TC_ACT_TRAP: 252 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; 253 fallthrough; 254 case TC_ACT_SHOT: 255 return NULL; 256 } 257 #endif 258 cl = (void *)res.class; 259 if (!cl) { 260 if (res.classid == sch->handle) 261 return HTB_DIRECT; /* X:0 (direct flow) */ 262 cl = htb_find(res.classid, sch); 263 if (!cl) 264 break; /* filter selected invalid classid */ 265 } 266 if (!cl->level) 267 return cl; /* we hit leaf; return it */ 268 269 /* we have got inner class; apply inner filter chain */ 270 tcf = rcu_dereference_bh(cl->filter_list); 271 } 272 /* classification failed; try to use default class */ 273 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); 274 if (!cl || cl->level) 275 return HTB_DIRECT; /* bad default .. this is safe bet */ 276 return cl; 277 } 278 279 /** 280 * htb_add_to_id_tree - adds class to the round robin list 281 * @root: the root of the tree 282 * @cl: the class to add 283 * @prio: the give prio in class 284 * 285 * Routine adds class to the list (actually tree) sorted by classid. 286 * Make sure that class is not already on such list for given prio. 287 */ 288 static void htb_add_to_id_tree(struct rb_root *root, 289 struct htb_class *cl, int prio) 290 { 291 struct rb_node **p = &root->rb_node, *parent = NULL; 292 293 while (*p) { 294 struct htb_class *c; 295 parent = *p; 296 c = rb_entry(parent, struct htb_class, node[prio]); 297 298 if (cl->common.classid > c->common.classid) 299 p = &parent->rb_right; 300 else 301 p = &parent->rb_left; 302 } 303 rb_link_node(&cl->node[prio], parent, p); 304 rb_insert_color(&cl->node[prio], root); 305 } 306 307 /** 308 * htb_add_to_wait_tree - adds class to the event queue with delay 309 * @q: the priority event queue 310 * @cl: the class to add 311 * @delay: delay in microseconds 312 * 313 * The class is added to priority event queue to indicate that class will 314 * change its mode in cl->pq_key microseconds. Make sure that class is not 315 * already in the queue. 
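 * The tree is keyed by cl->pq_key, so rb_first() in htb_do_events() always
 * yields the class with the earliest pending mode change.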
316 */ 317 static void htb_add_to_wait_tree(struct htb_sched *q, 318 struct htb_class *cl, s64 delay) 319 { 320 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL; 321 322 cl->pq_key = q->now + delay; 323 if (cl->pq_key == q->now) 324 cl->pq_key++; 325 326 /* update the nearest event cache */ 327 if (q->near_ev_cache[cl->level] > cl->pq_key) 328 q->near_ev_cache[cl->level] = cl->pq_key; 329 330 while (*p) { 331 struct htb_class *c; 332 parent = *p; 333 c = rb_entry(parent, struct htb_class, pq_node); 334 if (cl->pq_key >= c->pq_key) 335 p = &parent->rb_right; 336 else 337 p = &parent->rb_left; 338 } 339 rb_link_node(&cl->pq_node, parent, p); 340 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq); 341 } 342 343 /** 344 * htb_next_rb_node - finds next node in binary tree 345 * @n: the current node in binary tree 346 * 347 * When we are past last key we return NULL. 348 * Average complexity is 2 steps per call. 349 */ 350 static inline void htb_next_rb_node(struct rb_node **n) 351 { 352 *n = rb_next(*n); 353 } 354 355 /** 356 * htb_add_class_to_row - add class to its row 357 * @q: the priority event queue 358 * @cl: the class to add 359 * @mask: the given priorities in class in bitmap 360 * 361 * The class is added to row at priorities marked in mask. 362 * It does nothing if mask == 0. 363 */ 364 static inline void htb_add_class_to_row(struct htb_sched *q, 365 struct htb_class *cl, int mask) 366 { 367 q->row_mask[cl->level] |= mask; 368 while (mask) { 369 int prio = ffz(~mask); 370 mask &= ~(1 << prio); 371 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio); 372 } 373 } 374 375 /* If this triggers, it is a bug in this code, but it need not be fatal */ 376 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) 377 { 378 if (RB_EMPTY_NODE(rb)) { 379 WARN_ON(1); 380 } else { 381 rb_erase(rb, root); 382 RB_CLEAR_NODE(rb); 383 } 384 } 385 386 387 /** 388 * htb_remove_class_from_row - removes class from its row 389 * @q: the priority event queue 390 * @cl: the class to add 391 * @mask: the given priorities in class in bitmap 392 * 393 * The class is removed from row at priorities marked in mask. 394 * It does nothing if mask == 0. 395 */ 396 static inline void htb_remove_class_from_row(struct htb_sched *q, 397 struct htb_class *cl, int mask) 398 { 399 int m = 0; 400 struct htb_level *hlevel = &q->hlevel[cl->level]; 401 402 while (mask) { 403 int prio = ffz(~mask); 404 struct htb_prio *hprio = &hlevel->hprio[prio]; 405 406 mask &= ~(1 << prio); 407 if (hprio->ptr == cl->node + prio) 408 htb_next_rb_node(&hprio->ptr); 409 410 htb_safe_rb_erase(cl->node + prio, &hprio->row); 411 if (!hprio->row.rb_node) 412 m |= 1 << prio; 413 } 414 q->row_mask[cl->level] &= ~m; 415 } 416 417 /** 418 * htb_activate_prios - creates active classe's feed chain 419 * @q: the priority event queue 420 * @cl: the class to activate 421 * 422 * The class is connected to ancestors and/or appropriate rows 423 * for priorities it is participating on. cl->cmode must be new 424 * (activated) mode. It does nothing if cl->prio_activity == 0. 
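 * The walk climbs towards the root only while the class is in HTB_MAY_BORROW
 * mode; once an ancestor already feeds a given prio, that bit is cleared from
 * the mask so higher levels are left untouched for it.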
425 */ 426 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) 427 { 428 struct htb_class *p = cl->parent; 429 long m, mask = cl->prio_activity; 430 431 while (cl->cmode == HTB_MAY_BORROW && p && mask) { 432 m = mask; 433 while (m) { 434 int prio = ffz(~m); 435 m &= ~(1 << prio); 436 437 if (p->inner.clprio[prio].feed.rb_node) 438 /* parent already has its feed in use so that 439 * reset bit in mask as parent is already ok 440 */ 441 mask &= ~(1 << prio); 442 443 htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio); 444 } 445 p->prio_activity |= mask; 446 cl = p; 447 p = cl->parent; 448 449 } 450 if (cl->cmode == HTB_CAN_SEND && mask) 451 htb_add_class_to_row(q, cl, mask); 452 } 453 454 /** 455 * htb_deactivate_prios - remove class from feed chain 456 * @q: the priority event queue 457 * @cl: the class to deactivate 458 * 459 * cl->cmode must represent old mode (before deactivation). It does 460 * nothing if cl->prio_activity == 0. Class is removed from all feed 461 * chains and rows. 462 */ 463 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) 464 { 465 struct htb_class *p = cl->parent; 466 long m, mask = cl->prio_activity; 467 468 while (cl->cmode == HTB_MAY_BORROW && p && mask) { 469 m = mask; 470 mask = 0; 471 while (m) { 472 int prio = ffz(~m); 473 m &= ~(1 << prio); 474 475 if (p->inner.clprio[prio].ptr == cl->node + prio) { 476 /* we are removing child which is pointed to from 477 * parent feed - forget the pointer but remember 478 * classid 479 */ 480 p->inner.clprio[prio].last_ptr_id = cl->common.classid; 481 p->inner.clprio[prio].ptr = NULL; 482 } 483 484 htb_safe_rb_erase(cl->node + prio, 485 &p->inner.clprio[prio].feed); 486 487 if (!p->inner.clprio[prio].feed.rb_node) 488 mask |= 1 << prio; 489 } 490 491 p->prio_activity &= ~mask; 492 cl = p; 493 p = cl->parent; 494 495 } 496 if (cl->cmode == HTB_CAN_SEND && mask) 497 htb_remove_class_from_row(q, cl, mask); 498 } 499 500 static inline s64 htb_lowater(const struct htb_class *cl) 501 { 502 if (htb_hysteresis) 503 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; 504 else 505 return 0; 506 } 507 static inline s64 htb_hiwater(const struct htb_class *cl) 508 { 509 if (htb_hysteresis) 510 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; 511 else 512 return 0; 513 } 514 515 516 /** 517 * htb_class_mode - computes and returns current class mode 518 * @cl: the target class 519 * @diff: diff time in microseconds 520 * 521 * It computes cl's mode at time cl->t_c+diff and returns it. If mode 522 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference 523 * from now to time when cl will change its state. 524 * Also it is worth to note that class mode doesn't change simply 525 * at cl->{c,}tokens == 0 but there can rather be hysteresis of 526 * 0 .. -cl->{c,}buffer range. It is meant to limit number of 527 * mode transitions per time unit. The speed gain is about 1/6. 
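 * With htb_hysteresis disabled both water marks are 0, so the test reduces
 * to: ctokens + diff < 0 => HTB_CANT_SEND, tokens + diff >= 0 => HTB_CAN_SEND,
 * otherwise HTB_MAY_BORROW.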
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

/**
 * htb_change_class_mode - changes class's mode
 * @q: the priority event queue
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine updates the feed list linkage, changes the
 * mode and adds the class to the wait event queue if appropriate. The new
 * mode should differ from the old one, and cl->pq_key has to be valid if
 * changing to a mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * The routine learns the (new) priority of the leaf and activates the feed
 * chain for that prio. It can safely be called on an already active leaf.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Make sure that the leaf is active. In other words, it must not be called
 * with a non-active leaf.
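 * (The WARN_ON at the top of htb_deactivate() fires if this rule is broken.)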
607 */ 608 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) 609 { 610 WARN_ON(!cl->prio_activity); 611 612 htb_deactivate_prios(q, cl); 613 cl->prio_activity = 0; 614 } 615 616 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, 617 struct sk_buff **to_free) 618 { 619 int ret; 620 unsigned int len = qdisc_pkt_len(skb); 621 struct htb_sched *q = qdisc_priv(sch); 622 struct htb_class *cl = htb_classify(skb, sch, &ret); 623 624 if (cl == HTB_DIRECT) { 625 /* enqueue to helper queue */ 626 if (q->direct_queue.qlen < q->direct_qlen) { 627 __qdisc_enqueue_tail(skb, &q->direct_queue); 628 q->direct_pkts++; 629 } else { 630 return qdisc_drop(skb, sch, to_free); 631 } 632 #ifdef CONFIG_NET_CLS_ACT 633 } else if (!cl) { 634 if (ret & __NET_XMIT_BYPASS) 635 qdisc_qstats_drop(sch); 636 __qdisc_drop(skb, to_free); 637 return ret; 638 #endif 639 } else if ((ret = qdisc_enqueue(skb, cl->leaf.q, 640 to_free)) != NET_XMIT_SUCCESS) { 641 if (net_xmit_drop_count(ret)) { 642 qdisc_qstats_drop(sch); 643 cl->drops++; 644 } 645 return ret; 646 } else { 647 htb_activate(q, cl); 648 } 649 650 sch->qstats.backlog += len; 651 sch->q.qlen++; 652 return NET_XMIT_SUCCESS; 653 } 654 655 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff) 656 { 657 s64 toks = diff + cl->tokens; 658 659 if (toks > cl->buffer) 660 toks = cl->buffer; 661 toks -= (s64) psched_l2t_ns(&cl->rate, bytes); 662 if (toks <= -cl->mbuffer) 663 toks = 1 - cl->mbuffer; 664 665 cl->tokens = toks; 666 } 667 668 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff) 669 { 670 s64 toks = diff + cl->ctokens; 671 672 if (toks > cl->cbuffer) 673 toks = cl->cbuffer; 674 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes); 675 if (toks <= -cl->mbuffer) 676 toks = 1 - cl->mbuffer; 677 678 cl->ctokens = toks; 679 } 680 681 /** 682 * htb_charge_class - charges amount "bytes" to leaf and ancestors 683 * @q: the priority event queue 684 * @cl: the class to start iterate 685 * @level: the minimum level to account 686 * @skb: the socket buffer 687 * 688 * Routine assumes that packet "bytes" long was dequeued from leaf cl 689 * borrowing from "level". It accounts bytes to ceil leaky bucket for 690 * leaf and all ancestors and to rate bucket for ancestors at levels 691 * "level" and higher. It also handles possible change of mode resulting 692 * from the update. Note that mode can also increase here (MAY_BORROW to 693 * CAN_SEND) because we can use more precise clock that event queue here. 694 * In such case we remove class from event queue first. 
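 * Concretely: the lender at "level" and every class above it pay rate tokens,
 * every class on the path pays ceil tokens, and classes below "level" only
 * record a borrow and have their rate tokens refreshed by the elapsed time.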
695 */ 696 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, 697 int level, struct sk_buff *skb) 698 { 699 int bytes = qdisc_pkt_len(skb); 700 enum htb_cmode old_mode; 701 s64 diff; 702 703 while (cl) { 704 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); 705 if (cl->level >= level) { 706 if (cl->level == level) 707 cl->xstats.lends++; 708 htb_accnt_tokens(cl, bytes, diff); 709 } else { 710 cl->xstats.borrows++; 711 cl->tokens += diff; /* we moved t_c; update tokens */ 712 } 713 htb_accnt_ctokens(cl, bytes, diff); 714 cl->t_c = q->now; 715 716 old_mode = cl->cmode; 717 diff = 0; 718 htb_change_class_mode(q, cl, &diff); 719 if (old_mode != cl->cmode) { 720 if (old_mode != HTB_CAN_SEND) 721 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq); 722 if (cl->cmode != HTB_CAN_SEND) 723 htb_add_to_wait_tree(q, cl, diff); 724 } 725 726 /* update basic stats except for leaves which are already updated */ 727 if (cl->level) 728 bstats_update(&cl->bstats, skb); 729 730 cl = cl->parent; 731 } 732 } 733 734 /** 735 * htb_do_events - make mode changes to classes at the level 736 * @q: the priority event queue 737 * @level: which wait_pq in 'q->hlevel' 738 * @start: start jiffies 739 * 740 * Scans event queue for pending events and applies them. Returns time of 741 * next pending event (0 for no event in pq, q->now for too many events). 742 * Note: Applied are events whose have cl->pq_key <= q->now. 743 */ 744 static s64 htb_do_events(struct htb_sched *q, const int level, 745 unsigned long start) 746 { 747 /* don't run for longer than 2 jiffies; 2 is used instead of 748 * 1 to simplify things when jiffy is going to be incremented 749 * too soon 750 */ 751 unsigned long stop_at = start + 2; 752 struct rb_root *wait_pq = &q->hlevel[level].wait_pq; 753 754 while (time_before(jiffies, stop_at)) { 755 struct htb_class *cl; 756 s64 diff; 757 struct rb_node *p = rb_first(wait_pq); 758 759 if (!p) 760 return 0; 761 762 cl = rb_entry(p, struct htb_class, pq_node); 763 if (cl->pq_key > q->now) 764 return cl->pq_key; 765 766 htb_safe_rb_erase(p, wait_pq); 767 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer); 768 htb_change_class_mode(q, cl, &diff); 769 if (cl->cmode != HTB_CAN_SEND) 770 htb_add_to_wait_tree(q, cl, diff); 771 } 772 773 /* too much load - let's continue after a break for scheduling */ 774 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { 775 pr_warn("htb: too many events!\n"); 776 q->warned |= HTB_WARN_TOOMANYEVENTS; 777 } 778 779 return q->now; 780 } 781 782 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL 783 * is no such one exists. 784 */ 785 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, 786 u32 id) 787 { 788 struct rb_node *r = NULL; 789 while (n) { 790 struct htb_class *cl = 791 rb_entry(n, struct htb_class, node[prio]); 792 793 if (id > cl->common.classid) { 794 n = n->rb_right; 795 } else if (id < cl->common.classid) { 796 r = n; 797 n = n->rb_left; 798 } else { 799 return n; 800 } 801 } 802 return r; 803 } 804 805 /** 806 * htb_lookup_leaf - returns next leaf class in DRR order 807 * @hprio: the current one 808 * @prio: which prio in class 809 * 810 * Find leaf where current feed pointers points to. 
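 * It walks down from the row through the per-prio feed trees, using the saved
 * (ptr, last_ptr_id) pair at each level to resume the round robin where the
 * previous dequeue left off.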
811 */ 812 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio) 813 { 814 int i; 815 struct { 816 struct rb_node *root; 817 struct rb_node **pptr; 818 u32 *pid; 819 } stk[TC_HTB_MAXDEPTH], *sp = stk; 820 821 BUG_ON(!hprio->row.rb_node); 822 sp->root = hprio->row.rb_node; 823 sp->pptr = &hprio->ptr; 824 sp->pid = &hprio->last_ptr_id; 825 826 for (i = 0; i < 65535; i++) { 827 if (!*sp->pptr && *sp->pid) { 828 /* ptr was invalidated but id is valid - try to recover 829 * the original or next ptr 830 */ 831 *sp->pptr = 832 htb_id_find_next_upper(prio, sp->root, *sp->pid); 833 } 834 *sp->pid = 0; /* ptr is valid now so that remove this hint as it 835 * can become out of date quickly 836 */ 837 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 838 *sp->pptr = sp->root; 839 while ((*sp->pptr)->rb_left) 840 *sp->pptr = (*sp->pptr)->rb_left; 841 if (sp > stk) { 842 sp--; 843 if (!*sp->pptr) { 844 WARN_ON(1); 845 return NULL; 846 } 847 htb_next_rb_node(sp->pptr); 848 } 849 } else { 850 struct htb_class *cl; 851 struct htb_prio *clp; 852 853 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); 854 if (!cl->level) 855 return cl; 856 clp = &cl->inner.clprio[prio]; 857 (++sp)->root = clp->feed.rb_node; 858 sp->pptr = &clp->ptr; 859 sp->pid = &clp->last_ptr_id; 860 } 861 } 862 WARN_ON(1); 863 return NULL; 864 } 865 866 /* dequeues packet at given priority and level; call only if 867 * you are sure that there is active class at prio/level 868 */ 869 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio, 870 const int level) 871 { 872 struct sk_buff *skb = NULL; 873 struct htb_class *cl, *start; 874 struct htb_level *hlevel = &q->hlevel[level]; 875 struct htb_prio *hprio = &hlevel->hprio[prio]; 876 877 /* look initial class up in the row */ 878 start = cl = htb_lookup_leaf(hprio, prio); 879 880 do { 881 next: 882 if (unlikely(!cl)) 883 return NULL; 884 885 /* class can be empty - it is unlikely but can be true if leaf 886 * qdisc drops packets in enqueue routine or if someone used 887 * graft operation on the leaf since last dequeue; 888 * simply deactivate and skip such class 889 */ 890 if (unlikely(cl->leaf.q->q.qlen == 0)) { 891 struct htb_class *next; 892 htb_deactivate(q, cl); 893 894 /* row/level might become empty */ 895 if ((q->row_mask[level] & (1 << prio)) == 0) 896 return NULL; 897 898 next = htb_lookup_leaf(hprio, prio); 899 900 if (cl == start) /* fix start if we just deleted it */ 901 start = next; 902 cl = next; 903 goto next; 904 } 905 906 skb = cl->leaf.q->dequeue(cl->leaf.q); 907 if (likely(skb != NULL)) 908 break; 909 910 qdisc_warn_nonwc("htb", cl->leaf.q); 911 htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr: 912 &q->hlevel[0].hprio[prio].ptr); 913 cl = htb_lookup_leaf(hprio, prio); 914 915 } while (cl != start); 916 917 if (likely(skb != NULL)) { 918 bstats_update(&cl->bstats, skb); 919 cl->leaf.deficit[level] -= qdisc_pkt_len(skb); 920 if (cl->leaf.deficit[level] < 0) { 921 cl->leaf.deficit[level] += cl->quantum; 922 htb_next_rb_node(level ? 
&cl->parent->inner.clprio[prio].ptr : 923 &q->hlevel[0].hprio[prio].ptr); 924 } 925 /* this used to be after charge_class but this constelation 926 * gives us slightly better performance 927 */ 928 if (!cl->leaf.q->q.qlen) 929 htb_deactivate(q, cl); 930 htb_charge_class(q, cl, level, skb); 931 } 932 return skb; 933 } 934 935 static struct sk_buff *htb_dequeue(struct Qdisc *sch) 936 { 937 struct sk_buff *skb; 938 struct htb_sched *q = qdisc_priv(sch); 939 int level; 940 s64 next_event; 941 unsigned long start_at; 942 943 /* try to dequeue direct packets as high prio (!) to minimize cpu work */ 944 skb = __qdisc_dequeue_head(&q->direct_queue); 945 if (skb != NULL) { 946 ok: 947 qdisc_bstats_update(sch, skb); 948 qdisc_qstats_backlog_dec(sch, skb); 949 sch->q.qlen--; 950 return skb; 951 } 952 953 if (!sch->q.qlen) 954 goto fin; 955 q->now = ktime_get_ns(); 956 start_at = jiffies; 957 958 next_event = q->now + 5LLU * NSEC_PER_SEC; 959 960 for (level = 0; level < TC_HTB_MAXDEPTH; level++) { 961 /* common case optimization - skip event handler quickly */ 962 int m; 963 s64 event = q->near_ev_cache[level]; 964 965 if (q->now >= event) { 966 event = htb_do_events(q, level, start_at); 967 if (!event) 968 event = q->now + NSEC_PER_SEC; 969 q->near_ev_cache[level] = event; 970 } 971 972 if (next_event > event) 973 next_event = event; 974 975 m = ~q->row_mask[level]; 976 while (m != (int)(-1)) { 977 int prio = ffz(m); 978 979 m |= 1 << prio; 980 skb = htb_dequeue_tree(q, prio, level); 981 if (likely(skb != NULL)) 982 goto ok; 983 } 984 } 985 if (likely(next_event > q->now)) 986 qdisc_watchdog_schedule_ns(&q->watchdog, next_event); 987 else 988 schedule_work(&q->work); 989 fin: 990 return skb; 991 } 992 993 /* reset all classes */ 994 /* always caled under BH & queue lock */ 995 static void htb_reset(struct Qdisc *sch) 996 { 997 struct htb_sched *q = qdisc_priv(sch); 998 struct htb_class *cl; 999 unsigned int i; 1000 1001 for (i = 0; i < q->clhash.hashsize; i++) { 1002 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { 1003 if (cl->level) 1004 memset(&cl->inner, 0, sizeof(cl->inner)); 1005 else { 1006 if (cl->leaf.q && !q->offload) 1007 qdisc_reset(cl->leaf.q); 1008 } 1009 cl->prio_activity = 0; 1010 cl->cmode = HTB_CAN_SEND; 1011 } 1012 } 1013 qdisc_watchdog_cancel(&q->watchdog); 1014 __qdisc_reset_queue(&q->direct_queue); 1015 memset(q->hlevel, 0, sizeof(q->hlevel)); 1016 memset(q->row_mask, 0, sizeof(q->row_mask)); 1017 } 1018 1019 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { 1020 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) }, 1021 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) }, 1022 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 1023 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 1024 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 }, 1025 [TCA_HTB_RATE64] = { .type = NLA_U64 }, 1026 [TCA_HTB_CEIL64] = { .type = NLA_U64 }, 1027 [TCA_HTB_OFFLOAD] = { .type = NLA_FLAG }, 1028 }; 1029 1030 static void htb_work_func(struct work_struct *work) 1031 { 1032 struct htb_sched *q = container_of(work, struct htb_sched, work); 1033 struct Qdisc *sch = q->watchdog.qdisc; 1034 1035 rcu_read_lock(); 1036 __netif_schedule(qdisc_root(sch)); 1037 rcu_read_unlock(); 1038 } 1039 1040 static void htb_set_lockdep_class_child(struct Qdisc *q) 1041 { 1042 static struct lock_class_key child_key; 1043 1044 lockdep_set_class(qdisc_lock(q), &child_key); 1045 } 1046 1047 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt) 1048 { 
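	/* Forward the request to the driver through its ndo_setup_tc() hook with
	 * type TC_SETUP_QDISC_HTB; a nonzero return value is propagated to callers.
	 */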
1049 return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt); 1050 } 1051 1052 static int htb_init(struct Qdisc *sch, struct nlattr *opt, 1053 struct netlink_ext_ack *extack) 1054 { 1055 struct net_device *dev = qdisc_dev(sch); 1056 struct tc_htb_qopt_offload offload_opt; 1057 struct htb_sched *q = qdisc_priv(sch); 1058 struct nlattr *tb[TCA_HTB_MAX + 1]; 1059 struct tc_htb_glob *gopt; 1060 unsigned int ntx; 1061 bool offload; 1062 int err; 1063 1064 qdisc_watchdog_init(&q->watchdog, sch); 1065 INIT_WORK(&q->work, htb_work_func); 1066 1067 if (!opt) 1068 return -EINVAL; 1069 1070 err = tcf_block_get(&q->block, &q->filter_list, sch, extack); 1071 if (err) 1072 return err; 1073 1074 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, 1075 NULL); 1076 if (err < 0) 1077 return err; 1078 1079 if (!tb[TCA_HTB_INIT]) 1080 return -EINVAL; 1081 1082 gopt = nla_data(tb[TCA_HTB_INIT]); 1083 if (gopt->version != HTB_VER >> 16) 1084 return -EINVAL; 1085 1086 offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]); 1087 1088 if (offload) { 1089 if (sch->parent != TC_H_ROOT) { 1090 NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload"); 1091 return -EOPNOTSUPP; 1092 } 1093 1094 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) { 1095 NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on"); 1096 return -EOPNOTSUPP; 1097 } 1098 1099 q->num_direct_qdiscs = dev->real_num_tx_queues; 1100 q->direct_qdiscs = kcalloc(q->num_direct_qdiscs, 1101 sizeof(*q->direct_qdiscs), 1102 GFP_KERNEL); 1103 if (!q->direct_qdiscs) 1104 return -ENOMEM; 1105 } 1106 1107 err = qdisc_class_hash_init(&q->clhash); 1108 if (err < 0) 1109 return err; 1110 1111 if (tb[TCA_HTB_DIRECT_QLEN]) 1112 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); 1113 else 1114 q->direct_qlen = qdisc_dev(sch)->tx_queue_len; 1115 1116 if ((q->rate2quantum = gopt->rate2quantum) < 1) 1117 q->rate2quantum = 1; 1118 q->defcls = gopt->defcls; 1119 1120 if (!offload) 1121 return 0; 1122 1123 for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { 1124 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); 1125 struct Qdisc *qdisc; 1126 1127 qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1128 TC_H_MAKE(sch->handle, 0), extack); 1129 if (!qdisc) { 1130 return -ENOMEM; 1131 } 1132 1133 htb_set_lockdep_class_child(qdisc); 1134 q->direct_qdiscs[ntx] = qdisc; 1135 qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 1136 } 1137 1138 sch->flags |= TCQ_F_MQROOT; 1139 1140 offload_opt = (struct tc_htb_qopt_offload) { 1141 .command = TC_HTB_CREATE, 1142 .parent_classid = TC_H_MAJ(sch->handle) >> 16, 1143 .classid = TC_H_MIN(q->defcls), 1144 .extack = extack, 1145 }; 1146 err = htb_offload(dev, &offload_opt); 1147 if (err) 1148 return err; 1149 1150 /* Defer this assignment, so that htb_destroy skips offload-related 1151 * parts (especially calling ndo_setup_tc) on errors. 
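	 * q->offload therefore stays false until the TC_HTB_CREATE command above
	 * has succeeded.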
1152 */ 1153 q->offload = true; 1154 1155 return 0; 1156 } 1157 1158 static void htb_attach_offload(struct Qdisc *sch) 1159 { 1160 struct net_device *dev = qdisc_dev(sch); 1161 struct htb_sched *q = qdisc_priv(sch); 1162 unsigned int ntx; 1163 1164 for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) { 1165 struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx]; 1166 1167 old = dev_graft_qdisc(qdisc->dev_queue, qdisc); 1168 qdisc_put(old); 1169 qdisc_hash_add(qdisc, false); 1170 } 1171 for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) { 1172 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); 1173 struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL); 1174 1175 qdisc_put(old); 1176 } 1177 1178 kfree(q->direct_qdiscs); 1179 q->direct_qdiscs = NULL; 1180 } 1181 1182 static void htb_attach_software(struct Qdisc *sch) 1183 { 1184 struct net_device *dev = qdisc_dev(sch); 1185 unsigned int ntx; 1186 1187 /* Resemble qdisc_graft behavior. */ 1188 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 1189 struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx); 1190 struct Qdisc *old = dev_graft_qdisc(dev_queue, sch); 1191 1192 qdisc_refcount_inc(sch); 1193 1194 qdisc_put(old); 1195 } 1196 } 1197 1198 static void htb_attach(struct Qdisc *sch) 1199 { 1200 struct htb_sched *q = qdisc_priv(sch); 1201 1202 if (q->offload) 1203 htb_attach_offload(sch); 1204 else 1205 htb_attach_software(sch); 1206 } 1207 1208 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) 1209 { 1210 struct htb_sched *q = qdisc_priv(sch); 1211 struct nlattr *nest; 1212 struct tc_htb_glob gopt; 1213 1214 if (q->offload) 1215 sch->flags |= TCQ_F_OFFLOADED; 1216 else 1217 sch->flags &= ~TCQ_F_OFFLOADED; 1218 1219 sch->qstats.overlimits = q->overlimits; 1220 /* Its safe to not acquire qdisc lock. As we hold RTNL, 1221 * no change can happen on the qdisc parameters. 1222 */ 1223 1224 gopt.direct_pkts = q->direct_pkts; 1225 gopt.version = HTB_VER; 1226 gopt.rate2quantum = q->rate2quantum; 1227 gopt.defcls = q->defcls; 1228 gopt.debug = 0; 1229 1230 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 1231 if (nest == NULL) 1232 goto nla_put_failure; 1233 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) || 1234 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen)) 1235 goto nla_put_failure; 1236 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) 1237 goto nla_put_failure; 1238 1239 return nla_nest_end(skb, nest); 1240 1241 nla_put_failure: 1242 nla_nest_cancel(skb, nest); 1243 return -1; 1244 } 1245 1246 static int htb_dump_class(struct Qdisc *sch, unsigned long arg, 1247 struct sk_buff *skb, struct tcmsg *tcm) 1248 { 1249 struct htb_class *cl = (struct htb_class *)arg; 1250 struct htb_sched *q = qdisc_priv(sch); 1251 struct nlattr *nest; 1252 struct tc_htb_opt opt; 1253 1254 /* Its safe to not acquire qdisc lock. As we hold RTNL, 1255 * no change can happen on the class parameters. 1256 */ 1257 tcm->tcm_parent = cl->parent ? 
cl->parent->common.classid : TC_H_ROOT; 1258 tcm->tcm_handle = cl->common.classid; 1259 if (!cl->level && cl->leaf.q) 1260 tcm->tcm_info = cl->leaf.q->handle; 1261 1262 nest = nla_nest_start_noflag(skb, TCA_OPTIONS); 1263 if (nest == NULL) 1264 goto nla_put_failure; 1265 1266 memset(&opt, 0, sizeof(opt)); 1267 1268 psched_ratecfg_getrate(&opt.rate, &cl->rate); 1269 opt.buffer = PSCHED_NS2TICKS(cl->buffer); 1270 psched_ratecfg_getrate(&opt.ceil, &cl->ceil); 1271 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer); 1272 opt.quantum = cl->quantum; 1273 opt.prio = cl->prio; 1274 opt.level = cl->level; 1275 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt)) 1276 goto nla_put_failure; 1277 if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD)) 1278 goto nla_put_failure; 1279 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) && 1280 nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps, 1281 TCA_HTB_PAD)) 1282 goto nla_put_failure; 1283 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) && 1284 nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps, 1285 TCA_HTB_PAD)) 1286 goto nla_put_failure; 1287 1288 return nla_nest_end(skb, nest); 1289 1290 nla_put_failure: 1291 nla_nest_cancel(skb, nest); 1292 return -1; 1293 } 1294 1295 static void htb_offload_aggregate_stats(struct htb_sched *q, 1296 struct htb_class *cl) 1297 { 1298 u64 bytes = 0, packets = 0; 1299 struct htb_class *c; 1300 unsigned int i; 1301 1302 gnet_stats_basic_sync_init(&cl->bstats); 1303 1304 for (i = 0; i < q->clhash.hashsize; i++) { 1305 hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) { 1306 struct htb_class *p = c; 1307 1308 while (p && p->level < cl->level) 1309 p = p->parent; 1310 1311 if (p != cl) 1312 continue; 1313 1314 bytes += u64_stats_read(&c->bstats_bias.bytes); 1315 packets += u64_stats_read(&c->bstats_bias.packets); 1316 if (c->level == 0) { 1317 bytes += u64_stats_read(&c->leaf.q->bstats.bytes); 1318 packets += u64_stats_read(&c->leaf.q->bstats.packets); 1319 } 1320 } 1321 } 1322 _bstats_update(&cl->bstats, bytes, packets); 1323 } 1324 1325 static int 1326 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) 1327 { 1328 struct htb_class *cl = (struct htb_class *)arg; 1329 struct htb_sched *q = qdisc_priv(sch); 1330 struct gnet_stats_queue qs = { 1331 .drops = cl->drops, 1332 .overlimits = cl->overlimits, 1333 }; 1334 __u32 qlen = 0; 1335 1336 if (!cl->level && cl->leaf.q) 1337 qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog); 1338 1339 cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens), 1340 INT_MIN, INT_MAX); 1341 cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens), 1342 INT_MIN, INT_MAX); 1343 1344 if (q->offload) { 1345 if (!cl->level) { 1346 if (cl->leaf.q) 1347 cl->bstats = cl->leaf.q->bstats; 1348 else 1349 gnet_stats_basic_sync_init(&cl->bstats); 1350 _bstats_update(&cl->bstats, 1351 u64_stats_read(&cl->bstats_bias.bytes), 1352 u64_stats_read(&cl->bstats_bias.packets)); 1353 } else { 1354 htb_offload_aggregate_stats(q, cl); 1355 } 1356 } 1357 1358 if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || 1359 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || 1360 gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0) 1361 return -1; 1362 1363 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats)); 1364 } 1365 1366 static struct netdev_queue * 1367 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm) 1368 { 1369 struct net_device *dev = qdisc_dev(sch); 1370 struct tc_htb_qopt_offload offload_opt; 1371 struct htb_sched *q = qdisc_priv(sch); 
1372 int err; 1373 1374 if (!q->offload) 1375 return sch->dev_queue; 1376 1377 offload_opt = (struct tc_htb_qopt_offload) { 1378 .command = TC_HTB_LEAF_QUERY_QUEUE, 1379 .classid = TC_H_MIN(tcm->tcm_parent), 1380 }; 1381 err = htb_offload(dev, &offload_opt); 1382 if (err || offload_opt.qid >= dev->num_tx_queues) 1383 return NULL; 1384 return netdev_get_tx_queue(dev, offload_opt.qid); 1385 } 1386 1387 static struct Qdisc * 1388 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) 1389 { 1390 struct net_device *dev = dev_queue->dev; 1391 struct Qdisc *old_q; 1392 1393 if (dev->flags & IFF_UP) 1394 dev_deactivate(dev); 1395 old_q = dev_graft_qdisc(dev_queue, new_q); 1396 if (new_q) 1397 new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; 1398 if (dev->flags & IFF_UP) 1399 dev_activate(dev); 1400 1401 return old_q; 1402 } 1403 1404 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl) 1405 { 1406 struct netdev_queue *queue; 1407 1408 queue = cl->leaf.offload_queue; 1409 if (!(cl->leaf.q->flags & TCQ_F_BUILTIN)) 1410 WARN_ON(cl->leaf.q->dev_queue != queue); 1411 1412 return queue; 1413 } 1414 1415 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, 1416 struct htb_class *cl_new, bool destroying) 1417 { 1418 struct netdev_queue *queue_old, *queue_new; 1419 struct net_device *dev = qdisc_dev(sch); 1420 1421 queue_old = htb_offload_get_queue(cl_old); 1422 queue_new = htb_offload_get_queue(cl_new); 1423 1424 if (!destroying) { 1425 struct Qdisc *qdisc; 1426 1427 if (dev->flags & IFF_UP) 1428 dev_deactivate(dev); 1429 qdisc = dev_graft_qdisc(queue_old, NULL); 1430 WARN_ON(qdisc != cl_old->leaf.q); 1431 } 1432 1433 if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN)) 1434 cl_old->leaf.q->dev_queue = queue_new; 1435 cl_old->leaf.offload_queue = queue_new; 1436 1437 if (!destroying) { 1438 struct Qdisc *qdisc; 1439 1440 qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q); 1441 if (dev->flags & IFF_UP) 1442 dev_activate(dev); 1443 WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN)); 1444 } 1445 } 1446 1447 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1448 struct Qdisc **old, struct netlink_ext_ack *extack) 1449 { 1450 struct netdev_queue *dev_queue = sch->dev_queue; 1451 struct htb_class *cl = (struct htb_class *)arg; 1452 struct htb_sched *q = qdisc_priv(sch); 1453 struct Qdisc *old_q; 1454 1455 if (cl->level) 1456 return -EINVAL; 1457 1458 if (q->offload) 1459 dev_queue = htb_offload_get_queue(cl); 1460 1461 if (!new) { 1462 new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1463 cl->common.classid, extack); 1464 if (!new) 1465 return -ENOBUFS; 1466 } 1467 1468 if (q->offload) { 1469 htb_set_lockdep_class_child(new); 1470 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ 1471 qdisc_refcount_inc(new); 1472 old_q = htb_graft_helper(dev_queue, new); 1473 } 1474 1475 *old = qdisc_replace(sch, new, &cl->leaf.q); 1476 1477 if (q->offload) { 1478 WARN_ON(old_q != *old); 1479 qdisc_put(old_q); 1480 } 1481 1482 return 0; 1483 } 1484 1485 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) 1486 { 1487 struct htb_class *cl = (struct htb_class *)arg; 1488 return !cl->level ? 
cl->leaf.q : NULL; 1489 } 1490 1491 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) 1492 { 1493 struct htb_class *cl = (struct htb_class *)arg; 1494 1495 htb_deactivate(qdisc_priv(sch), cl); 1496 } 1497 1498 static inline int htb_parent_last_child(struct htb_class *cl) 1499 { 1500 if (!cl->parent) 1501 /* the root class */ 1502 return 0; 1503 if (cl->parent->children > 1) 1504 /* not the last child */ 1505 return 0; 1506 return 1; 1507 } 1508 1509 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl, 1510 struct Qdisc *new_q) 1511 { 1512 struct htb_sched *q = qdisc_priv(sch); 1513 struct htb_class *parent = cl->parent; 1514 1515 WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity); 1516 1517 if (parent->cmode != HTB_CAN_SEND) 1518 htb_safe_rb_erase(&parent->pq_node, 1519 &q->hlevel[parent->level].wait_pq); 1520 1521 parent->level = 0; 1522 memset(&parent->inner, 0, sizeof(parent->inner)); 1523 parent->leaf.q = new_q ? new_q : &noop_qdisc; 1524 parent->tokens = parent->buffer; 1525 parent->ctokens = parent->cbuffer; 1526 parent->t_c = ktime_get_ns(); 1527 parent->cmode = HTB_CAN_SEND; 1528 if (q->offload) 1529 parent->leaf.offload_queue = cl->leaf.offload_queue; 1530 } 1531 1532 static void htb_parent_to_leaf_offload(struct Qdisc *sch, 1533 struct netdev_queue *dev_queue, 1534 struct Qdisc *new_q) 1535 { 1536 struct Qdisc *old_q; 1537 1538 /* One ref for cl->leaf.q, the other for dev_queue->qdisc. */ 1539 if (new_q) 1540 qdisc_refcount_inc(new_q); 1541 old_q = htb_graft_helper(dev_queue, new_q); 1542 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); 1543 } 1544 1545 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl, 1546 bool last_child, bool destroying, 1547 struct netlink_ext_ack *extack) 1548 { 1549 struct tc_htb_qopt_offload offload_opt; 1550 struct netdev_queue *dev_queue; 1551 struct Qdisc *q = cl->leaf.q; 1552 struct Qdisc *old = NULL; 1553 int err; 1554 1555 if (cl->level) 1556 return -EINVAL; 1557 1558 WARN_ON(!q); 1559 dev_queue = htb_offload_get_queue(cl); 1560 old = htb_graft_helper(dev_queue, NULL); 1561 if (destroying) 1562 /* Before HTB is destroyed, the kernel grafts noop_qdisc to 1563 * all queues. 1564 */ 1565 WARN_ON(!(old->flags & TCQ_F_BUILTIN)); 1566 else 1567 WARN_ON(old != q); 1568 1569 if (cl->parent) { 1570 _bstats_update(&cl->parent->bstats_bias, 1571 u64_stats_read(&q->bstats.bytes), 1572 u64_stats_read(&q->bstats.packets)); 1573 } 1574 1575 offload_opt = (struct tc_htb_qopt_offload) { 1576 .command = !last_child ? TC_HTB_LEAF_DEL : 1577 destroying ? 
TC_HTB_LEAF_DEL_LAST_FORCE : 1578 TC_HTB_LEAF_DEL_LAST, 1579 .classid = cl->common.classid, 1580 .extack = extack, 1581 }; 1582 err = htb_offload(qdisc_dev(sch), &offload_opt); 1583 1584 if (!err || destroying) 1585 qdisc_put(old); 1586 else 1587 htb_graft_helper(dev_queue, old); 1588 1589 if (last_child) 1590 return err; 1591 1592 if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) { 1593 u32 classid = TC_H_MAJ(sch->handle) | 1594 TC_H_MIN(offload_opt.classid); 1595 struct htb_class *moved_cl = htb_find(classid, sch); 1596 1597 htb_offload_move_qdisc(sch, moved_cl, cl, destroying); 1598 } 1599 1600 return err; 1601 } 1602 1603 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) 1604 { 1605 if (!cl->level) { 1606 WARN_ON(!cl->leaf.q); 1607 qdisc_put(cl->leaf.q); 1608 } 1609 gen_kill_estimator(&cl->rate_est); 1610 tcf_block_put(cl->block); 1611 kfree(cl); 1612 } 1613 1614 static void htb_destroy(struct Qdisc *sch) 1615 { 1616 struct net_device *dev = qdisc_dev(sch); 1617 struct tc_htb_qopt_offload offload_opt; 1618 struct htb_sched *q = qdisc_priv(sch); 1619 struct hlist_node *next; 1620 bool nonempty, changed; 1621 struct htb_class *cl; 1622 unsigned int i; 1623 1624 cancel_work_sync(&q->work); 1625 qdisc_watchdog_cancel(&q->watchdog); 1626 /* This line used to be after htb_destroy_class call below 1627 * and surprisingly it worked in 2.4. But it must precede it 1628 * because filter need its target class alive to be able to call 1629 * unbind_filter on it (without Oops). 1630 */ 1631 tcf_block_put(q->block); 1632 1633 for (i = 0; i < q->clhash.hashsize; i++) { 1634 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { 1635 tcf_block_put(cl->block); 1636 cl->block = NULL; 1637 } 1638 } 1639 1640 do { 1641 nonempty = false; 1642 changed = false; 1643 for (i = 0; i < q->clhash.hashsize; i++) { 1644 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], 1645 common.hnode) { 1646 bool last_child; 1647 1648 if (!q->offload) { 1649 htb_destroy_class(sch, cl); 1650 continue; 1651 } 1652 1653 nonempty = true; 1654 1655 if (cl->level) 1656 continue; 1657 1658 changed = true; 1659 1660 last_child = htb_parent_last_child(cl); 1661 htb_destroy_class_offload(sch, cl, last_child, 1662 true, NULL); 1663 qdisc_class_hash_remove(&q->clhash, 1664 &cl->common); 1665 if (cl->parent) 1666 cl->parent->children--; 1667 if (last_child) 1668 htb_parent_to_leaf(sch, cl, NULL); 1669 htb_destroy_class(sch, cl); 1670 } 1671 } 1672 } while (changed); 1673 WARN_ON(nonempty); 1674 1675 qdisc_class_hash_destroy(&q->clhash); 1676 __qdisc_reset_queue(&q->direct_queue); 1677 1678 if (q->offload) { 1679 offload_opt = (struct tc_htb_qopt_offload) { 1680 .command = TC_HTB_DESTROY, 1681 }; 1682 htb_offload(dev, &offload_opt); 1683 } 1684 1685 if (!q->direct_qdiscs) 1686 return; 1687 for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++) 1688 qdisc_put(q->direct_qdiscs[i]); 1689 kfree(q->direct_qdiscs); 1690 } 1691 1692 static int htb_delete(struct Qdisc *sch, unsigned long arg, 1693 struct netlink_ext_ack *extack) 1694 { 1695 struct htb_sched *q = qdisc_priv(sch); 1696 struct htb_class *cl = (struct htb_class *)arg; 1697 struct Qdisc *new_q = NULL; 1698 int last_child = 0; 1699 int err; 1700 1701 /* TODO: why don't allow to delete subtree ? references ? does 1702 * tc subsys guarantee us that in htb_destroy it holds no class 1703 * refs so that we can remove children safely there ? 
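	 * For now a class that still has children or attached filters is simply
	 * refused with -EBUSY below.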
1704 */ 1705 if (cl->children || cl->filter_cnt) 1706 return -EBUSY; 1707 1708 if (!cl->level && htb_parent_last_child(cl)) 1709 last_child = 1; 1710 1711 if (q->offload) { 1712 err = htb_destroy_class_offload(sch, cl, last_child, false, 1713 extack); 1714 if (err) 1715 return err; 1716 } 1717 1718 if (last_child) { 1719 struct netdev_queue *dev_queue = sch->dev_queue; 1720 1721 if (q->offload) 1722 dev_queue = htb_offload_get_queue(cl); 1723 1724 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1725 cl->parent->common.classid, 1726 NULL); 1727 if (q->offload) { 1728 if (new_q) 1729 htb_set_lockdep_class_child(new_q); 1730 htb_parent_to_leaf_offload(sch, dev_queue, new_q); 1731 } 1732 } 1733 1734 sch_tree_lock(sch); 1735 1736 if (!cl->level) 1737 qdisc_purge_queue(cl->leaf.q); 1738 1739 /* delete from hash and active; remainder in destroy_class */ 1740 qdisc_class_hash_remove(&q->clhash, &cl->common); 1741 if (cl->parent) 1742 cl->parent->children--; 1743 1744 if (cl->prio_activity) 1745 htb_deactivate(q, cl); 1746 1747 if (cl->cmode != HTB_CAN_SEND) 1748 htb_safe_rb_erase(&cl->pq_node, 1749 &q->hlevel[cl->level].wait_pq); 1750 1751 if (last_child) 1752 htb_parent_to_leaf(sch, cl, new_q); 1753 1754 sch_tree_unlock(sch); 1755 1756 htb_destroy_class(sch, cl); 1757 return 0; 1758 } 1759 1760 static int htb_change_class(struct Qdisc *sch, u32 classid, 1761 u32 parentid, struct nlattr **tca, 1762 unsigned long *arg, struct netlink_ext_ack *extack) 1763 { 1764 int err = -EINVAL; 1765 struct htb_sched *q = qdisc_priv(sch); 1766 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1767 struct tc_htb_qopt_offload offload_opt; 1768 struct nlattr *opt = tca[TCA_OPTIONS]; 1769 struct nlattr *tb[TCA_HTB_MAX + 1]; 1770 struct Qdisc *parent_qdisc = NULL; 1771 struct netdev_queue *dev_queue; 1772 struct tc_htb_opt *hopt; 1773 u64 rate64, ceil64; 1774 int warn = 0; 1775 1776 /* extract all subattrs from opt attr */ 1777 if (!opt) 1778 goto failure; 1779 1780 err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy, 1781 NULL); 1782 if (err < 0) 1783 goto failure; 1784 1785 err = -EINVAL; 1786 if (tb[TCA_HTB_PARMS] == NULL) 1787 goto failure; 1788 1789 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); 1790 1791 hopt = nla_data(tb[TCA_HTB_PARMS]); 1792 if (!hopt->rate.rate || !hopt->ceil.rate) 1793 goto failure; 1794 1795 if (q->offload) { 1796 /* Options not supported by the offload. */ 1797 if (hopt->rate.overhead || hopt->ceil.overhead) { 1798 NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); 1799 goto failure; 1800 } 1801 if (hopt->rate.mpu || hopt->ceil.mpu) { 1802 NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); 1803 goto failure; 1804 } 1805 if (hopt->quantum) { 1806 NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); 1807 goto failure; 1808 } 1809 if (hopt->prio) { 1810 NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); 1811 goto failure; 1812 } 1813 } 1814 1815 /* Keeping backward compatible with rate_table based iproute2 tc */ 1816 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) 1817 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], 1818 NULL)); 1819 1820 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) 1821 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB], 1822 NULL)); 1823 1824 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0; 1825 ceil64 = tb[TCA_HTB_CEIL64] ? 
nla_get_u64(tb[TCA_HTB_CEIL64]) : 0; 1826 1827 if (!cl) { /* new class */ 1828 struct net_device *dev = qdisc_dev(sch); 1829 struct Qdisc *new_q, *old_q; 1830 int prio; 1831 struct { 1832 struct nlattr nla; 1833 struct gnet_estimator opt; 1834 } est = { 1835 .nla = { 1836 .nla_len = nla_attr_size(sizeof(est.opt)), 1837 .nla_type = TCA_RATE, 1838 }, 1839 .opt = { 1840 /* 4s interval, 16s averaging constant */ 1841 .interval = 2, 1842 .ewma_log = 2, 1843 }, 1844 }; 1845 1846 /* check for valid classid */ 1847 if (!classid || TC_H_MAJ(classid ^ sch->handle) || 1848 htb_find(classid, sch)) 1849 goto failure; 1850 1851 /* check maximal depth */ 1852 if (parent && parent->parent && parent->parent->level < 2) { 1853 pr_err("htb: tree is too deep\n"); 1854 goto failure; 1855 } 1856 err = -ENOBUFS; 1857 cl = kzalloc(sizeof(*cl), GFP_KERNEL); 1858 if (!cl) 1859 goto failure; 1860 1861 gnet_stats_basic_sync_init(&cl->bstats); 1862 gnet_stats_basic_sync_init(&cl->bstats_bias); 1863 1864 err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); 1865 if (err) { 1866 kfree(cl); 1867 goto failure; 1868 } 1869 if (htb_rate_est || tca[TCA_RATE]) { 1870 err = gen_new_estimator(&cl->bstats, NULL, 1871 &cl->rate_est, 1872 NULL, 1873 true, 1874 tca[TCA_RATE] ? : &est.nla); 1875 if (err) 1876 goto err_block_put; 1877 } 1878 1879 cl->children = 0; 1880 RB_CLEAR_NODE(&cl->pq_node); 1881 1882 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) 1883 RB_CLEAR_NODE(&cl->node[prio]); 1884 1885 cl->common.classid = classid; 1886 1887 /* Make sure nothing interrupts us in between of two 1888 * ndo_setup_tc calls. 1889 */ 1890 ASSERT_RTNL(); 1891 1892 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1893 * so that can't be used inside of sch_tree_lock 1894 * -- thanks to Karlis Peisenieks 1895 */ 1896 if (!q->offload) { 1897 dev_queue = sch->dev_queue; 1898 } else if (!(parent && !parent->level)) { 1899 /* Assign a dev_queue to this classid. */ 1900 offload_opt = (struct tc_htb_qopt_offload) { 1901 .command = TC_HTB_LEAF_ALLOC_QUEUE, 1902 .classid = cl->common.classid, 1903 .parent_classid = parent ? 1904 TC_H_MIN(parent->common.classid) : 1905 TC_HTB_CLASSID_ROOT, 1906 .rate = max_t(u64, hopt->rate.rate, rate64), 1907 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 1908 .extack = extack, 1909 }; 1910 err = htb_offload(dev, &offload_opt); 1911 if (err) { 1912 pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n", 1913 err); 1914 goto err_kill_estimator; 1915 } 1916 dev_queue = netdev_get_tx_queue(dev, offload_opt.qid); 1917 } else { /* First child. 
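		 * The parent was a leaf until now: detach its qdisc, ask the driver to
		 * turn the node into an inner one (TC_HTB_LEAF_TO_INNER) and fold the
		 * old qdisc's counters into parent->bstats_bias.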
*/ 1918 dev_queue = htb_offload_get_queue(parent); 1919 old_q = htb_graft_helper(dev_queue, NULL); 1920 WARN_ON(old_q != parent->leaf.q); 1921 offload_opt = (struct tc_htb_qopt_offload) { 1922 .command = TC_HTB_LEAF_TO_INNER, 1923 .classid = cl->common.classid, 1924 .parent_classid = 1925 TC_H_MIN(parent->common.classid), 1926 .rate = max_t(u64, hopt->rate.rate, rate64), 1927 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 1928 .extack = extack, 1929 }; 1930 err = htb_offload(dev, &offload_opt); 1931 if (err) { 1932 pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n", 1933 err); 1934 htb_graft_helper(dev_queue, old_q); 1935 goto err_kill_estimator; 1936 } 1937 _bstats_update(&parent->bstats_bias, 1938 u64_stats_read(&old_q->bstats.bytes), 1939 u64_stats_read(&old_q->bstats.packets)); 1940 qdisc_put(old_q); 1941 } 1942 new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops, 1943 classid, NULL); 1944 if (q->offload) { 1945 if (new_q) { 1946 htb_set_lockdep_class_child(new_q); 1947 /* One ref for cl->leaf.q, the other for 1948 * dev_queue->qdisc. 1949 */ 1950 qdisc_refcount_inc(new_q); 1951 } 1952 old_q = htb_graft_helper(dev_queue, new_q); 1953 /* No qdisc_put needed. */ 1954 WARN_ON(!(old_q->flags & TCQ_F_BUILTIN)); 1955 } 1956 sch_tree_lock(sch); 1957 if (parent && !parent->level) { 1958 /* turn parent into inner node */ 1959 qdisc_purge_queue(parent->leaf.q); 1960 parent_qdisc = parent->leaf.q; 1961 if (parent->prio_activity) 1962 htb_deactivate(q, parent); 1963 1964 /* remove from evt list because of level change */ 1965 if (parent->cmode != HTB_CAN_SEND) { 1966 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq); 1967 parent->cmode = HTB_CAN_SEND; 1968 } 1969 parent->level = (parent->parent ? parent->parent->level 1970 : TC_HTB_MAXDEPTH) - 1; 1971 memset(&parent->inner, 0, sizeof(parent->inner)); 1972 } 1973 1974 /* leaf (we) needs elementary qdisc */ 1975 cl->leaf.q = new_q ? new_q : &noop_qdisc; 1976 if (q->offload) 1977 cl->leaf.offload_queue = dev_queue; 1978 1979 cl->parent = parent; 1980 1981 /* set class to be in HTB_CAN_SEND state */ 1982 cl->tokens = PSCHED_TICKS2NS(hopt->buffer); 1983 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); 1984 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ 1985 cl->t_c = ktime_get_ns(); 1986 cl->cmode = HTB_CAN_SEND; 1987 1988 /* attach to the hash list and parent's family */ 1989 qdisc_class_hash_insert(&q->clhash, &cl->common); 1990 if (parent) 1991 parent->children++; 1992 if (cl->leaf.q != &noop_qdisc) 1993 qdisc_hash_add(cl->leaf.q, true); 1994 } else { 1995 if (tca[TCA_RATE]) { 1996 err = gen_replace_estimator(&cl->bstats, NULL, 1997 &cl->rate_est, 1998 NULL, 1999 true, 2000 tca[TCA_RATE]); 2001 if (err) 2002 return err; 2003 } 2004 2005 if (q->offload) { 2006 struct net_device *dev = qdisc_dev(sch); 2007 2008 offload_opt = (struct tc_htb_qopt_offload) { 2009 .command = TC_HTB_NODE_MODIFY, 2010 .classid = cl->common.classid, 2011 .rate = max_t(u64, hopt->rate.rate, rate64), 2012 .ceil = max_t(u64, hopt->ceil.rate, ceil64), 2013 .extack = extack, 2014 }; 2015 err = htb_offload(dev, &offload_opt); 2016 if (err) 2017 /* Estimator was replaced, and rollback may fail 2018 * as well, so we don't try to recover it, and 2019 * the estimator won't work property with the 2020 * offload anyway, because bstats are updated 2021 * only when the stats are queried. 
2022 */ 2023 return err; 2024 } 2025 2026 sch_tree_lock(sch); 2027 } 2028 2029 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64); 2030 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64); 2031 2032 /* it used to be a nasty bug here, we have to check that node 2033 * is really leaf before changing cl->leaf ! 2034 */ 2035 if (!cl->level) { 2036 u64 quantum = cl->rate.rate_bytes_ps; 2037 2038 do_div(quantum, q->rate2quantum); 2039 cl->quantum = min_t(u64, quantum, INT_MAX); 2040 2041 if (!hopt->quantum && cl->quantum < 1000) { 2042 warn = -1; 2043 cl->quantum = 1000; 2044 } 2045 if (!hopt->quantum && cl->quantum > 200000) { 2046 warn = 1; 2047 cl->quantum = 200000; 2048 } 2049 if (hopt->quantum) 2050 cl->quantum = hopt->quantum; 2051 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO) 2052 cl->prio = TC_HTB_NUMPRIO - 1; 2053 } 2054 2055 cl->buffer = PSCHED_TICKS2NS(hopt->buffer); 2056 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); 2057 2058 sch_tree_unlock(sch); 2059 qdisc_put(parent_qdisc); 2060 2061 if (warn) 2062 pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n", 2063 cl->common.classid, (warn == -1 ? "small" : "big")); 2064 2065 qdisc_class_hash_grow(sch, &q->clhash); 2066 2067 *arg = (unsigned long)cl; 2068 return 0; 2069 2070 err_kill_estimator: 2071 gen_kill_estimator(&cl->rate_est); 2072 err_block_put: 2073 tcf_block_put(cl->block); 2074 kfree(cl); 2075 failure: 2076 return err; 2077 } 2078 2079 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg, 2080 struct netlink_ext_ack *extack) 2081 { 2082 struct htb_sched *q = qdisc_priv(sch); 2083 struct htb_class *cl = (struct htb_class *)arg; 2084 2085 return cl ? cl->block : q->block; 2086 } 2087 2088 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, 2089 u32 classid) 2090 { 2091 struct htb_class *cl = htb_find(classid, sch); 2092 2093 /*if (cl && !cl->level) return 0; 2094 * The line above used to be there to prevent attaching filters to 2095 * leaves. But at least tc_index filter uses this just to get class 2096 * for other reasons so that we have to allow for it. 2097 * ---- 2098 * 19.6.2002 As Werner explained it is ok - bind filter is just 2099 * another way to "lock" the class - unlike "get" this lock can 2100 * be broken by class during destroy IIUC. 
2101 */ 2102 if (cl) 2103 cl->filter_cnt++; 2104 return (unsigned long)cl; 2105 } 2106 2107 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) 2108 { 2109 struct htb_class *cl = (struct htb_class *)arg; 2110 2111 if (cl) 2112 cl->filter_cnt--; 2113 } 2114 2115 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) 2116 { 2117 struct htb_sched *q = qdisc_priv(sch); 2118 struct htb_class *cl; 2119 unsigned int i; 2120 2121 if (arg->stop) 2122 return; 2123 2124 for (i = 0; i < q->clhash.hashsize; i++) { 2125 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { 2126 if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) 2127 return; 2128 } 2129 } 2130 } 2131 2132 static const struct Qdisc_class_ops htb_class_ops = { 2133 .select_queue = htb_select_queue, 2134 .graft = htb_graft, 2135 .leaf = htb_leaf, 2136 .qlen_notify = htb_qlen_notify, 2137 .find = htb_search, 2138 .change = htb_change_class, 2139 .delete = htb_delete, 2140 .walk = htb_walk, 2141 .tcf_block = htb_tcf_block, 2142 .bind_tcf = htb_bind_filter, 2143 .unbind_tcf = htb_unbind_filter, 2144 .dump = htb_dump_class, 2145 .dump_stats = htb_dump_class_stats, 2146 }; 2147 2148 static struct Qdisc_ops htb_qdisc_ops __read_mostly = { 2149 .cl_ops = &htb_class_ops, 2150 .id = "htb", 2151 .priv_size = sizeof(struct htb_sched), 2152 .enqueue = htb_enqueue, 2153 .dequeue = htb_dequeue, 2154 .peek = qdisc_peek_dequeued, 2155 .init = htb_init, 2156 .attach = htb_attach, 2157 .reset = htb_reset, 2158 .destroy = htb_destroy, 2159 .dump = htb_dump, 2160 .owner = THIS_MODULE, 2161 }; 2162 2163 static int __init htb_module_init(void) 2164 { 2165 return register_qdisc(&htb_qdisc_ops); 2166 } 2167 static void __exit htb_module_exit(void) 2168 { 2169 unregister_qdisc(&htb_qdisc_ops); 2170 } 2171 2172 module_init(htb_module_init) 2173 module_exit(htb_module_exit) 2174 MODULE_LICENSE("GPL"); 2175