// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *			HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *			found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *			helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *			code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *			created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *			spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *			fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows assigning a priority to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/

static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

/* used internally to keep the status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lose the ptr value and start from the
	 * first child again. Here we store the classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
	struct tcf_block	*block;
	int			filter_cnt;

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct net_rate_estimator __rcu *rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_sync bstats;
	struct gnet_stats_basic_sync bstats_bias;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
			struct netdev_queue *offload_queue;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	};
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */

	unsigned int drops ____cacheline_aligned_in_smp;
	unsigned int		overlimits;
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto __rcu	*filter_list;
	struct tcf_block	*block;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct qdisc_skb_head	direct_queue;
	u32			direct_pkts;
	u32			overlimits;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];

	struct Qdisc		**direct_qdiscs;
	unsigned int		num_direct_qdiscs;

	bool			offload;
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
	return (unsigned long)htb_find(handle, sch);
}

#define HTB_DIRECT ((struct htb_class *)-1L)

/**
 * htb_classify - classify a packet into class
 * @skb: the socket buffer
 * @sch: the active queue discipline
 * @qerr: pointer for returned status code
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases a leaf class is
 * returned. We allow direct class selection by classid in skb->priority.
 * Then we examine filters in the qdisc and in inner nodes (if a higher
 * filter points to the inner node). If we end up with classid MAJOR:0 we
 * enqueue the skb into a special internal fifo (direct). These packets
 * then go directly thru. If we
 * still have no valid leaf we try to use the MAJOR:default leaf. If that is
 * still unsuccessful, we finish and return the direct queue.
 */
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl) {
		if (cl->level == 0)
			return cl;
		/* Start with inner filter chain if a non-leaf class is selected */
		tcf = rcu_dereference_bh(cl->filter_list);
	} else {
		tcf = rcu_dereference_bh(q->filter_list);
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			fallthrough;
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = rcu_dereference_bh(cl->filter_list);
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is a safe bet */
	return cl;
}

/**
 * htb_add_to_id_tree - adds class to the round robin list
 * @root: the root of the tree
 * @cl: the class to add
 * @prio: the given prio in class
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 * @q: the priority event queue
 * @cl: the class to add
 * @delay: delay in nanoseconds
 *
 * The class is added to the priority event queue to indicate that the class
 * will change its mode at time cl->pq_key. Make sure that the class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 * @n: the current node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 * @q: the priority event queue
 * @cl: the class to add
 * @mask: the given priorities in class in bitmap
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 * @q: the priority event queue
 * @cl: the class to remove
 * @mask: the given priorities in class in bitmap
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates the active class's feed chain
 * @q: the priority event queue
 * @cl: the class to activate
 *
 * The class is connected to ancestors and/or appropriate rows
 * for the priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use, so
				 * reset the bit in mask as the parent is
				 * already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}

/**
 * htb_deactivate_prios - remove class from feed chain
 * @q: the priority event queue
 * @cl: the class to deactivate
 *
 * cl->cmode must represent the old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. The class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing a child which is pointed to
				 * from the parent feed - forget the pointer
				 * but remember the classid
				 */
				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->inner.clprio[prio].feed);

			if (!p->inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to the time difference
 * from now to the time when cl will change its state.
 * Also it is worth noting that the class mode doesn't change simply
 * at cl->{c,}tokens == 0 but rather there can be a hysteresis in the
 * 0 .. -cl->{c,}buffer range. It is meant to limit the number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}

/**
 * htb_change_class_mode - changes class's mode
 * @q: the priority event queue
 * @cl: the target class
 * @diff: diff time in nanoseconds
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. The routine will update the feed lists linkage, change
 * the mode and add the class to the wait event queue if appropriate.
 * The new mode should be different from the old one and cl->pq_key has to
 * be valid if changing to a mode other than HTB_CAN_SEND
 * (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Routine learns the (new) priority of the leaf and activates the feed
 * chain for that prio. It can safely be called on an already active leaf.
 * It also adds the leaf into the droplist.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 * @q: the priority event queue
 * @cl: the target class
 *
 * Make sure that the leaf is active. In other words it can't be called
 * with a non-active leaf. It also removes the class from the drop list.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int ret;
	unsigned int len = qdisc_pkt_len(skb);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__qdisc_enqueue_tail(skb, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 * @q: the priority event queue
 * @cl: the class to start iterating from
 * @level: the minimum level to account
 * @skb: the socket buffer
 *
 * Routine assumes that a packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to the ceil leaky bucket for
 * the leaf and all ancestors and to the rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use a more precise clock than the event queue
 * here. In such a case we remove the class from the event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 * @q: the priority event queue
 * @level: which wait_pq in 'q->hlevel'
 * @start: start jiffies
 *
 * Scans the event queue for pending events and applies them. Returns the
 * time of the next pending event (0 for no event in pq, q->now for too many
 * events). Note: Only events with cl->pq_key <= q->now are applied.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from the id-tree where the class's id is >= id.
 * NULL if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 * @hprio: the current one
 * @prio: which prio in class
 *
 * Find the leaf where the current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->leaf.q->dequeue(cl->leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->leaf.q);
		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->leaf.deficit[level] < 0) {
			cl->leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->inner, 0, sizeof(cl->inner));
			else {
				if (cl->leaf.q && !q->offload)
					qdisc_reset(cl->leaf.q);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__qdisc_reset_queue(&q->direct_queue);
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
	[TCA_HTB_RATE64] = { .type = NLA_U64 },
	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	rcu_read_lock();
	__netif_schedule(qdisc_root(sch));
	rcu_read_unlock();
}

static void htb_set_lockdep_class_child(struct Qdisc *q)
{
	static struct lock_class_key child_key;

	lockdep_set_class(qdisc_lock(q), &child_key);
}

static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
{
	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	unsigned int ntx;
	bool offload;
	int err;

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);

	if (offload) {
		if (sch->parent != TC_H_ROOT) {
			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
			return -EOPNOTSUPP;
		}

		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
			return -EOPNOTSUPP;
		}

		q->num_direct_qdiscs = dev->real_num_tx_queues;
		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
					   sizeof(*q->direct_qdiscs),
					   GFP_KERNEL);
		if (!q->direct_qdiscs)
			return -ENOMEM;
	}

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	if (!offload)
		return 0;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *qdisc;

		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  TC_H_MAKE(sch->handle, 0), extack);
		if (!qdisc)
			return -ENOMEM;

		htb_set_lockdep_class_child(qdisc);
		q->direct_qdiscs[ntx] = qdisc;
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	}

	sch->flags |= TCQ_F_MQROOT;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_CREATE,
		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
		.classid = TC_H_MIN(q->defcls),
		.extack = extack,
	};
	err = htb_offload(dev, &offload_opt);
	if (err)
		return err;

	/* Defer this assignment, so that htb_destroy skips offload-related
	 * parts (especially calling ndo_setup_tc) on errors.
	 */
	q->offload = true;

	return 0;
}

static void htb_attach_offload(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct htb_sched *q = qdisc_priv(sch);
	unsigned int ntx;

	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];

		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
		qdisc_put(old);
		qdisc_hash_add(qdisc, false);
	}
	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);

		qdisc_put(old);
	}

	kfree(q->direct_qdiscs);
	q->direct_qdiscs = NULL;
}

static void htb_attach_software(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned int ntx;

	/* Resemble qdisc_graft behavior. */
	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);

		qdisc_refcount_inc(sch);

		qdisc_put(old);
	}
}

static void htb_attach(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);

	if (q->offload)
		htb_attach_offload(sch);
	else
		htb_attach_software(sch);
}

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	if (q->offload)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	sch->qstats.overlimits = q->overlimits;
	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
	 * no change can happen to the qdisc parameters.
	 */

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_opt opt;

	/* It's safe not to acquire the qdisc lock. As we hold RTNL,
	 * no change can happen to the class parameters.
	 */
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->leaf.q)
		tcm->tcm_info = cl->leaf.q->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
		goto nla_put_failure;
	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;
	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static void htb_offload_aggregate_stats(struct htb_sched *q,
					struct htb_class *cl)
{
	u64 bytes = 0, packets = 0;
	struct htb_class *c;
	unsigned int i;

	gnet_stats_basic_sync_init(&cl->bstats);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
			struct htb_class *p = c;

			while (p && p->level < cl->level)
				p = p->parent;

			if (p != cl)
				continue;

			bytes += u64_stats_read(&c->bstats_bias.bytes);
			packets += u64_stats_read(&c->bstats_bias.packets);
			if (c->level == 0) {
				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
				packets += u64_stats_read(&c->leaf.q->bstats.packets);
			}
		}
	}
	_bstats_update(&cl->bstats, bytes, packets);
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct gnet_stats_queue qs = {
		.drops = cl->drops,
		.overlimits = cl->overlimits,
	};
	__u32 qlen = 0;

	if (!cl->level && cl->leaf.q)
		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);

	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
				    INT_MIN, INT_MAX);
	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
				     INT_MIN, INT_MAX);

	if (q->offload) {
		if (!cl->level) {
			if (cl->leaf.q)
				cl->bstats = cl->leaf.q->bstats;
			else
				gnet_stats_basic_sync_init(&cl->bstats);
			_bstats_update(&cl->bstats,
				       u64_stats_read(&cl->bstats_bias.bytes),
				       u64_stats_read(&cl->bstats_bias.packets));
		} else {
			htb_offload_aggregate_stats(q, cl);
		}
	}

	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static struct netdev_queue *
htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	int err;

	if (!q->offload)
		return sch->dev_queue;

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = TC_HTB_LEAF_QUERY_QUEUE,
		.classid = TC_H_MIN(tcm->tcm_parent),
	};
	err = htb_offload(dev, &offload_opt);
	if (err || offload_opt.qid >= dev->num_tx_queues)
		return NULL;
	return netdev_get_tx_queue(dev, offload_opt.qid);
}

static struct Qdisc *
htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
{
	struct net_device *dev = dev_queue->dev;
	struct Qdisc *old_q;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);
	old_q = dev_graft_qdisc(dev_queue, new_q);
	if (new_q)
		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return old_q;
}

static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
{
	struct netdev_queue *queue;

	queue = cl->leaf.offload_queue;
	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
		WARN_ON(cl->leaf.q->dev_queue != queue);

	return queue;
}

static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
				   struct htb_class *cl_new, bool destroying)
{
	struct netdev_queue *queue_old, *queue_new;
	struct net_device *dev = qdisc_dev(sch);

	queue_old = htb_offload_get_queue(cl_old);
	queue_new = htb_offload_get_queue(cl_new);

	if (!destroying) {
		struct Qdisc *qdisc;

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);
		qdisc = dev_graft_qdisc(queue_old, NULL);
		WARN_ON(qdisc != cl_old->leaf.q);
	}

	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
		cl_old->leaf.q->dev_queue = queue_new;
	cl_old->leaf.offload_queue = queue_new;

	if (!destroying) {
		struct Qdisc *qdisc;

		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
		if (dev->flags & IFF_UP)
			dev_activate(dev);
		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
	}
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netdev_queue *dev_queue = sch->dev_queue;
	struct htb_class *cl = (struct htb_class *)arg;
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc *old_q;

	if (cl->level)
		return -EINVAL;

	if (q->offload)
		dev_queue = htb_offload_get_queue(cl);

	if (!new) {
		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					cl->common.classid, extack);
		if (!new)
			return -ENOBUFS;
	}

	if (q->offload) {
		htb_set_lockdep_class_child(new);
		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
		qdisc_refcount_inc(new);
		old_q = htb_graft_helper(dev_queue, new);
	}

	*old = qdisc_replace(sch, new, &cl->leaf.q);

	if (q->offload) {
		WARN_ON(old_q != *old);
		qdisc_put(old_q);
	}

	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	htb_deactivate(qdisc_priv(sch), cl);
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node,
				  &q->hlevel[parent->level].wait_pq);

	parent->level = 0;
	memset(&parent->inner, 0, sizeof(parent->inner));
	parent->leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_get_ns();
	parent->cmode = HTB_CAN_SEND;
	if (q->offload)
		parent->leaf.offload_queue = cl->leaf.offload_queue;
}

static void htb_parent_to_leaf_offload(struct Qdisc *sch,
				       struct netdev_queue *dev_queue,
				       struct Qdisc *new_q)
{
	struct Qdisc *old_q;

	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
	if (new_q)
		qdisc_refcount_inc(new_q);
	old_q = htb_graft_helper(dev_queue, new_q);
	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
}

static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
				     bool last_child, bool destroying,
				     struct netlink_ext_ack *extack)
{
	struct tc_htb_qopt_offload offload_opt;
	struct netdev_queue *dev_queue;
	struct Qdisc *q = cl->leaf.q;
	struct Qdisc *old;
	int err;

	if (cl->level)
		return -EINVAL;

	WARN_ON(!q);
	dev_queue = htb_offload_get_queue(cl);
	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
	 * does not need to graft or qdisc_put the qdisc being destroyed.
	 */
	if (!destroying) {
		old = htb_graft_helper(dev_queue, NULL);
		/* Last qdisc grafted should be the same as cl->leaf.q when
		 * calling htb_delete.
		 */
		WARN_ON(old != q);
	}

	if (cl->parent) {
		_bstats_update(&cl->parent->bstats_bias,
			       u64_stats_read(&q->bstats.bytes),
			       u64_stats_read(&q->bstats.packets));
	}

	offload_opt = (struct tc_htb_qopt_offload) {
		.command = !last_child ? TC_HTB_LEAF_DEL :
			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
				        TC_HTB_LEAF_DEL_LAST,
		.classid = cl->common.classid,
		.extack = extack,
	};
	err = htb_offload(qdisc_dev(sch), &offload_opt);

	if (!destroying) {
		if (!err)
			qdisc_put(old);
		else
			htb_graft_helper(dev_queue, old);
	}

	if (last_child)
		return err;

	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
		u32 classid = TC_H_MAJ(sch->handle) |
			      TC_H_MIN(offload_opt.classid);
		struct htb_class *moved_cl = htb_find(classid, sch);

		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
	}

	return err;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->leaf.q);
		qdisc_put(cl->leaf.q);
	}
	gen_kill_estimator(&cl->rate_est);
	tcf_block_put(cl->block);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_htb_qopt_offload offload_opt;
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	bool nonempty, changed;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after the htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because the filter needs its target class alive to be able to call
	 * unbind_filter on it (without an Oops).
	 */
	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			tcf_block_put(cl->block);
			cl->block = NULL;
		}
	}

	do {
		nonempty = false;
		changed = false;
		for (i = 0; i < q->clhash.hashsize; i++) {
			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
						  common.hnode) {
				bool last_child;

				if (!q->offload) {
					htb_destroy_class(sch, cl);
					continue;
				}

				nonempty = true;

				if (cl->level)
					continue;

				changed = true;

				last_child = htb_parent_last_child(cl);
				htb_destroy_class_offload(sch, cl, last_child,
							  true, NULL);
				qdisc_class_hash_remove(&q->clhash,
							&cl->common);
				if (cl->parent)
					cl->parent->children--;
				if (last_child)
					htb_parent_to_leaf(sch, cl, NULL);
				htb_destroy_class(sch, cl);
			}
		}
	} while (changed);
	WARN_ON(nonempty);

	qdisc_class_hash_destroy(&q->clhash);
	__qdisc_reset_queue(&q->direct_queue);

	if (q->offload) {
		offload_opt = (struct tc_htb_qopt_offload) {
			.command = TC_HTB_DESTROY,
		};
		htb_offload(dev, &offload_opt);
	}

	if (!q->direct_qdiscs)
		return;
	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
		qdisc_put(q->direct_qdiscs[i]);
	kfree(q->direct_qdiscs);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg,
		      struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct Qdisc *new_q = NULL;
	int last_child = 0;
	int err;

	/* TODO: why don't we allow deleting a subtree? References? Does the
	 * tc subsystem guarantee us that in htb_destroy it holds no class
	 * refs, so that we can remove children safely there?
	 */
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl))
		last_child = 1;

	if (q->offload) {
		err = htb_destroy_class_offload(sch, cl, last_child, false,
						extack);
		if (err)
			return err;
	}

	if (last_child) {
		struct netdev_queue *dev_queue = sch->dev_queue;

		if (q->offload)
			dev_queue = htb_offload_get_queue(cl);

		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid,
					  NULL);
		if (q->offload) {
			if (new_q)
				htb_set_lockdep_class_child(new_q);
			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
		}
	}

	sch_tree_lock(sch);

	if (!cl->level)
		qdisc_purge_queue(cl->leaf.q);

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node,
				  &q->hlevel[cl->level].wait_pq);

	if (last_child)
		htb_parent_to_leaf(sch, cl, new_q);

	sch_tree_unlock(sch);

	htb_destroy_class(sch, cl);
	return 0;
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg, struct netlink_ext_ack *extack)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct tc_htb_qopt_offload offload_opt;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct Qdisc *parent_qdisc = NULL;
	struct netdev_queue *dev_queue;
	struct tc_htb_opt *hopt;
	u64 rate64, ceil64;
	int warn = 0;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	if (q->offload) {
		/* Options not supported by the offload. */
		if (hopt->rate.overhead || hopt->ceil.overhead) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
			goto failure;
		}
		if (hopt->rate.mpu || hopt->ceil.mpu) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
			goto failure;
		}
		if (hopt->quantum) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
			goto failure;
		}
		if (hopt->prio) {
			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
			goto failure;
		}
	}

	/* Keeping backward compatibility with rate_table based iproute2 tc */
	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
					      NULL));

	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
					      NULL));

	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;

	if (!cl) {		/* new class */
		struct net_device *dev = qdisc_dev(sch);
		struct Qdisc *new_q, *old_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		gnet_stats_basic_sync_init(&cl->bstats);
		gnet_stats_basic_sync_init(&cl->bstats_bias);

		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
		if (err) {
			kfree(cl);
			goto failure;
		}
		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, NULL,
						&cl->rate_est,
						NULL,
						true,
						tca[TCA_RATE] ? : &est.nla);
			if (err)
				goto err_block_put;
		}

		cl->children = 0;
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		cl->common.classid = classid;

		/* Make sure nothing interrupts us in between two
		 * ndo_setup_tc calls.
		 */
		ASSERT_RTNL();

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so it can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		if (!q->offload) {
			dev_queue = sch->dev_queue;
		} else if (!(parent && !parent->level)) {
			/* Assign a dev_queue to this classid. */
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_ALLOC_QUEUE,
				.classid = cl->common.classid,
				.parent_classid = parent ?
					TC_H_MIN(parent->common.classid) :
					TC_HTB_CLASSID_ROOT,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_ALLOC_QUEUE failed with err = %d\n",
				       err);
				goto err_kill_estimator;
			}
			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
		} else { /* First child. */
			dev_queue = htb_offload_get_queue(parent);
			old_q = htb_graft_helper(dev_queue, NULL);
			WARN_ON(old_q != parent->leaf.q);
			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_LEAF_TO_INNER,
				.classid = cl->common.classid,
				.parent_classid =
					TC_H_MIN(parent->common.classid),
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err) {
				pr_err("htb: TC_HTB_LEAF_TO_INNER failed with err = %d\n",
				       err);
				htb_graft_helper(dev_queue, old_q);
				goto err_kill_estimator;
			}
			_bstats_update(&parent->bstats_bias,
				       u64_stats_read(&old_q->bstats.bytes),
				       u64_stats_read(&old_q->bstats.packets));
			qdisc_put(old_q);
		}
		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
					  classid, NULL);
		if (q->offload) {
			if (new_q) {
				htb_set_lockdep_class_child(new_q);
				/* One ref for cl->leaf.q, the other for
				 * dev_queue->qdisc.
				 */
				qdisc_refcount_inc(new_q);
			}
			old_q = htb_graft_helper(dev_queue, new_q);
			/* No qdisc_put needed. */
			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
		}
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			qdisc_purge_queue(parent->leaf.q);
			parent_qdisc = parent->leaf.q;
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->inner, 0, sizeof(parent->inner));
		}

		/* leaf (we) needs elementary qdisc */
		cl->leaf.q = new_q ? new_q : &noop_qdisc;
		if (q->offload)
			cl->leaf.offload_queue = dev_queue;

		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_get_ns();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
		if (cl->leaf.q != &noop_qdisc)
			qdisc_hash_add(cl->leaf.q, true);
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    NULL,
						    true,
						    tca[TCA_RATE]);
			if (err)
				return err;
		}

		if (q->offload) {
			struct net_device *dev = qdisc_dev(sch);

			offload_opt = (struct tc_htb_qopt_offload) {
				.command = TC_HTB_NODE_MODIFY,
				.classid = cl->common.classid,
				.rate = max_t(u64, hopt->rate.rate, rate64),
				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
				.extack = extack,
			};
			err = htb_offload(dev, &offload_opt);
			if (err)
				/* Estimator was replaced, and rollback may fail
				 * as well, so we don't try to recover it, and
				 * the estimator won't work properly with the
				 * offload anyway, because bstats are updated
				 * only when the stats are queried.
				 */
				return err;
		}

		sch_tree_lock(sch);
	}

	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);

	/* there used to be a nasty bug here, we have to check that the node
	 * is really a leaf before changing cl->leaf !
	 */
	if (!cl->level) {
		u64 quantum = cl->rate.rate_bytes_ps;

		do_div(quantum, q->rate2quantum);
		cl->quantum = min_t(u64, quantum, INT_MAX);

		if (!hopt->quantum && cl->quantum < 1000) {
			warn = -1;
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			warn = 1;
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);
	qdisc_put(parent_qdisc);

	if (warn)
		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
			cl->common.classid, (warn == -1 ? "small" : "big"));

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

err_kill_estimator:
	gen_kill_estimator(&cl->rate_est);
err_block_put:
	tcf_block_put(cl->block);
	kfree(cl);
failure:
	return err;
}

static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
				       struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	return cl ? cl->block : q->block;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least the tc_index filter uses this just to get the
	 * class for other reasons, so we have to allow it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by the class during destroy, IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
				return;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.select_queue	=	htb_select_queue,
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.find		=	htb_search,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_block	=	htb_tcf_block,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	htb_init,
	.attach		=	htb_attach,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");