// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the
   times determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to perform some sanity
   checks and the parts of the work common to all qdiscs, and to
   provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty, it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
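/* Illustrative only: a minimal sketch (not part of the real API surface of
 * this file) of how a trivial FIFO discipline could satisfy the contract
 * described above, assuming the qdisc_enqueue_tail()/qdisc_dequeue_head()
 * helpers from <net/sch_generic.h>. The "example_fifo" names are made up;
 * a real qdisc would also set up ->init/->reset/->change as appropriate.
 */
#if 0	/* example sketch, never compiled */
static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
		return qdisc_enqueue_tail(skb, sch);	/* 0 == success */
	return qdisc_drop(skb, sch, to_free);		/* NET_XMIT_DROP */
}

static struct sk_buff *example_fifo_dequeue(struct Qdisc *sch)
{
	/* Returns NULL here only when q.qlen == 0 (work-conserving). */
	return qdisc_dequeue_head(sch);
}

static struct Qdisc_ops example_fifo_ops __read_mostly = {
	.id		= "example_fifo",
	.enqueue	= example_fifo_enqueue,
	.dequeue	= example_fifo_dequeue,
	.peek		= qdisc_peek_head,
	.owner		= THIS_MODULE,
};
#endif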
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get the default qdisc, if one is not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
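/* Illustrative only: a sketch of how an out-of-tree discipline module would
 * typically use register_qdisc()/unregister_qdisc() above, assuming the
 * hypothetical example_fifo_ops sketched near the top of this file.
 */
#if 0	/* example sketch, never compiled */
static int __init example_fifo_module_init(void)
{
	/* Fails with -EEXIST if "example_fifo" is already registered. */
	return register_qdisc(&example_fifo_ops);
}

static void __exit example_fifo_module_exit(void)
{
	unregister_qdisc(&example_fifo_ops);
}

module_init(example_fifo_module_init);
module_exit(example_fifo_module_exit);
MODULE_LICENSE("GPL");
#endif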
/* We know the handle. Find the qdisc among all qdiscs attached to the device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either holds rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* In older iproute2 versions the linklayer setting was not transferred,
 * and the rate table lookup system has been dropped from the kernel.
 * To stay backward compatible with older iproute2 tc utils, we detect
 * the linklayer setting by detecting whether the rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low + 1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000 / 8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
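/* Illustrative only: a worked example of the detection above, with made-up
 * numbers. Take mpu = 0 and cell_log = 3 (one rate-table slot per 8 bytes):
 * low = roundup(0, 48) = 0, high = roundup(1, 48) = 48, so cell_low = 0 and
 * cell_high = 48/8 - 1 = 5. Slots 0..5 all fall inside the first 48-byte
 * ATM cell, so an ATM-aligned table charges them identically and
 * rtab[0] == rtab[5] yields TC_LINKLAYER_ATM; an Ethernet table grows per
 * byte, rtab[0] != rtab[5], and the function falls through to Ethernet.
 */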
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
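/* Illustrative only: a worked example of the size-table lookup above, with
 * made-up parameters. With overhead = 4, cell_align = 0, cell_log = 6,
 * size_log = 0 and tsize = 512, a 1000-byte skb gives pkt_len = 1004 and
 * slot = 1004 >> 6 = 15, so pkt_len becomes stab->data[15]; with an
 * ATM-style table that entry would hold roundup(1004, 48) = 1008. Slots at
 * or beyond tsize extrapolate from the last entry instead of reading past
 * the end of the table.
 */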
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		/* If the timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (wd->last_expires - expires <= delta_ns)
			return;
	}

	wd->last_expires = expires;
	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
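/* Illustrative only: a sketch of how a shaping qdisc typically drives the
 * watchdog above, assuming a hypothetical private struct embedding a
 * struct qdisc_watchdog. When ->dequeue decides the next packet may only
 * leave at t_next, it returns NULL and arms the timer; the timer callback
 * reschedules the device so dequeue runs again.
 */
#if 0	/* example sketch, never compiled */
struct example_shaper {
	struct qdisc_watchdog watchdog;
	/* ... rate state ... */
};

static int example_shaper_init(struct Qdisc *sch, struct nlattr *opt,
			       struct netlink_ext_ack *extack)
{
	struct example_shaper *q = qdisc_priv(sch);

	qdisc_watchdog_init(&q->watchdog, sch);
	return 0;
}

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
	struct example_shaper *q = qdisc_priv(sch);
	u64 t_next = 0;	/* next permitted transmit time in ns (computed) */

	if (ktime_get_ns() < t_next) {
		/* Not yet: sleep until t_next, with no slack here. */
		qdisc_watchdog_schedule_range_ns(&q->watchdog, t_next, 0);
		return NULL;
	}
	return qdisc_dequeue_head(sch);
}

static void example_shaper_destroy(struct Qdisc *sch)
{
	struct example_shaper *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
}
#endif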
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
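/* Illustrative only: a sketch of how a classful qdisc consumes the class
 * hash API above, assuming a hypothetical class type that embeds
 * struct Qdisc_class_common (lookup by classid is then qdisc_class_find()).
 * The table is grown opportunistically after inserting, from a context
 * that may sleep and holds no qdisc tree lock.
 */
#if 0	/* example sketch, never compiled */
struct example_class {
	struct Qdisc_class_common common;	/* classid + hash node */
	/* ... per-class state ... */
};

struct example_classful_sched {
	struct Qdisc_class_hash clhash;
};

static int example_classful_init(struct Qdisc *sch, struct nlattr *opt,
				 struct netlink_ext_ack *extack)
{
	struct example_classful_sched *q = qdisc_priv(sch);

	return qdisc_class_hash_init(&q->clhash);
}

static void example_classful_add(struct Qdisc *sch, struct example_class *cl)
{
	struct example_classful_sched *q = qdisc_priv(sch);

	sch_tree_lock(sch);
	qdisc_class_hash_insert(&q->clhash, &cl->common);
	sch_tree_unlock(sch);

	qdisc_class_hash_grow(sch, &q->clhash);	/* may allocate */
}
#endif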
/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify the parent qdisc only if the child qdisc becomes empty.
		 *
		 * If the child was empty even before the update then the
		 * backlog counter is inconsistent and we skip the
		 * notification, because the parent class is already passive.
		 *
		 * If the original child was offloaded then it is allowed
		 * to be seen as empty, so the parent is notified anyway.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
						       !qdisc_is_offloaded);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report an error if the graft is part of a destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report an error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_put(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];

		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	qdisc_free(sch);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		 * because change can't be undone.
		 */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
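/* Illustrative only: how the iproute2 tc front end is commonly understood
 * to map onto the netlink flags that tc_modify_qdisc() below dissects
 * (treat this table as an assumption about userspace, not a contract):
 *
 *	tc qdisc add	 -> NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc change	 -> (no flags)
 *	tc qdisc link	 -> NLM_F_REPLACE
 */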
/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a choice:
				 * either to change it or to create/graft a
				 * new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if both the CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor wanted to
				 * say that qdisc tcm_handle is not expected
				 * to exist, so we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is a sort of hole in the API; we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft if the
				 * user gave a KIND which does not match the
				 * existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
1708 * 1709 * If we've already dumped the top-level (ingress) qdisc above and the global 1710 * qdisc hashtable, we don't want to hit it again 1711 */ 1712 if (!qdisc_dev(root) || !recur) 1713 goto out; 1714 1715 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 1716 if (q_idx < s_q_idx) { 1717 q_idx++; 1718 continue; 1719 } 1720 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1721 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1722 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1723 RTM_NEWQDISC) <= 0) 1724 goto done; 1725 q_idx++; 1726 } 1727 1728 out: 1729 *q_idx_p = q_idx; 1730 return ret; 1731 done: 1732 ret = -1; 1733 goto out; 1734 } 1735 1736 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 1737 { 1738 struct net *net = sock_net(skb->sk); 1739 int idx, q_idx; 1740 int s_idx, s_q_idx; 1741 struct net_device *dev; 1742 const struct nlmsghdr *nlh = cb->nlh; 1743 struct nlattr *tca[TCA_MAX + 1]; 1744 int err; 1745 1746 s_idx = cb->args[0]; 1747 s_q_idx = q_idx = cb->args[1]; 1748 1749 idx = 0; 1750 ASSERT_RTNL(); 1751 1752 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1753 rtm_tca_policy, cb->extack); 1754 if (err < 0) 1755 return err; 1756 1757 for_each_netdev(net, dev) { 1758 struct netdev_queue *dev_queue; 1759 1760 if (idx < s_idx) 1761 goto cont; 1762 if (idx > s_idx) 1763 s_q_idx = 0; 1764 q_idx = 0; 1765 1766 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, 1767 true, tca[TCA_DUMP_INVISIBLE]) < 0) 1768 goto done; 1769 1770 dev_queue = dev_ingress_queue(dev); 1771 if (dev_queue && 1772 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, 1773 &q_idx, s_q_idx, false, 1774 tca[TCA_DUMP_INVISIBLE]) < 0) 1775 goto done; 1776 1777 cont: 1778 idx++; 1779 } 1780 1781 done: 1782 cb->args[0] = idx; 1783 cb->args[1] = q_idx; 1784 1785 return skb->len; 1786 } 1787 1788 1789 1790 /************************************************ 1791 * Traffic classes manipulation. 


/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	int err = 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err > 0)
		err = 0;
	return err;
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL, true);
		     tp; tp = tcf_get_next_proto(chain, tp, true)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;
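	/* Illustrative only: a worked example of the tcmsg addressing decoded
	 * below, using a made-up command. For
	 *	tc class add dev eth0 parent 1: classid 1:10 ...
	 * userspace sends tcm_parent = 1:0 (TC_H_MAKE(0x10000, 0)) and
	 * tcm_handle = 1:10, so qid resolves to 1:0, the qdisc 1: is located,
	 * and cops->change() is asked to create class 1:10 under the root
	 * class.
	 */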
	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind filters from the class by binding them to class 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class, need to do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);