1 /* 2 * net/sched/sch_api.c Packet scheduler API. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 10 * 11 * Fixes: 12 * 13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired. 14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support 15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support 16 */ 17 18 #include <linux/module.h> 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/string.h> 22 #include <linux/errno.h> 23 #include <linux/skbuff.h> 24 #include <linux/init.h> 25 #include <linux/proc_fs.h> 26 #include <linux/seq_file.h> 27 #include <linux/kmod.h> 28 #include <linux/list.h> 29 #include <linux/hrtimer.h> 30 #include <linux/slab.h> 31 #include <linux/hashtable.h> 32 33 #include <net/net_namespace.h> 34 #include <net/sock.h> 35 #include <net/netlink.h> 36 #include <net/pkt_sched.h> 37 #include <net/pkt_cls.h> 38 39 /* 40 41 Short review. 42 ------------- 43 44 This file consists of two interrelated parts: 45 46 1. queueing disciplines manager frontend. 47 2. traffic classes manager frontend. 48 49 Generally, queueing discipline ("qdisc") is a black box, 50 which is able to enqueue packets and to dequeue them (when 51 device is ready to send something) in order and at times 52 determined by algorithm hidden in it. 53 54 qdisc's are divided to two categories: 55 - "queues", which have no internal structure visible from outside. 56 - "schedulers", which split all the packets to "traffic classes", 57 using "packet classifiers" (look at cls_api.c) 58 59 In turn, classes may have child qdiscs (as rule, queues) 60 attached to them etc. etc. etc. 61 62 The goal of the routines in this file is to translate 63 information supplied by user in the form of handles 64 to more intelligible for kernel form, to make some sanity 65 checks and part of work, which is common to all qdiscs 66 and to provide rtnetlink notifications. 67 68 All real intelligent work is done inside qdisc modules. 69 70 71 72 Every discipline has two major routines: enqueue and dequeue. 73 74 ---dequeue 75 76 dequeue usually returns a skb to send. It is allowed to return NULL, 77 but it does not mean that queue is empty, it just means that 78 discipline does not want to send anything this time. 79 Queue is really empty if q->q.qlen == 0. 80 For complicated disciplines with multiple queues q->q is not 81 real packet queue, but however q->q.qlen must be valid. 82 83 ---enqueue 84 85 enqueue returns 0, if packet was enqueued successfully. 86 If packet (this one or another one) was dropped, it returns 87 not zero error code. 88 NET_XMIT_DROP - this packet dropped 89 Expected action: do not backoff, but wait until queue will clear. 90 NET_XMIT_CN - probably this packet enqueued, but another one dropped. 91 Expected action: backoff or ignore 92 93 Auxiliary routines: 94 95 ---peek 96 97 like dequeue but without removing a packet from the queue 98 99 ---reset 100 101 returns qdisc to initial state: purge all buffers, clear all 102 timers, counters (except for statistics) etc. 103 104 ---init 105 106 initializes newly created qdisc. 107 108 ---destroy 109 110 destroys resources allocated by init and during lifetime of qdisc. 111 112 ---change 113 114 changes qdisc parameters. 115 */ 116 117 /* Protects list of registered TC modules. It is pure SMP lock. */ 118 static DEFINE_RWLOCK(qdisc_mod_lock); 119 120 121 /************************************************ 122 * Queueing disciplines manipulation. * 123 ************************************************/ 124 125 126 /* The list of all installed queueing disciplines. */ 127 128 static struct Qdisc_ops *qdisc_base; 129 130 /* Register/unregister queueing discipline */ 131 132 int register_qdisc(struct Qdisc_ops *qops) 133 { 134 struct Qdisc_ops *q, **qp; 135 int rc = -EEXIST; 136 137 write_lock(&qdisc_mod_lock); 138 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 139 if (!strcmp(qops->id, q->id)) 140 goto out; 141 142 if (qops->enqueue == NULL) 143 qops->enqueue = noop_qdisc_ops.enqueue; 144 if (qops->peek == NULL) { 145 if (qops->dequeue == NULL) 146 qops->peek = noop_qdisc_ops.peek; 147 else 148 goto out_einval; 149 } 150 if (qops->dequeue == NULL) 151 qops->dequeue = noop_qdisc_ops.dequeue; 152 153 if (qops->cl_ops) { 154 const struct Qdisc_class_ops *cops = qops->cl_ops; 155 156 if (!(cops->find && cops->walk && cops->leaf)) 157 goto out_einval; 158 159 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) 160 goto out_einval; 161 } 162 163 qops->next = NULL; 164 *qp = qops; 165 rc = 0; 166 out: 167 write_unlock(&qdisc_mod_lock); 168 return rc; 169 170 out_einval: 171 rc = -EINVAL; 172 goto out; 173 } 174 EXPORT_SYMBOL(register_qdisc); 175 176 int unregister_qdisc(struct Qdisc_ops *qops) 177 { 178 struct Qdisc_ops *q, **qp; 179 int err = -ENOENT; 180 181 write_lock(&qdisc_mod_lock); 182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) 183 if (q == qops) 184 break; 185 if (q) { 186 *qp = q->next; 187 q->next = NULL; 188 err = 0; 189 } 190 write_unlock(&qdisc_mod_lock); 191 return err; 192 } 193 EXPORT_SYMBOL(unregister_qdisc); 194 195 /* Get default qdisc if not otherwise specified */ 196 void qdisc_get_default(char *name, size_t len) 197 { 198 read_lock(&qdisc_mod_lock); 199 strlcpy(name, default_qdisc_ops->id, len); 200 read_unlock(&qdisc_mod_lock); 201 } 202 203 static struct Qdisc_ops *qdisc_lookup_default(const char *name) 204 { 205 struct Qdisc_ops *q = NULL; 206 207 for (q = qdisc_base; q; q = q->next) { 208 if (!strcmp(name, q->id)) { 209 if (!try_module_get(q->owner)) 210 q = NULL; 211 break; 212 } 213 } 214 215 return q; 216 } 217 218 /* Set new default qdisc to use */ 219 int qdisc_set_default(const char *name) 220 { 221 const struct Qdisc_ops *ops; 222 223 if (!capable(CAP_NET_ADMIN)) 224 return -EPERM; 225 226 write_lock(&qdisc_mod_lock); 227 ops = qdisc_lookup_default(name); 228 if (!ops) { 229 /* Not found, drop lock and try to load module */ 230 write_unlock(&qdisc_mod_lock); 231 request_module("sch_%s", name); 232 write_lock(&qdisc_mod_lock); 233 234 ops = qdisc_lookup_default(name); 235 } 236 237 if (ops) { 238 /* Set new default */ 239 module_put(default_qdisc_ops->owner); 240 default_qdisc_ops = ops; 241 } 242 write_unlock(&qdisc_mod_lock); 243 244 return ops ? 0 : -ENOENT; 245 } 246 247 #ifdef CONFIG_NET_SCH_DEFAULT 248 /* Set default value from kernel config */ 249 static int __init sch_default_qdisc(void) 250 { 251 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); 252 } 253 late_initcall(sch_default_qdisc); 254 #endif 255 256 /* We know handle. Find qdisc among all qdisc's attached to device 257 * (root qdisc, all its children, children of children etc.) 258 * Note: caller either uses rtnl or rcu_read_lock() 259 */ 260 261 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) 262 { 263 struct Qdisc *q; 264 265 if (!qdisc_dev(root)) 266 return (root->handle == handle ? root : NULL); 267 268 if (!(root->flags & TCQ_F_BUILTIN) && 269 root->handle == handle) 270 return root; 271 272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { 273 if (q->handle == handle) 274 return q; 275 } 276 return NULL; 277 } 278 279 void qdisc_hash_add(struct Qdisc *q, bool invisible) 280 { 281 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 282 ASSERT_RTNL(); 283 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); 284 if (invisible) 285 q->flags |= TCQ_F_INVISIBLE; 286 } 287 } 288 EXPORT_SYMBOL(qdisc_hash_add); 289 290 void qdisc_hash_del(struct Qdisc *q) 291 { 292 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 293 ASSERT_RTNL(); 294 hash_del_rcu(&q->hash); 295 } 296 } 297 EXPORT_SYMBOL(qdisc_hash_del); 298 299 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) 300 { 301 struct Qdisc *q; 302 303 if (!handle) 304 return NULL; 305 q = qdisc_match_from_root(dev->qdisc, handle); 306 if (q) 307 goto out; 308 309 if (dev_ingress_queue(dev)) 310 q = qdisc_match_from_root( 311 dev_ingress_queue(dev)->qdisc_sleeping, 312 handle); 313 out: 314 return q; 315 } 316 317 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) 318 { 319 struct netdev_queue *nq; 320 struct Qdisc *q; 321 322 if (!handle) 323 return NULL; 324 q = qdisc_match_from_root(dev->qdisc, handle); 325 if (q) 326 goto out; 327 328 nq = dev_ingress_queue_rcu(dev); 329 if (nq) 330 q = qdisc_match_from_root(nq->qdisc_sleeping, handle); 331 out: 332 return q; 333 } 334 335 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) 336 { 337 unsigned long cl; 338 struct Qdisc *leaf; 339 const struct Qdisc_class_ops *cops = p->ops->cl_ops; 340 341 if (cops == NULL) 342 return NULL; 343 cl = cops->find(p, classid); 344 345 if (cl == 0) 346 return NULL; 347 leaf = cops->leaf(p, cl); 348 return leaf; 349 } 350 351 /* Find queueing discipline by name */ 352 353 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) 354 { 355 struct Qdisc_ops *q = NULL; 356 357 if (kind) { 358 read_lock(&qdisc_mod_lock); 359 for (q = qdisc_base; q; q = q->next) { 360 if (nla_strcmp(kind, q->id) == 0) { 361 if (!try_module_get(q->owner)) 362 q = NULL; 363 break; 364 } 365 } 366 read_unlock(&qdisc_mod_lock); 367 } 368 return q; 369 } 370 371 /* The linklayer setting were not transferred from iproute2, in older 372 * versions, and the rate tables lookup systems have been dropped in 373 * the kernel. To keep backward compatible with older iproute2 tc 374 * utils, we detect the linklayer setting by detecting if the rate 375 * table were modified. 376 * 377 * For linklayer ATM table entries, the rate table will be aligned to 378 * 48 bytes, thus some table entries will contain the same value. The 379 * mpu (min packet unit) is also encoded into the old rate table, thus 380 * starting from the mpu, we find low and high table entries for 381 * mapping this cell. If these entries contain the same value, when 382 * the rate tables have been modified for linklayer ATM. 383 * 384 * This is done by rounding mpu to the nearest 48 bytes cell/entry, 385 * and then roundup to the next cell, calc the table entry one below, 386 * and compare. 387 */ 388 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) 389 { 390 int low = roundup(r->mpu, 48); 391 int high = roundup(low+1, 48); 392 int cell_low = low >> r->cell_log; 393 int cell_high = (high >> r->cell_log) - 1; 394 395 /* rtab is too inaccurate at rates > 100Mbit/s */ 396 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { 397 pr_debug("TC linklayer: Giving up ATM detection\n"); 398 return TC_LINKLAYER_ETHERNET; 399 } 400 401 if ((cell_high > cell_low) && (cell_high < 256) 402 && (rtab[cell_low] == rtab[cell_high])) { 403 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", 404 cell_low, cell_high, rtab[cell_high]); 405 return TC_LINKLAYER_ATM; 406 } 407 return TC_LINKLAYER_ETHERNET; 408 } 409 410 static struct qdisc_rate_table *qdisc_rtab_list; 411 412 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 413 struct nlattr *tab, 414 struct netlink_ext_ack *extack) 415 { 416 struct qdisc_rate_table *rtab; 417 418 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || 419 nla_len(tab) != TC_RTAB_SIZE) { 420 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); 421 return NULL; 422 } 423 424 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { 425 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && 426 !memcmp(&rtab->data, nla_data(tab), 1024)) { 427 rtab->refcnt++; 428 return rtab; 429 } 430 } 431 432 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); 433 if (rtab) { 434 rtab->rate = *r; 435 rtab->refcnt = 1; 436 memcpy(rtab->data, nla_data(tab), 1024); 437 if (r->linklayer == TC_LINKLAYER_UNAWARE) 438 r->linklayer = __detect_linklayer(r, rtab->data); 439 rtab->next = qdisc_rtab_list; 440 qdisc_rtab_list = rtab; 441 } else { 442 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); 443 } 444 return rtab; 445 } 446 EXPORT_SYMBOL(qdisc_get_rtab); 447 448 void qdisc_put_rtab(struct qdisc_rate_table *tab) 449 { 450 struct qdisc_rate_table *rtab, **rtabp; 451 452 if (!tab || --tab->refcnt) 453 return; 454 455 for (rtabp = &qdisc_rtab_list; 456 (rtab = *rtabp) != NULL; 457 rtabp = &rtab->next) { 458 if (rtab == tab) { 459 *rtabp = rtab->next; 460 kfree(rtab); 461 return; 462 } 463 } 464 } 465 EXPORT_SYMBOL(qdisc_put_rtab); 466 467 static LIST_HEAD(qdisc_stab_list); 468 469 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { 470 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, 471 [TCA_STAB_DATA] = { .type = NLA_BINARY }, 472 }; 473 474 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, 475 struct netlink_ext_ack *extack) 476 { 477 struct nlattr *tb[TCA_STAB_MAX + 1]; 478 struct qdisc_size_table *stab; 479 struct tc_sizespec *s; 480 unsigned int tsize = 0; 481 u16 *tab = NULL; 482 int err; 483 484 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack); 485 if (err < 0) 486 return ERR_PTR(err); 487 if (!tb[TCA_STAB_BASE]) { 488 NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); 489 return ERR_PTR(-EINVAL); 490 } 491 492 s = nla_data(tb[TCA_STAB_BASE]); 493 494 if (s->tsize > 0) { 495 if (!tb[TCA_STAB_DATA]) { 496 NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); 497 return ERR_PTR(-EINVAL); 498 } 499 tab = nla_data(tb[TCA_STAB_DATA]); 500 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 501 } 502 503 if (tsize != s->tsize || (!tab && tsize > 0)) { 504 NL_SET_ERR_MSG(extack, "Invalid size of size table"); 505 return ERR_PTR(-EINVAL); 506 } 507 508 list_for_each_entry(stab, &qdisc_stab_list, list) { 509 if (memcmp(&stab->szopts, s, sizeof(*s))) 510 continue; 511 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16))) 512 continue; 513 stab->refcnt++; 514 return stab; 515 } 516 517 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); 518 if (!stab) 519 return ERR_PTR(-ENOMEM); 520 521 stab->refcnt = 1; 522 stab->szopts = *s; 523 if (tsize > 0) 524 memcpy(stab->data, tab, tsize * sizeof(u16)); 525 526 list_add_tail(&stab->list, &qdisc_stab_list); 527 528 return stab; 529 } 530 531 static void stab_kfree_rcu(struct rcu_head *head) 532 { 533 kfree(container_of(head, struct qdisc_size_table, rcu)); 534 } 535 536 void qdisc_put_stab(struct qdisc_size_table *tab) 537 { 538 if (!tab) 539 return; 540 541 if (--tab->refcnt == 0) { 542 list_del(&tab->list); 543 call_rcu_bh(&tab->rcu, stab_kfree_rcu); 544 } 545 } 546 EXPORT_SYMBOL(qdisc_put_stab); 547 548 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) 549 { 550 struct nlattr *nest; 551 552 nest = nla_nest_start(skb, TCA_STAB); 553 if (nest == NULL) 554 goto nla_put_failure; 555 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) 556 goto nla_put_failure; 557 nla_nest_end(skb, nest); 558 559 return skb->len; 560 561 nla_put_failure: 562 return -1; 563 } 564 565 void __qdisc_calculate_pkt_len(struct sk_buff *skb, 566 const struct qdisc_size_table *stab) 567 { 568 int pkt_len, slot; 569 570 pkt_len = skb->len + stab->szopts.overhead; 571 if (unlikely(!stab->szopts.tsize)) 572 goto out; 573 574 slot = pkt_len + stab->szopts.cell_align; 575 if (unlikely(slot < 0)) 576 slot = 0; 577 578 slot >>= stab->szopts.cell_log; 579 if (likely(slot < stab->szopts.tsize)) 580 pkt_len = stab->data[slot]; 581 else 582 pkt_len = stab->data[stab->szopts.tsize - 1] * 583 (slot / stab->szopts.tsize) + 584 stab->data[slot % stab->szopts.tsize]; 585 586 pkt_len <<= stab->szopts.size_log; 587 out: 588 if (unlikely(pkt_len < 1)) 589 pkt_len = 1; 590 qdisc_skb_cb(skb)->pkt_len = pkt_len; 591 } 592 EXPORT_SYMBOL(__qdisc_calculate_pkt_len); 593 594 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc) 595 { 596 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 597 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", 598 txt, qdisc->ops->id, qdisc->handle >> 16); 599 qdisc->flags |= TCQ_F_WARN_NONWC; 600 } 601 } 602 EXPORT_SYMBOL(qdisc_warn_nonwc); 603 604 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 605 { 606 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 607 timer); 608 609 rcu_read_lock(); 610 __netif_schedule(qdisc_root(wd->qdisc)); 611 rcu_read_unlock(); 612 613 return HRTIMER_NORESTART; 614 } 615 616 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, 617 clockid_t clockid) 618 { 619 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED); 620 wd->timer.function = qdisc_watchdog; 621 wd->qdisc = qdisc; 622 } 623 EXPORT_SYMBOL(qdisc_watchdog_init_clockid); 624 625 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) 626 { 627 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); 628 } 629 EXPORT_SYMBOL(qdisc_watchdog_init); 630 631 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires) 632 { 633 if (test_bit(__QDISC_STATE_DEACTIVATED, 634 &qdisc_root_sleeping(wd->qdisc)->state)) 635 return; 636 637 if (wd->last_expires == expires) 638 return; 639 640 wd->last_expires = expires; 641 hrtimer_start(&wd->timer, 642 ns_to_ktime(expires), 643 HRTIMER_MODE_ABS_PINNED); 644 } 645 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns); 646 647 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 648 { 649 hrtimer_cancel(&wd->timer); 650 } 651 EXPORT_SYMBOL(qdisc_watchdog_cancel); 652 653 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) 654 { 655 struct hlist_head *h; 656 unsigned int i; 657 658 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); 659 660 if (h != NULL) { 661 for (i = 0; i < n; i++) 662 INIT_HLIST_HEAD(&h[i]); 663 } 664 return h; 665 } 666 667 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) 668 { 669 struct Qdisc_class_common *cl; 670 struct hlist_node *next; 671 struct hlist_head *nhash, *ohash; 672 unsigned int nsize, nmask, osize; 673 unsigned int i, h; 674 675 /* Rehash when load factor exceeds 0.75 */ 676 if (clhash->hashelems * 4 <= clhash->hashsize * 3) 677 return; 678 nsize = clhash->hashsize * 2; 679 nmask = nsize - 1; 680 nhash = qdisc_class_hash_alloc(nsize); 681 if (nhash == NULL) 682 return; 683 684 ohash = clhash->hash; 685 osize = clhash->hashsize; 686 687 sch_tree_lock(sch); 688 for (i = 0; i < osize; i++) { 689 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { 690 h = qdisc_class_hash(cl->classid, nmask); 691 hlist_add_head(&cl->hnode, &nhash[h]); 692 } 693 } 694 clhash->hash = nhash; 695 clhash->hashsize = nsize; 696 clhash->hashmask = nmask; 697 sch_tree_unlock(sch); 698 699 kvfree(ohash); 700 } 701 EXPORT_SYMBOL(qdisc_class_hash_grow); 702 703 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) 704 { 705 unsigned int size = 4; 706 707 clhash->hash = qdisc_class_hash_alloc(size); 708 if (!clhash->hash) 709 return -ENOMEM; 710 clhash->hashsize = size; 711 clhash->hashmask = size - 1; 712 clhash->hashelems = 0; 713 return 0; 714 } 715 EXPORT_SYMBOL(qdisc_class_hash_init); 716 717 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) 718 { 719 kvfree(clhash->hash); 720 } 721 EXPORT_SYMBOL(qdisc_class_hash_destroy); 722 723 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, 724 struct Qdisc_class_common *cl) 725 { 726 unsigned int h; 727 728 INIT_HLIST_NODE(&cl->hnode); 729 h = qdisc_class_hash(cl->classid, clhash->hashmask); 730 hlist_add_head(&cl->hnode, &clhash->hash[h]); 731 clhash->hashelems++; 732 } 733 EXPORT_SYMBOL(qdisc_class_hash_insert); 734 735 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, 736 struct Qdisc_class_common *cl) 737 { 738 hlist_del(&cl->hnode); 739 clhash->hashelems--; 740 } 741 EXPORT_SYMBOL(qdisc_class_hash_remove); 742 743 /* Allocate an unique handle from space managed by kernel 744 * Possible range is [8000-FFFF]:0000 (0x8000 values) 745 */ 746 static u32 qdisc_alloc_handle(struct net_device *dev) 747 { 748 int i = 0x8000; 749 static u32 autohandle = TC_H_MAKE(0x80000000U, 0); 750 751 do { 752 autohandle += TC_H_MAKE(0x10000U, 0); 753 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) 754 autohandle = TC_H_MAKE(0x80000000U, 0); 755 if (!qdisc_lookup(dev, autohandle)) 756 return autohandle; 757 cond_resched(); 758 } while (--i > 0); 759 760 return 0; 761 } 762 763 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, 764 unsigned int len) 765 { 766 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED; 767 const struct Qdisc_class_ops *cops; 768 unsigned long cl; 769 u32 parentid; 770 bool notify; 771 int drops; 772 773 if (n == 0 && len == 0) 774 return; 775 drops = max_t(int, n, 0); 776 rcu_read_lock(); 777 while ((parentid = sch->parent)) { 778 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) 779 break; 780 781 if (sch->flags & TCQ_F_NOPARENT) 782 break; 783 /* Notify parent qdisc only if child qdisc becomes empty. 784 * 785 * If child was empty even before update then backlog 786 * counter is screwed and we skip notification because 787 * parent class is already passive. 788 * 789 * If the original child was offloaded then it is allowed 790 * to be seem as empty, so the parent is notified anyway. 791 */ 792 notify = !sch->q.qlen && !WARN_ON_ONCE(!n && 793 !qdisc_is_offloaded); 794 /* TODO: perform the search on a per txq basis */ 795 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); 796 if (sch == NULL) { 797 WARN_ON_ONCE(parentid != TC_H_ROOT); 798 break; 799 } 800 cops = sch->ops->cl_ops; 801 if (notify && cops->qlen_notify) { 802 cl = cops->find(sch, parentid); 803 cops->qlen_notify(sch, cl); 804 } 805 sch->q.qlen -= n; 806 sch->qstats.backlog -= len; 807 __qdisc_qstats_drop(sch, drops); 808 } 809 rcu_read_unlock(); 810 } 811 EXPORT_SYMBOL(qdisc_tree_reduce_backlog); 812 813 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, 814 void *type_data) 815 { 816 struct net_device *dev = qdisc_dev(sch); 817 int err; 818 819 sch->flags &= ~TCQ_F_OFFLOADED; 820 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 821 return 0; 822 823 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 824 if (err == -EOPNOTSUPP) 825 return 0; 826 827 if (!err) 828 sch->flags |= TCQ_F_OFFLOADED; 829 830 return err; 831 } 832 EXPORT_SYMBOL(qdisc_offload_dump_helper); 833 834 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, 835 struct Qdisc *new, struct Qdisc *old, 836 enum tc_setup_type type, void *type_data, 837 struct netlink_ext_ack *extack) 838 { 839 bool any_qdisc_is_offloaded; 840 int err; 841 842 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) 843 return; 844 845 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); 846 847 /* Don't report error if the graft is part of destroy operation. */ 848 if (!err || !new || new == &noop_qdisc) 849 return; 850 851 /* Don't report error if the parent, the old child and the new 852 * one are not offloaded. 853 */ 854 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; 855 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; 856 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; 857 858 if (any_qdisc_is_offloaded) 859 NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); 860 } 861 EXPORT_SYMBOL(qdisc_offload_graft_helper); 862 863 static void qdisc_offload_graft_root(struct net_device *dev, 864 struct Qdisc *new, struct Qdisc *old, 865 struct netlink_ext_ack *extack) 866 { 867 struct tc_root_qopt_offload graft_offload = { 868 .command = TC_ROOT_GRAFT, 869 .handle = new ? new->handle : 0, 870 .ingress = (new && new->flags & TCQ_F_INGRESS) || 871 (old && old->flags & TCQ_F_INGRESS), 872 }; 873 874 qdisc_offload_graft_helper(dev, NULL, new, old, 875 TC_SETUP_ROOT_QDISC, &graft_offload, extack); 876 } 877 878 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, 879 u32 portid, u32 seq, u16 flags, int event) 880 { 881 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; 882 struct gnet_stats_queue __percpu *cpu_qstats = NULL; 883 struct tcmsg *tcm; 884 struct nlmsghdr *nlh; 885 unsigned char *b = skb_tail_pointer(skb); 886 struct gnet_dump d; 887 struct qdisc_size_table *stab; 888 u32 block_index; 889 __u32 qlen; 890 891 cond_resched(); 892 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 893 if (!nlh) 894 goto out_nlmsg_trim; 895 tcm = nlmsg_data(nlh); 896 tcm->tcm_family = AF_UNSPEC; 897 tcm->tcm__pad1 = 0; 898 tcm->tcm__pad2 = 0; 899 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 900 tcm->tcm_parent = clid; 901 tcm->tcm_handle = q->handle; 902 tcm->tcm_info = refcount_read(&q->refcnt); 903 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 904 goto nla_put_failure; 905 if (q->ops->ingress_block_get) { 906 block_index = q->ops->ingress_block_get(q); 907 if (block_index && 908 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) 909 goto nla_put_failure; 910 } 911 if (q->ops->egress_block_get) { 912 block_index = q->ops->egress_block_get(q); 913 if (block_index && 914 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) 915 goto nla_put_failure; 916 } 917 if (q->ops->dump && q->ops->dump(q, skb) < 0) 918 goto nla_put_failure; 919 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) 920 goto nla_put_failure; 921 qlen = qdisc_qlen_sum(q); 922 923 stab = rtnl_dereference(q->stab); 924 if (stab && qdisc_dump_stab(skb, stab) < 0) 925 goto nla_put_failure; 926 927 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 928 NULL, &d, TCA_PAD) < 0) 929 goto nla_put_failure; 930 931 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) 932 goto nla_put_failure; 933 934 if (qdisc_is_percpu_stats(q)) { 935 cpu_bstats = q->cpu_bstats; 936 cpu_qstats = q->cpu_qstats; 937 } 938 939 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), 940 &d, cpu_bstats, &q->bstats) < 0 || 941 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || 942 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) 943 goto nla_put_failure; 944 945 if (gnet_stats_finish_copy(&d) < 0) 946 goto nla_put_failure; 947 948 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 949 return skb->len; 950 951 out_nlmsg_trim: 952 nla_put_failure: 953 nlmsg_trim(skb, b); 954 return -1; 955 } 956 957 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) 958 { 959 if (q->flags & TCQ_F_BUILTIN) 960 return true; 961 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) 962 return true; 963 964 return false; 965 } 966 967 static int qdisc_notify(struct net *net, struct sk_buff *oskb, 968 struct nlmsghdr *n, u32 clid, 969 struct Qdisc *old, struct Qdisc *new) 970 { 971 struct sk_buff *skb; 972 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 973 974 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 975 if (!skb) 976 return -ENOBUFS; 977 978 if (old && !tc_qdisc_dump_ignore(old, false)) { 979 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 980 0, RTM_DELQDISC) < 0) 981 goto err_out; 982 } 983 if (new && !tc_qdisc_dump_ignore(new, false)) { 984 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, 985 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 986 goto err_out; 987 } 988 989 if (skb->len) 990 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 991 n->nlmsg_flags & NLM_F_ECHO); 992 993 err_out: 994 kfree_skb(skb); 995 return -EINVAL; 996 } 997 998 static void notify_and_destroy(struct net *net, struct sk_buff *skb, 999 struct nlmsghdr *n, u32 clid, 1000 struct Qdisc *old, struct Qdisc *new) 1001 { 1002 if (new || old) 1003 qdisc_notify(net, skb, n, clid, old, new); 1004 1005 if (old) 1006 qdisc_put(old); 1007 } 1008 1009 /* Graft qdisc "new" to class "classid" of qdisc "parent" or 1010 * to device "dev". 1011 * 1012 * When appropriate send a netlink notification using 'skb' 1013 * and "n". 1014 * 1015 * On success, destroy old qdisc. 1016 */ 1017 1018 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, 1019 struct sk_buff *skb, struct nlmsghdr *n, u32 classid, 1020 struct Qdisc *new, struct Qdisc *old, 1021 struct netlink_ext_ack *extack) 1022 { 1023 struct Qdisc *q = old; 1024 struct net *net = dev_net(dev); 1025 1026 if (parent == NULL) { 1027 unsigned int i, num_q, ingress; 1028 1029 ingress = 0; 1030 num_q = dev->num_tx_queues; 1031 if ((q && q->flags & TCQ_F_INGRESS) || 1032 (new && new->flags & TCQ_F_INGRESS)) { 1033 num_q = 1; 1034 ingress = 1; 1035 if (!dev_ingress_queue(dev)) { 1036 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); 1037 return -ENOENT; 1038 } 1039 } 1040 1041 if (dev->flags & IFF_UP) 1042 dev_deactivate(dev); 1043 1044 qdisc_offload_graft_root(dev, new, old, extack); 1045 1046 if (new && new->ops->attach) 1047 goto skip; 1048 1049 for (i = 0; i < num_q; i++) { 1050 struct netdev_queue *dev_queue = dev_ingress_queue(dev); 1051 1052 if (!ingress) 1053 dev_queue = netdev_get_tx_queue(dev, i); 1054 1055 old = dev_graft_qdisc(dev_queue, new); 1056 if (new && i > 0) 1057 qdisc_refcount_inc(new); 1058 1059 if (!ingress) 1060 qdisc_put(old); 1061 } 1062 1063 skip: 1064 if (!ingress) { 1065 notify_and_destroy(net, skb, n, classid, 1066 dev->qdisc, new); 1067 if (new && !new->ops->attach) 1068 qdisc_refcount_inc(new); 1069 dev->qdisc = new ? : &noop_qdisc; 1070 1071 if (new && new->ops->attach) 1072 new->ops->attach(new); 1073 } else { 1074 notify_and_destroy(net, skb, n, classid, old, new); 1075 } 1076 1077 if (dev->flags & IFF_UP) 1078 dev_activate(dev); 1079 } else { 1080 const struct Qdisc_class_ops *cops = parent->ops->cl_ops; 1081 unsigned long cl; 1082 int err; 1083 1084 /* Only support running class lockless if parent is lockless */ 1085 if (new && (new->flags & TCQ_F_NOLOCK) && 1086 parent && !(parent->flags & TCQ_F_NOLOCK)) 1087 new->flags &= ~TCQ_F_NOLOCK; 1088 1089 if (!cops || !cops->graft) 1090 return -EOPNOTSUPP; 1091 1092 cl = cops->find(parent, classid); 1093 if (!cl) { 1094 NL_SET_ERR_MSG(extack, "Specified class not found"); 1095 return -ENOENT; 1096 } 1097 1098 err = cops->graft(parent, cl, new, &old, extack); 1099 if (err) 1100 return err; 1101 notify_and_destroy(net, skb, n, classid, old, new); 1102 } 1103 return 0; 1104 } 1105 1106 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, 1107 struct netlink_ext_ack *extack) 1108 { 1109 u32 block_index; 1110 1111 if (tca[TCA_INGRESS_BLOCK]) { 1112 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); 1113 1114 if (!block_index) { 1115 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); 1116 return -EINVAL; 1117 } 1118 if (!sch->ops->ingress_block_set) { 1119 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); 1120 return -EOPNOTSUPP; 1121 } 1122 sch->ops->ingress_block_set(sch, block_index); 1123 } 1124 if (tca[TCA_EGRESS_BLOCK]) { 1125 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); 1126 1127 if (!block_index) { 1128 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); 1129 return -EINVAL; 1130 } 1131 if (!sch->ops->egress_block_set) { 1132 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); 1133 return -EOPNOTSUPP; 1134 } 1135 sch->ops->egress_block_set(sch, block_index); 1136 } 1137 return 0; 1138 } 1139 1140 /* 1141 Allocate and initialize new qdisc. 1142 1143 Parameters are passed via opt. 1144 */ 1145 1146 static struct Qdisc *qdisc_create(struct net_device *dev, 1147 struct netdev_queue *dev_queue, 1148 struct Qdisc *p, u32 parent, u32 handle, 1149 struct nlattr **tca, int *errp, 1150 struct netlink_ext_ack *extack) 1151 { 1152 int err; 1153 struct nlattr *kind = tca[TCA_KIND]; 1154 struct Qdisc *sch; 1155 struct Qdisc_ops *ops; 1156 struct qdisc_size_table *stab; 1157 1158 ops = qdisc_lookup_ops(kind); 1159 #ifdef CONFIG_MODULES 1160 if (ops == NULL && kind != NULL) { 1161 char name[IFNAMSIZ]; 1162 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { 1163 /* We dropped the RTNL semaphore in order to 1164 * perform the module load. So, even if we 1165 * succeeded in loading the module we have to 1166 * tell the caller to replay the request. We 1167 * indicate this using -EAGAIN. 1168 * We replay the request because the device may 1169 * go away in the mean time. 1170 */ 1171 rtnl_unlock(); 1172 request_module("sch_%s", name); 1173 rtnl_lock(); 1174 ops = qdisc_lookup_ops(kind); 1175 if (ops != NULL) { 1176 /* We will try again qdisc_lookup_ops, 1177 * so don't keep a reference. 1178 */ 1179 module_put(ops->owner); 1180 err = -EAGAIN; 1181 goto err_out; 1182 } 1183 } 1184 } 1185 #endif 1186 1187 err = -ENOENT; 1188 if (!ops) { 1189 NL_SET_ERR_MSG(extack, "Specified qdisc not found"); 1190 goto err_out; 1191 } 1192 1193 sch = qdisc_alloc(dev_queue, ops, extack); 1194 if (IS_ERR(sch)) { 1195 err = PTR_ERR(sch); 1196 goto err_out2; 1197 } 1198 1199 sch->parent = parent; 1200 1201 if (handle == TC_H_INGRESS) { 1202 sch->flags |= TCQ_F_INGRESS; 1203 handle = TC_H_MAKE(TC_H_INGRESS, 0); 1204 } else { 1205 if (handle == 0) { 1206 handle = qdisc_alloc_handle(dev); 1207 err = -ENOMEM; 1208 if (handle == 0) 1209 goto err_out3; 1210 } 1211 if (!netif_is_multiqueue(dev)) 1212 sch->flags |= TCQ_F_ONETXQUEUE; 1213 } 1214 1215 sch->handle = handle; 1216 1217 /* This exist to keep backward compatible with a userspace 1218 * loophole, what allowed userspace to get IFF_NO_QUEUE 1219 * facility on older kernels by setting tx_queue_len=0 (prior 1220 * to qdisc init), and then forgot to reinit tx_queue_len 1221 * before again attaching a qdisc. 1222 */ 1223 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { 1224 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; 1225 netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); 1226 } 1227 1228 err = qdisc_block_indexes_set(sch, tca, extack); 1229 if (err) 1230 goto err_out3; 1231 1232 if (ops->init) { 1233 err = ops->init(sch, tca[TCA_OPTIONS], extack); 1234 if (err != 0) 1235 goto err_out5; 1236 } 1237 1238 if (tca[TCA_STAB]) { 1239 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1240 if (IS_ERR(stab)) { 1241 err = PTR_ERR(stab); 1242 goto err_out4; 1243 } 1244 rcu_assign_pointer(sch->stab, stab); 1245 } 1246 if (tca[TCA_RATE]) { 1247 seqcount_t *running; 1248 1249 err = -EOPNOTSUPP; 1250 if (sch->flags & TCQ_F_MQROOT) { 1251 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); 1252 goto err_out4; 1253 } 1254 1255 if (sch->parent != TC_H_ROOT && 1256 !(sch->flags & TCQ_F_INGRESS) && 1257 (!p || !(p->flags & TCQ_F_MQROOT))) 1258 running = qdisc_root_sleeping_running(sch); 1259 else 1260 running = &sch->running; 1261 1262 err = gen_new_estimator(&sch->bstats, 1263 sch->cpu_bstats, 1264 &sch->rate_est, 1265 NULL, 1266 running, 1267 tca[TCA_RATE]); 1268 if (err) { 1269 NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); 1270 goto err_out4; 1271 } 1272 } 1273 1274 qdisc_hash_add(sch, false); 1275 1276 return sch; 1277 1278 err_out5: 1279 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ 1280 if (ops->destroy) 1281 ops->destroy(sch); 1282 err_out3: 1283 dev_put(dev); 1284 qdisc_free(sch); 1285 err_out2: 1286 module_put(ops->owner); 1287 err_out: 1288 *errp = err; 1289 return NULL; 1290 1291 err_out4: 1292 /* 1293 * Any broken qdiscs that would require a ops->reset() here? 1294 * The qdisc was never in action so it shouldn't be necessary. 1295 */ 1296 qdisc_put_stab(rtnl_dereference(sch->stab)); 1297 if (ops->destroy) 1298 ops->destroy(sch); 1299 goto err_out3; 1300 } 1301 1302 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, 1303 struct netlink_ext_ack *extack) 1304 { 1305 struct qdisc_size_table *ostab, *stab = NULL; 1306 int err = 0; 1307 1308 if (tca[TCA_OPTIONS]) { 1309 if (!sch->ops->change) { 1310 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); 1311 return -EINVAL; 1312 } 1313 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 1314 NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); 1315 return -EOPNOTSUPP; 1316 } 1317 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); 1318 if (err) 1319 return err; 1320 } 1321 1322 if (tca[TCA_STAB]) { 1323 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1324 if (IS_ERR(stab)) 1325 return PTR_ERR(stab); 1326 } 1327 1328 ostab = rtnl_dereference(sch->stab); 1329 rcu_assign_pointer(sch->stab, stab); 1330 qdisc_put_stab(ostab); 1331 1332 if (tca[TCA_RATE]) { 1333 /* NB: ignores errors from replace_estimator 1334 because change can't be undone. */ 1335 if (sch->flags & TCQ_F_MQROOT) 1336 goto out; 1337 gen_replace_estimator(&sch->bstats, 1338 sch->cpu_bstats, 1339 &sch->rate_est, 1340 NULL, 1341 qdisc_root_sleeping_running(sch), 1342 tca[TCA_RATE]); 1343 } 1344 out: 1345 return 0; 1346 } 1347 1348 struct check_loop_arg { 1349 struct qdisc_walker w; 1350 struct Qdisc *p; 1351 int depth; 1352 }; 1353 1354 static int check_loop_fn(struct Qdisc *q, unsigned long cl, 1355 struct qdisc_walker *w); 1356 1357 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) 1358 { 1359 struct check_loop_arg arg; 1360 1361 if (q->ops->cl_ops == NULL) 1362 return 0; 1363 1364 arg.w.stop = arg.w.skip = arg.w.count = 0; 1365 arg.w.fn = check_loop_fn; 1366 arg.depth = depth; 1367 arg.p = p; 1368 q->ops->cl_ops->walk(q, &arg.w); 1369 return arg.w.stop ? -ELOOP : 0; 1370 } 1371 1372 static int 1373 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) 1374 { 1375 struct Qdisc *leaf; 1376 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1377 struct check_loop_arg *arg = (struct check_loop_arg *)w; 1378 1379 leaf = cops->leaf(q, cl); 1380 if (leaf) { 1381 if (leaf == arg->p || arg->depth > 7) 1382 return -ELOOP; 1383 return check_loop(leaf, arg->p, arg->depth + 1); 1384 } 1385 return 0; 1386 } 1387 1388 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { 1389 [TCA_KIND] = { .type = NLA_STRING }, 1390 [TCA_RATE] = { .type = NLA_BINARY, 1391 .len = sizeof(struct tc_estimator) }, 1392 [TCA_STAB] = { .type = NLA_NESTED }, 1393 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, 1394 [TCA_CHAIN] = { .type = NLA_U32 }, 1395 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, 1396 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, 1397 }; 1398 1399 /* 1400 * Delete/get qdisc. 1401 */ 1402 1403 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1404 struct netlink_ext_ack *extack) 1405 { 1406 struct net *net = sock_net(skb->sk); 1407 struct tcmsg *tcm = nlmsg_data(n); 1408 struct nlattr *tca[TCA_MAX + 1]; 1409 struct net_device *dev; 1410 u32 clid; 1411 struct Qdisc *q = NULL; 1412 struct Qdisc *p = NULL; 1413 int err; 1414 1415 if ((n->nlmsg_type != RTM_GETQDISC) && 1416 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1417 return -EPERM; 1418 1419 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1420 extack); 1421 if (err < 0) 1422 return err; 1423 1424 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1425 if (!dev) 1426 return -ENODEV; 1427 1428 clid = tcm->tcm_parent; 1429 if (clid) { 1430 if (clid != TC_H_ROOT) { 1431 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 1432 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1433 if (!p) { 1434 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); 1435 return -ENOENT; 1436 } 1437 q = qdisc_leaf(p, clid); 1438 } else if (dev_ingress_queue(dev)) { 1439 q = dev_ingress_queue(dev)->qdisc_sleeping; 1440 } 1441 } else { 1442 q = dev->qdisc; 1443 } 1444 if (!q) { 1445 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); 1446 return -ENOENT; 1447 } 1448 1449 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { 1450 NL_SET_ERR_MSG(extack, "Invalid handle"); 1451 return -EINVAL; 1452 } 1453 } else { 1454 q = qdisc_lookup(dev, tcm->tcm_handle); 1455 if (!q) { 1456 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); 1457 return -ENOENT; 1458 } 1459 } 1460 1461 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1462 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1463 return -EINVAL; 1464 } 1465 1466 if (n->nlmsg_type == RTM_DELQDISC) { 1467 if (!clid) { 1468 NL_SET_ERR_MSG(extack, "Classid cannot be zero"); 1469 return -EINVAL; 1470 } 1471 if (q->handle == 0) { 1472 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); 1473 return -ENOENT; 1474 } 1475 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); 1476 if (err != 0) 1477 return err; 1478 } else { 1479 qdisc_notify(net, skb, n, clid, NULL, q); 1480 } 1481 return 0; 1482 } 1483 1484 /* 1485 * Create/change qdisc. 1486 */ 1487 1488 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1489 struct netlink_ext_ack *extack) 1490 { 1491 struct net *net = sock_net(skb->sk); 1492 struct tcmsg *tcm; 1493 struct nlattr *tca[TCA_MAX + 1]; 1494 struct net_device *dev; 1495 u32 clid; 1496 struct Qdisc *q, *p; 1497 int err; 1498 1499 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1500 return -EPERM; 1501 1502 replay: 1503 /* Reinit, just in case something touches this. */ 1504 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1505 extack); 1506 if (err < 0) 1507 return err; 1508 1509 tcm = nlmsg_data(n); 1510 clid = tcm->tcm_parent; 1511 q = p = NULL; 1512 1513 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1514 if (!dev) 1515 return -ENODEV; 1516 1517 1518 if (clid) { 1519 if (clid != TC_H_ROOT) { 1520 if (clid != TC_H_INGRESS) { 1521 p = qdisc_lookup(dev, TC_H_MAJ(clid)); 1522 if (!p) { 1523 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); 1524 return -ENOENT; 1525 } 1526 q = qdisc_leaf(p, clid); 1527 } else if (dev_ingress_queue_create(dev)) { 1528 q = dev_ingress_queue(dev)->qdisc_sleeping; 1529 } 1530 } else { 1531 q = dev->qdisc; 1532 } 1533 1534 /* It may be default qdisc, ignore it */ 1535 if (q && q->handle == 0) 1536 q = NULL; 1537 1538 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1539 if (tcm->tcm_handle) { 1540 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { 1541 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); 1542 return -EEXIST; 1543 } 1544 if (TC_H_MIN(tcm->tcm_handle)) { 1545 NL_SET_ERR_MSG(extack, "Invalid minor handle"); 1546 return -EINVAL; 1547 } 1548 q = qdisc_lookup(dev, tcm->tcm_handle); 1549 if (!q) 1550 goto create_n_graft; 1551 if (n->nlmsg_flags & NLM_F_EXCL) { 1552 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); 1553 return -EEXIST; 1554 } 1555 if (tca[TCA_KIND] && 1556 nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1557 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1558 return -EINVAL; 1559 } 1560 if (q == p || 1561 (p && check_loop(q, p, 0))) { 1562 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); 1563 return -ELOOP; 1564 } 1565 qdisc_refcount_inc(q); 1566 goto graft; 1567 } else { 1568 if (!q) 1569 goto create_n_graft; 1570 1571 /* This magic test requires explanation. 1572 * 1573 * We know, that some child q is already 1574 * attached to this parent and have choice: 1575 * either to change it or to create/graft new one. 1576 * 1577 * 1. We are allowed to create/graft only 1578 * if CREATE and REPLACE flags are set. 1579 * 1580 * 2. If EXCL is set, requestor wanted to say, 1581 * that qdisc tcm_handle is not expected 1582 * to exist, so that we choose create/graft too. 1583 * 1584 * 3. The last case is when no flags are set. 1585 * Alas, it is sort of hole in API, we 1586 * cannot decide what to do unambiguously. 1587 * For now we select create/graft, if 1588 * user gave KIND, which does not match existing. 1589 */ 1590 if ((n->nlmsg_flags & NLM_F_CREATE) && 1591 (n->nlmsg_flags & NLM_F_REPLACE) && 1592 ((n->nlmsg_flags & NLM_F_EXCL) || 1593 (tca[TCA_KIND] && 1594 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1595 goto create_n_graft; 1596 } 1597 } 1598 } else { 1599 if (!tcm->tcm_handle) { 1600 NL_SET_ERR_MSG(extack, "Handle cannot be zero"); 1601 return -EINVAL; 1602 } 1603 q = qdisc_lookup(dev, tcm->tcm_handle); 1604 } 1605 1606 /* Change qdisc parameters */ 1607 if (!q) { 1608 NL_SET_ERR_MSG(extack, "Specified qdisc not found"); 1609 return -ENOENT; 1610 } 1611 if (n->nlmsg_flags & NLM_F_EXCL) { 1612 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); 1613 return -EEXIST; 1614 } 1615 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { 1616 NL_SET_ERR_MSG(extack, "Invalid qdisc name"); 1617 return -EINVAL; 1618 } 1619 err = qdisc_change(q, tca, extack); 1620 if (err == 0) 1621 qdisc_notify(net, skb, n, clid, NULL, q); 1622 return err; 1623 1624 create_n_graft: 1625 if (!(n->nlmsg_flags & NLM_F_CREATE)) { 1626 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag"); 1627 return -ENOENT; 1628 } 1629 if (clid == TC_H_INGRESS) { 1630 if (dev_ingress_queue(dev)) { 1631 q = qdisc_create(dev, dev_ingress_queue(dev), p, 1632 tcm->tcm_parent, tcm->tcm_parent, 1633 tca, &err, extack); 1634 } else { 1635 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); 1636 err = -ENOENT; 1637 } 1638 } else { 1639 struct netdev_queue *dev_queue; 1640 1641 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1642 dev_queue = p->ops->cl_ops->select_queue(p, tcm); 1643 else if (p) 1644 dev_queue = p->dev_queue; 1645 else 1646 dev_queue = netdev_get_tx_queue(dev, 0); 1647 1648 q = qdisc_create(dev, dev_queue, p, 1649 tcm->tcm_parent, tcm->tcm_handle, 1650 tca, &err, extack); 1651 } 1652 if (q == NULL) { 1653 if (err == -EAGAIN) 1654 goto replay; 1655 return err; 1656 } 1657 1658 graft: 1659 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); 1660 if (err) { 1661 if (q) 1662 qdisc_put(q); 1663 return err; 1664 } 1665 1666 return 0; 1667 } 1668 1669 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, 1670 struct netlink_callback *cb, 1671 int *q_idx_p, int s_q_idx, bool recur, 1672 bool dump_invisible) 1673 { 1674 int ret = 0, q_idx = *q_idx_p; 1675 struct Qdisc *q; 1676 int b; 1677 1678 if (!root) 1679 return 0; 1680 1681 q = root; 1682 if (q_idx < s_q_idx) { 1683 q_idx++; 1684 } else { 1685 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1686 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1687 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1688 RTM_NEWQDISC) <= 0) 1689 goto done; 1690 q_idx++; 1691 } 1692 1693 /* If dumping singletons, there is no qdisc_dev(root) and the singleton 1694 * itself has already been dumped. 1695 * 1696 * If we've already dumped the top-level (ingress) qdisc above and the global 1697 * qdisc hashtable, we don't want to hit it again 1698 */ 1699 if (!qdisc_dev(root) || !recur) 1700 goto out; 1701 1702 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 1703 if (q_idx < s_q_idx) { 1704 q_idx++; 1705 continue; 1706 } 1707 if (!tc_qdisc_dump_ignore(q, dump_invisible) && 1708 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, 1709 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1710 RTM_NEWQDISC) <= 0) 1711 goto done; 1712 q_idx++; 1713 } 1714 1715 out: 1716 *q_idx_p = q_idx; 1717 return ret; 1718 done: 1719 ret = -1; 1720 goto out; 1721 } 1722 1723 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) 1724 { 1725 struct net *net = sock_net(skb->sk); 1726 int idx, q_idx; 1727 int s_idx, s_q_idx; 1728 struct net_device *dev; 1729 const struct nlmsghdr *nlh = cb->nlh; 1730 struct nlattr *tca[TCA_MAX + 1]; 1731 int err; 1732 1733 s_idx = cb->args[0]; 1734 s_q_idx = q_idx = cb->args[1]; 1735 1736 idx = 0; 1737 ASSERT_RTNL(); 1738 1739 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1740 rtm_tca_policy, cb->extack); 1741 if (err < 0) 1742 return err; 1743 1744 for_each_netdev(net, dev) { 1745 struct netdev_queue *dev_queue; 1746 1747 if (idx < s_idx) 1748 goto cont; 1749 if (idx > s_idx) 1750 s_q_idx = 0; 1751 q_idx = 0; 1752 1753 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, 1754 true, tca[TCA_DUMP_INVISIBLE]) < 0) 1755 goto done; 1756 1757 dev_queue = dev_ingress_queue(dev); 1758 if (dev_queue && 1759 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, 1760 &q_idx, s_q_idx, false, 1761 tca[TCA_DUMP_INVISIBLE]) < 0) 1762 goto done; 1763 1764 cont: 1765 idx++; 1766 } 1767 1768 done: 1769 cb->args[0] = idx; 1770 cb->args[1] = q_idx; 1771 1772 return skb->len; 1773 } 1774 1775 1776 1777 /************************************************ 1778 * Traffic classes manipulation. * 1779 ************************************************/ 1780 1781 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, 1782 unsigned long cl, 1783 u32 portid, u32 seq, u16 flags, int event) 1784 { 1785 struct tcmsg *tcm; 1786 struct nlmsghdr *nlh; 1787 unsigned char *b = skb_tail_pointer(skb); 1788 struct gnet_dump d; 1789 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; 1790 1791 cond_resched(); 1792 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); 1793 if (!nlh) 1794 goto out_nlmsg_trim; 1795 tcm = nlmsg_data(nlh); 1796 tcm->tcm_family = AF_UNSPEC; 1797 tcm->tcm__pad1 = 0; 1798 tcm->tcm__pad2 = 0; 1799 tcm->tcm_ifindex = qdisc_dev(q)->ifindex; 1800 tcm->tcm_parent = q->handle; 1801 tcm->tcm_handle = q->handle; 1802 tcm->tcm_info = 0; 1803 if (nla_put_string(skb, TCA_KIND, q->ops->id)) 1804 goto nla_put_failure; 1805 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) 1806 goto nla_put_failure; 1807 1808 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1809 NULL, &d, TCA_PAD) < 0) 1810 goto nla_put_failure; 1811 1812 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) 1813 goto nla_put_failure; 1814 1815 if (gnet_stats_finish_copy(&d) < 0) 1816 goto nla_put_failure; 1817 1818 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 1819 return skb->len; 1820 1821 out_nlmsg_trim: 1822 nla_put_failure: 1823 nlmsg_trim(skb, b); 1824 return -1; 1825 } 1826 1827 static int tclass_notify(struct net *net, struct sk_buff *oskb, 1828 struct nlmsghdr *n, struct Qdisc *q, 1829 unsigned long cl, int event) 1830 { 1831 struct sk_buff *skb; 1832 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1833 1834 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1835 if (!skb) 1836 return -ENOBUFS; 1837 1838 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { 1839 kfree_skb(skb); 1840 return -EINVAL; 1841 } 1842 1843 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1844 n->nlmsg_flags & NLM_F_ECHO); 1845 } 1846 1847 static int tclass_del_notify(struct net *net, 1848 const struct Qdisc_class_ops *cops, 1849 struct sk_buff *oskb, struct nlmsghdr *n, 1850 struct Qdisc *q, unsigned long cl) 1851 { 1852 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; 1853 struct sk_buff *skb; 1854 int err = 0; 1855 1856 if (!cops->delete) 1857 return -EOPNOTSUPP; 1858 1859 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1860 if (!skb) 1861 return -ENOBUFS; 1862 1863 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, 1864 RTM_DELTCLASS) < 0) { 1865 kfree_skb(skb); 1866 return -EINVAL; 1867 } 1868 1869 err = cops->delete(q, cl); 1870 if (err) { 1871 kfree_skb(skb); 1872 return err; 1873 } 1874 1875 return rtnetlink_send(skb, net, portid, RTNLGRP_TC, 1876 n->nlmsg_flags & NLM_F_ECHO); 1877 } 1878 1879 #ifdef CONFIG_NET_CLS 1880 1881 struct tcf_bind_args { 1882 struct tcf_walker w; 1883 u32 classid; 1884 unsigned long cl; 1885 }; 1886 1887 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) 1888 { 1889 struct tcf_bind_args *a = (void *)arg; 1890 1891 if (tp->ops->bind_class) { 1892 struct Qdisc *q = tcf_block_q(tp->chain->block); 1893 1894 sch_tree_lock(q); 1895 tp->ops->bind_class(n, a->classid, a->cl); 1896 sch_tree_unlock(q); 1897 } 1898 return 0; 1899 } 1900 1901 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1902 unsigned long new_cl) 1903 { 1904 const struct Qdisc_class_ops *cops = q->ops->cl_ops; 1905 struct tcf_block *block; 1906 struct tcf_chain *chain; 1907 unsigned long cl; 1908 1909 cl = cops->find(q, portid); 1910 if (!cl) 1911 return; 1912 block = cops->tcf_block(q, cl, NULL); 1913 if (!block) 1914 return; 1915 list_for_each_entry(chain, &block->chain_list, list) { 1916 struct tcf_proto *tp; 1917 1918 for (tp = rtnl_dereference(chain->filter_chain); 1919 tp; tp = rtnl_dereference(tp->next)) { 1920 struct tcf_bind_args arg = {}; 1921 1922 arg.w.fn = tcf_node_bind; 1923 arg.classid = clid; 1924 arg.cl = new_cl; 1925 tp->ops->walk(tp, &arg.w); 1926 } 1927 } 1928 } 1929 1930 #else 1931 1932 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, 1933 unsigned long new_cl) 1934 { 1935 } 1936 1937 #endif 1938 1939 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, 1940 struct netlink_ext_ack *extack) 1941 { 1942 struct net *net = sock_net(skb->sk); 1943 struct tcmsg *tcm = nlmsg_data(n); 1944 struct nlattr *tca[TCA_MAX + 1]; 1945 struct net_device *dev; 1946 struct Qdisc *q = NULL; 1947 const struct Qdisc_class_ops *cops; 1948 unsigned long cl = 0; 1949 unsigned long new_cl; 1950 u32 portid; 1951 u32 clid; 1952 u32 qid; 1953 int err; 1954 1955 if ((n->nlmsg_type != RTM_GETTCLASS) && 1956 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1957 return -EPERM; 1958 1959 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1960 extack); 1961 if (err < 0) 1962 return err; 1963 1964 dev = __dev_get_by_index(net, tcm->tcm_ifindex); 1965 if (!dev) 1966 return -ENODEV; 1967 1968 /* 1969 parent == TC_H_UNSPEC - unspecified parent. 1970 parent == TC_H_ROOT - class is root, which has no parent. 1971 parent == X:0 - parent is root class. 1972 parent == X:Y - parent is a node in hierarchy. 1973 parent == 0:Y - parent is X:Y, where X:0 is qdisc. 1974 1975 handle == 0:0 - generate handle from kernel pool. 1976 handle == 0:Y - class is X:Y, where X:0 is qdisc. 1977 handle == X:Y - clear. 1978 handle == X:0 - root class. 1979 */ 1980 1981 /* Step 1. Determine qdisc handle X:0 */ 1982 1983 portid = tcm->tcm_parent; 1984 clid = tcm->tcm_handle; 1985 qid = TC_H_MAJ(clid); 1986 1987 if (portid != TC_H_ROOT) { 1988 u32 qid1 = TC_H_MAJ(portid); 1989 1990 if (qid && qid1) { 1991 /* If both majors are known, they must be identical. */ 1992 if (qid != qid1) 1993 return -EINVAL; 1994 } else if (qid1) { 1995 qid = qid1; 1996 } else if (qid == 0) 1997 qid = dev->qdisc->handle; 1998 1999 /* Now qid is genuine qdisc handle consistent 2000 * both with parent and child. 2001 * 2002 * TC_H_MAJ(portid) still may be unspecified, complete it now. 2003 */ 2004 if (portid) 2005 portid = TC_H_MAKE(qid, portid); 2006 } else { 2007 if (qid == 0) 2008 qid = dev->qdisc->handle; 2009 } 2010 2011 /* OK. Locate qdisc */ 2012 q = qdisc_lookup(dev, qid); 2013 if (!q) 2014 return -ENOENT; 2015 2016 /* An check that it supports classes */ 2017 cops = q->ops->cl_ops; 2018 if (cops == NULL) 2019 return -EINVAL; 2020 2021 /* Now try to get class */ 2022 if (clid == 0) { 2023 if (portid == TC_H_ROOT) 2024 clid = qid; 2025 } else 2026 clid = TC_H_MAKE(qid, clid); 2027 2028 if (clid) 2029 cl = cops->find(q, clid); 2030 2031 if (cl == 0) { 2032 err = -ENOENT; 2033 if (n->nlmsg_type != RTM_NEWTCLASS || 2034 !(n->nlmsg_flags & NLM_F_CREATE)) 2035 goto out; 2036 } else { 2037 switch (n->nlmsg_type) { 2038 case RTM_NEWTCLASS: 2039 err = -EEXIST; 2040 if (n->nlmsg_flags & NLM_F_EXCL) 2041 goto out; 2042 break; 2043 case RTM_DELTCLASS: 2044 err = tclass_del_notify(net, cops, skb, n, q, cl); 2045 /* Unbind the class with flilters with 0 */ 2046 tc_bind_tclass(q, portid, clid, 0); 2047 goto out; 2048 case RTM_GETTCLASS: 2049 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); 2050 goto out; 2051 default: 2052 err = -EINVAL; 2053 goto out; 2054 } 2055 } 2056 2057 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { 2058 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); 2059 return -EOPNOTSUPP; 2060 } 2061 2062 new_cl = cl; 2063 err = -EOPNOTSUPP; 2064 if (cops->change) 2065 err = cops->change(q, clid, portid, tca, &new_cl, extack); 2066 if (err == 0) { 2067 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); 2068 /* We just create a new class, need to do reverse binding. */ 2069 if (cl != new_cl) 2070 tc_bind_tclass(q, portid, clid, new_cl); 2071 } 2072 out: 2073 return err; 2074 } 2075 2076 struct qdisc_dump_args { 2077 struct qdisc_walker w; 2078 struct sk_buff *skb; 2079 struct netlink_callback *cb; 2080 }; 2081 2082 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, 2083 struct qdisc_walker *arg) 2084 { 2085 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; 2086 2087 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, 2088 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, 2089 RTM_NEWTCLASS); 2090 } 2091 2092 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, 2093 struct tcmsg *tcm, struct netlink_callback *cb, 2094 int *t_p, int s_t) 2095 { 2096 struct qdisc_dump_args arg; 2097 2098 if (tc_qdisc_dump_ignore(q, false) || 2099 *t_p < s_t || !q->ops->cl_ops || 2100 (tcm->tcm_parent && 2101 TC_H_MAJ(tcm->tcm_parent) != q->handle)) { 2102 (*t_p)++; 2103 return 0; 2104 } 2105 if (*t_p > s_t) 2106 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); 2107 arg.w.fn = qdisc_class_dump; 2108 arg.skb = skb; 2109 arg.cb = cb; 2110 arg.w.stop = 0; 2111 arg.w.skip = cb->args[1]; 2112 arg.w.count = 0; 2113 q->ops->cl_ops->walk(q, &arg.w); 2114 cb->args[1] = arg.w.count; 2115 if (arg.w.stop) 2116 return -1; 2117 (*t_p)++; 2118 return 0; 2119 } 2120 2121 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, 2122 struct tcmsg *tcm, struct netlink_callback *cb, 2123 int *t_p, int s_t) 2124 { 2125 struct Qdisc *q; 2126 int b; 2127 2128 if (!root) 2129 return 0; 2130 2131 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) 2132 return -1; 2133 2134 if (!qdisc_dev(root)) 2135 return 0; 2136 2137 if (tcm->tcm_parent) { 2138 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); 2139 if (q && q != root && 2140 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2141 return -1; 2142 return 0; 2143 } 2144 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { 2145 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) 2146 return -1; 2147 } 2148 2149 return 0; 2150 } 2151 2152 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 2153 { 2154 struct tcmsg *tcm = nlmsg_data(cb->nlh); 2155 struct net *net = sock_net(skb->sk); 2156 struct netdev_queue *dev_queue; 2157 struct net_device *dev; 2158 int t, s_t; 2159 2160 if (nlmsg_len(cb->nlh) < sizeof(*tcm)) 2161 return 0; 2162 dev = dev_get_by_index(net, tcm->tcm_ifindex); 2163 if (!dev) 2164 return 0; 2165 2166 s_t = cb->args[0]; 2167 t = 0; 2168 2169 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 2170 goto done; 2171 2172 dev_queue = dev_ingress_queue(dev); 2173 if (dev_queue && 2174 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, 2175 &t, s_t) < 0) 2176 goto done; 2177 2178 done: 2179 cb->args[0] = t; 2180 2181 dev_put(dev); 2182 return skb->len; 2183 } 2184 2185 #ifdef CONFIG_PROC_FS 2186 static int psched_show(struct seq_file *seq, void *v) 2187 { 2188 seq_printf(seq, "%08x %08x %08x %08x\n", 2189 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 2190 1000000, 2191 (u32)NSEC_PER_SEC / hrtimer_resolution); 2192 2193 return 0; 2194 } 2195 2196 static int __net_init psched_net_init(struct net *net) 2197 { 2198 struct proc_dir_entry *e; 2199 2200 e = proc_create_single("psched", 0, net->proc_net, psched_show); 2201 if (e == NULL) 2202 return -ENOMEM; 2203 2204 return 0; 2205 } 2206 2207 static void __net_exit psched_net_exit(struct net *net) 2208 { 2209 remove_proc_entry("psched", net->proc_net); 2210 } 2211 #else 2212 static int __net_init psched_net_init(struct net *net) 2213 { 2214 return 0; 2215 } 2216 2217 static void __net_exit psched_net_exit(struct net *net) 2218 { 2219 } 2220 #endif 2221 2222 static struct pernet_operations psched_net_ops = { 2223 .init = psched_net_init, 2224 .exit = psched_net_exit, 2225 }; 2226 2227 static int __init pktsched_init(void) 2228 { 2229 int err; 2230 2231 err = register_pernet_subsys(&psched_net_ops); 2232 if (err) { 2233 pr_err("pktsched_init: " 2234 "cannot initialize per netns operations\n"); 2235 return err; 2236 } 2237 2238 register_qdisc(&pfifo_fast_ops); 2239 register_qdisc(&pfifo_qdisc_ops); 2240 register_qdisc(&bfifo_qdisc_ops); 2241 register_qdisc(&pfifo_head_drop_qdisc_ops); 2242 register_qdisc(&mq_qdisc_ops); 2243 register_qdisc(&noqueue_qdisc_ops); 2244 2245 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); 2246 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); 2247 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, 2248 0); 2249 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); 2250 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); 2251 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, 2252 0); 2253 2254 return 0; 2255 } 2256 2257 subsys_initcall(pktsched_init); 2258