/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to
   send something), in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to perform sanity checks and the part of
   the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing a packet from the queue.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
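
/* For illustration only: a minimal sketch of the ops contract described
 * above, for a hypothetical pass-through discipline ("example"); it is
 * not part of this file and omits locking and statistics:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		return qdisc_enqueue_tail(skb, sch);
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);
 *	}
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 */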

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
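
/* A minimal sketch of how a scheduler module typically uses the
 * registration API above, assuming the hypothetical example_qdisc_ops
 * from the comment near the top of this file:
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *
 *	module_init(example_module_init);
 *	module_exit(example_module_exit);
 *
 * register_qdisc() fails with -EEXIST if ops->id is already taken; once
 * built as a module, the qdisc can also be loaded on demand via
 * request_module("sch_<id>"), as qdisc_set_default() does above.
 */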

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* In older iproute2 versions the linklayer setting was not transferred,
 * and the rate table lookup system has been dropped from the kernel. To
 * stay backward compatible with older iproute2 tc utilities, we detect
 * the linklayer setting by checking whether the rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding the mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below that, and comparing the two.
 */
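
/* A worked example of the detection described above, with hypothetical
 * parameters mpu = 0 and cell_log = 3: low = roundup(0, 48) = 0 and
 * high = roundup(1, 48) = 48, so cell_low = 0 >> 3 = 0 and
 * cell_high = (48 >> 3) - 1 = 5.  Packet sizes 1..48 all occupy a single
 * 48-byte ATM cell, so an ATM-aligned table has rtab[0] == rtab[5],
 * while an Ethernet table normally has rtab[0] != rtab[5].
 */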
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
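
/* A worked example of the lookup above, with hypothetical size-table
 * parameters overhead = 0, cell_align = 0, cell_log = 6, size_log = 0
 * and tsize = 512: a 100-byte skb yields slot = 100 >> 6 = 1, so
 * pkt_len becomes stab->data[1].  A user-supplied table can thereby
 * round every packet up to, e.g., the cell size of the underlying link.
 */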

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	return 0;
}
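
/* A sketch of the handle helpers used above, assuming the hypothetical
 * handle 0x80010002, which userspace writes as "8001:2":
 *
 *	TC_H_MAJ(0x80010002)  == 0x80010000	("8001:")
 *	TC_H_MIN(0x80010002)  == 0x00000002	(":2")
 *	TC_H_MAKE(0x80010000, 0x2) == 0x80010002
 *
 * qdisc_alloc_handle() hands out only major numbers 0x8000..0xffff with
 * a minor of zero, i.e. qdisc handles "8000:" through "ffff:".
 */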

void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty.
		 *
		 * If child was empty even before update then backlog
		 * counter is screwed and we skip notification because
		 * parent class is already passive.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
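
/* A minimal usage sketch for the helper above: a classful qdisc that has
 * just removed one packet of pkt_len bytes from a child qdisc would
 * propagate the change upwards with
 *
 *	qdisc_tree_reduce_backlog(child, 1, pkt_len);
 *
 * so that the qlen/backlog counters of all ancestors stay consistent and
 * parent classes that became empty are notified via ->qlen_notify().
 */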

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;

	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			new->flags &= ~TCQ_F_NOLOCK;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (cl) {
				err = cops->graft(parent, cl, new, &old,
						  extack);
			} else {
				NL_SET_ERR_MSG(extack, "Specified class not found");
				err = -ENOENT;
			}
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
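
/* For orientation, how the two qdisc_graft() branches above map to user
 * commands (hypothetical handles):
 *
 *	tc qdisc add dev eth0 root handle 1: htb
 *		parent == NULL: the new qdisc is attached to the device
 *		itself, via dev_graft_qdisc() on each tx queue.
 *
 *	tc qdisc add dev eth0 parent 1:1 handle 10: pfifo
 *		parent != NULL: class 1:1 is looked up with cops->find()
 *		and the new qdisc is attached via cops->graft().
 */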

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try qdisc_lookup_ops() again,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE facility
	 * on older kernels by setting tx_queue_len=0 (prior to qdisc init)
	 * and then forgetting to reinit tx_queue_len before attaching a
	 * qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out5;
	}

	if (qdisc_is_percpu_stats(sch)) {
		sch->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			goto err_out4;

		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!sch->cpu_qstats)
			goto err_out4;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out4;
		}
		rcu_assign_pointer(sch->stab, stab);
	}
	if (tca[TCA_RATE]) {
		seqcount_t *running;

		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		if (sch->parent != TC_H_ROOT &&
		    !(sch->flags & TCQ_F_INGRESS) &&
		    (!p || !(p->flags & TCQ_F_MQROOT)))
			running = qdisc_root_sleeping_running(sch);
		else
			running = &sch->running;

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					running,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);

	return sch;

err_out5:
	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
	if (ops->destroy)
		ops->destroy(sch);
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and we have a
				 * choice: either to change it or to
				 * create/graft a new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if the CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor wanted
				 * to say that the qdisc tcm_handle is not
				 * expected to exist, so we choose
				 * create/graft too.
				 *
				 * 3. The last case is when no flags are
				 * set. Alas, it is a sort of hole in the
				 * API: we cannot decide what to do
				 * unambiguously. For now we select
				 * create/graft if the user gave a KIND
				 * that does not match the existing one.
				 */
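				/* In summary, with a child q attached:
				 *
				 *  CREATE|REPLACE|EXCL          -> create/graft
				 *  CREATE|REPLACE, KIND differs -> create/graft
				 *  CREATE|REPLACE, KIND matches -> change below
				 *  anything else                -> change below
				 */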
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}


/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	u32 classid;
	unsigned long cl;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl);
		sch_tree_unlock(q);
	}
	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w);
		}
	}
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
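
	/* For example (hypothetical handles): "tc class add dev eth0
	 * parent 1:1 classid 1:20 ..." arrives here with parent == 1:1
	 * and handle == 1:20; both majors agree, so qid resolves to 1:
	 * and the qdisc "1:" is looked up below.  With handle == 0:0,
	 * the class identity is left to the qdisc's ->change() (the
	 * "generate handle from kernel pool" case above).
	 */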

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from filters with 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; we need to do the reverse
		 * binding.
		 */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
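
/* A sample of the resulting file (hypothetical values from a system with
 * 1 ns hrtimer resolution; the third field is a historical constant):
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per psched microsecond, 64 ns per psched tick, the
 * constant 1000000, and 10^9 timer ticks per second.
 */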

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);