/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to
   send something), in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more
   intelligible to the kernel, to perform some sanity checks and the part
   of the work that is common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it only means that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If the packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue.

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.
   ---change

   changes qdisc parameters.
*/

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get the default qdisc if one is not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set the new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set the default value from the kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
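/*
 * Illustrative sketch (not part of this file): how a qdisc module would use
 * register_qdisc()/unregister_qdisc() above.  "example_qdisc_ops" and the
 * example_* callbacks are hypothetical names; a real qdisc also implements
 * enqueue/dequeue/init/reset and so on:
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);	// -EEXIST if "example" is taken
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 */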
/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.).
 * Note: the caller either holds the RTNL semaphore or rcu_read_lock().
 */
static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
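/*
 * A handle like the ones looked up above is a 32-bit value composed of a
 * 16-bit major and a 16-bit minor number ("major:minor" in tc syntax).
 * A small illustrative sketch of the helper macros (the concrete values
 * are examples only):
 *
 *	u32 h = TC_H_MAKE(0x8001U << 16, 0x10);	// handle 8001:10
 *
 *	TC_H_MAJ(h);	// 0x80010000 - the qdisc (major) part
 *	TC_H_MIN(h);	// 0x00000010 - the class (minor) part
 *
 * qdisc_lookup() matches full qdisc handles (minor == 0), which is why
 * callers that start from a classid first mask it with TC_H_MAJ().
 */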
/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from the
 * kernel. To stay backward compatible with older iproute2 tc utils, we
 * detect the linklayer setting by checking whether the rate table was
 * modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to 48
 * bytes, so some table entries will contain the same value. The mpu
 * (min packet unit) is also encoded into the old rate table, so starting
 * from the mpu we find the low and high table entries for mapping this
 * cell. If these entries contain the same value, then the rate table has
 * been modified for linklayer ATM.
 *
 * This is done by rounding mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low + 1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000 / 8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		return stab;
	}

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
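/*
 * Typical use of the watchdog above by a shaping qdisc (illustrative
 * sketch; "struct example_sched", its fields and "now" are assumptions
 * for illustration):
 *
 *	struct example_sched {
 *		struct qdisc_watchdog	watchdog;
 *		u64			next_send_ns;
 *	};
 *
 *	// in ->init():
 *	qdisc_watchdog_init(&q->watchdog, sch);
 *
 *	// in ->dequeue(), when the head packet may not be sent yet:
 *	u64 now = ktime_get_ns();
 *	if (now < q->next_send_ns) {
 *		qdisc_watchdog_schedule_ns(&q->watchdog, q->next_send_ns);
 *		return NULL;	// not empty, just not ready; see ---dequeue above
 *	}
 *
 *	// in ->reset() and ->destroy():
 *	qdisc_watchdog_cancel(&q->watchdog);
 *
 * When the hrtimer fires, qdisc_watchdog() reschedules the root qdisc so
 * that ->dequeue() is retried.
 */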
static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
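/*
 * How a classful qdisc typically uses the class hash above (illustrative
 * sketch; "struct example_class" embedding a Qdisc_class_common is an
 * assumption for illustration):
 *
 *	struct example_class {
 *		struct Qdisc_class_common common;	// classid + hash node
 *	};
 *
 *	// in ->init():
 *	err = qdisc_class_hash_init(&q->clhash);
 *
 *	// when creating a class in ->change():
 *	cl->common.classid = classid;
 *	sch_tree_lock(sch);
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	sch_tree_unlock(sch);
 *	qdisc_class_hash_grow(sch, &q->clhash);	// rehashes under sch_tree_lock
 *
 *	// lookup by classid:
 *	struct Qdisc_class_common *c = qdisc_class_find(&q->clhash, classid);
 */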
/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify the parent qdisc only if the child qdisc becomes
		 * empty.
		 *
		 * If the child was empty even before this update, then the
		 * backlog counter is screwed and we skip the notification
		 * because the parent class is already passive.
		 */
		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
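/*
 * Illustrative sketch of when a qdisc calls qdisc_tree_reduce_backlog():
 * after dropping packets outside the normal enqueue/dequeue path (for
 * example when a smaller limit set via ->change() forces the queue to
 * shrink), the qlen/backlog of all ancestor qdiscs must be fixed up.
 * "new_limit" and the counters are assumptions for illustration:
 *
 *	unsigned int dropped_packets = 0, dropped_bytes = 0;
 *
 *	while (sch->q.qlen > new_limit) {
 *		struct sk_buff *skb = __qdisc_dequeue_head(&sch->q);
 *
 *		dropped_bytes += qdisc_pkt_len(skb);
 *		dropped_packets++;
 *		rtnl_kfree_skbs(skb, skb);
 *	}
 *	qdisc_tree_reduce_backlog(sch, dropped_packets, dropped_bytes);
 */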
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'
 * and 'n'.
 *
 * On success, destroy the old qdisc.
 */
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev)) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				qdisc_refcount_inc(new);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) &&
		    parent && !(parent->flags & TCQ_F_NOLOCK))
			new->flags &= ~TCQ_F_NOLOCK;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->find(parent, classid);

			if (cl) {
				err = cops->graft(parent, cl, new, &old,
						  extack);
			} else {
				NL_SET_ERR_MSG(extack, "Specified class not found");
				err = -ENOENT;
			}
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
/*
 * Allocate and initialize a new qdisc.
 *
 * Parameters are passed via opt.
 */
static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *p, u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];

		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the meantime.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (!ops) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
1085 */ 1086 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { 1087 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; 1088 netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); 1089 } 1090 1091 if (ops->init) { 1092 err = ops->init(sch, tca[TCA_OPTIONS], extack); 1093 if (err != 0) 1094 goto err_out5; 1095 } 1096 1097 if (qdisc_is_percpu_stats(sch)) { 1098 sch->cpu_bstats = 1099 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu); 1100 if (!sch->cpu_bstats) 1101 goto err_out4; 1102 1103 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue); 1104 if (!sch->cpu_qstats) 1105 goto err_out4; 1106 } 1107 1108 if (tca[TCA_STAB]) { 1109 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1110 if (IS_ERR(stab)) { 1111 err = PTR_ERR(stab); 1112 goto err_out4; 1113 } 1114 rcu_assign_pointer(sch->stab, stab); 1115 } 1116 if (tca[TCA_RATE]) { 1117 seqcount_t *running; 1118 1119 err = -EOPNOTSUPP; 1120 if (sch->flags & TCQ_F_MQROOT) { 1121 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); 1122 goto err_out4; 1123 } 1124 1125 if (sch->parent != TC_H_ROOT && 1126 !(sch->flags & TCQ_F_INGRESS) && 1127 (!p || !(p->flags & TCQ_F_MQROOT))) 1128 running = qdisc_root_sleeping_running(sch); 1129 else 1130 running = &sch->running; 1131 1132 err = gen_new_estimator(&sch->bstats, 1133 sch->cpu_bstats, 1134 &sch->rate_est, 1135 NULL, 1136 running, 1137 tca[TCA_RATE]); 1138 if (err) { 1139 NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); 1140 goto err_out4; 1141 } 1142 } 1143 1144 qdisc_hash_add(sch, false); 1145 1146 return sch; 1147 1148 err_out5: 1149 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */ 1150 if (ops->destroy) 1151 ops->destroy(sch); 1152 err_out3: 1153 dev_put(dev); 1154 kfree((char *) sch - sch->padded); 1155 err_out2: 1156 module_put(ops->owner); 1157 err_out: 1158 *errp = err; 1159 return NULL; 1160 1161 err_out4: 1162 free_percpu(sch->cpu_bstats); 1163 free_percpu(sch->cpu_qstats); 1164 /* 1165 * Any broken qdiscs that would require a ops->reset() here? 1166 * The qdisc was never in action so it shouldn't be necessary. 1167 */ 1168 qdisc_put_stab(rtnl_dereference(sch->stab)); 1169 if (ops->destroy) 1170 ops->destroy(sch); 1171 goto err_out3; 1172 } 1173 1174 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, 1175 struct netlink_ext_ack *extack) 1176 { 1177 struct qdisc_size_table *ostab, *stab = NULL; 1178 int err = 0; 1179 1180 if (tca[TCA_OPTIONS]) { 1181 if (!sch->ops->change) { 1182 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); 1183 return -EINVAL; 1184 } 1185 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); 1186 if (err) 1187 return err; 1188 } 1189 1190 if (tca[TCA_STAB]) { 1191 stab = qdisc_get_stab(tca[TCA_STAB], extack); 1192 if (IS_ERR(stab)) 1193 return PTR_ERR(stab); 1194 } 1195 1196 ostab = rtnl_dereference(sch->stab); 1197 rcu_assign_pointer(sch->stab, stab); 1198 qdisc_put_stab(ostab); 1199 1200 if (tca[TCA_RATE]) { 1201 /* NB: ignores errors from replace_estimator 1202 because change can't be undone. 
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
/*
 * Create/change qdisc.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a choice:
				 * either to change it or to create/graft a
				 * new one.
				 *
				 * 1. We are allowed to create/graft only
				 *    if the CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, the requestor wanted to
				 *    say that the qdisc tcm_handle is not
				 *    expected to exist, so we choose
				 *    create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 *    Alas, it is sort of a hole in the API;
				 *    we cannot decide what to do
				 *    unambiguously. For now we select
				 *    create/graft if the user gave a KIND
				 *    that does not match the existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
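/*
 * The flag handling in tc_modify_qdisc() above, summarized against the
 * requests that iproute2 is commonly assumed to generate (illustrative,
 * not normative):
 *
 *	NLM_F_CREATE | NLM_F_EXCL	"tc qdisc add":     fails with
 *					-EEXIST if a qdisc is already there.
 *	NLM_F_CREATE | NLM_F_REPLACE	"tc qdisc replace": grafts a new
 *					qdisc even if one is attached.
 *	no flags			"tc qdisc change":  modifies the
 *					existing qdisc's parameters in place
 *					via qdisc_change().
 */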
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}
static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
			   RTM_DELTCLASS) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	err = cops->delete(q, cl);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	u32 classid;
	unsigned long cl;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl);
		sch_tree_unlock(q);
	}
	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;
	unsigned long cl;

	cl = cops->find(q, portid);
	if (!cl)
		return;
	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return;
	list_for_each_entry(chain, &block->chain_list, list) {
		struct tcf_proto *tp;

		for (tp = rtnl_dereference(chain->filter_chain);
		     tp; tp = rtnl_dereference(tp->next)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = clid;
			arg.cl = new_cl;
			tp->ops->walk(tp, &arg.w);
		}
	}
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	 * parent == TC_H_UNSPEC - unspecified parent.
	 * parent == TC_H_ROOT   - class is root, which has no parent.
	 * parent == X:0	 - parent is root class.
	 * parent == X:Y	 - parent is a node in hierarchy.
	 * parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
	 *
	 * handle == 0:0	 - generate handle from kernel pool.
	 * handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	 * handle == X:Y	 - the class is X:Y (self-evident).
	 * handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both the parent and the child.
		 *
		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate the qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get the class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			/* Unbind the class from its filters by rebinding
			 * them to class 0.
			 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
		/* We just created a new class; we need to do the reverse
		 * binding.
		 */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.open	 = psched_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
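/*
 * The four hex fields printed by psched_show() above, with example output
 * from a system with high-resolution timers (values are illustrative):
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * field 1: nanoseconds per psched microsecond (NSEC_PER_USEC, 1000)
 * field 2: nanoseconds per psched tick (PSCHED_TICKS2NS(1))
 * field 3: historical clock resolution in microseconds (fixed 1000000)
 * field 4: hrtimer ticks per second (NSEC_PER_SEC / hrtimer_resolution)
 */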
static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);