1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * net/sched/sch_api.c Packet scheduler API.
4 *
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Fixes:
8 *
9 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12 */
13
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28 #include <linux/bpf.h>
29
30 #include <net/netdev_lock.h>
31 #include <net/net_namespace.h>
32 #include <net/sock.h>
33 #include <net/netlink.h>
34 #include <net/pkt_sched.h>
35 #include <net/pkt_cls.h>
36 #include <net/tc_wrapper.h>
37
38 #include <trace/events/qdisc.h>
39
40 /*
41
42 Short review.
43 -------------
44
45 This file consists of two interrelated parts:
46
47 1. queueing disciplines manager frontend.
48 2. traffic classes manager frontend.
49
50 Generally, a queueing discipline ("qdisc") is a black box,
51 which is able to enqueue packets and to dequeue them (when the
52 device is ready to send something) in an order and at times
53 determined by the algorithm hidden inside it.
54
55 qdiscs are divided into two categories:
56 - "queues", which have no internal structure visible from outside.
57 - "schedulers", which split all packets into "traffic classes",
58 using "packet classifiers" (see cls_api.c).
59
60 In turn, classes may have child qdiscs (as a rule, queues)
61 attached to them, and so on recursively.
62
63 The goal of the routines in this file is to translate
64 the information supplied by the user in the form of handles
65 into a form more intelligible to the kernel, to perform the
66 sanity checks and the parts of the work common to all qdiscs,
67 and to provide rtnetlink notifications.
68
69 All real intelligent work is done inside qdisc modules.
70
71
72
73 Every discipline has two major routines: enqueue and dequeue.
74
75 ---dequeue
76
77 dequeue usually returns an skb to send. It is allowed to return NULL,
78 but that does not mean the queue is empty; it only means that the
79 discipline does not want to send anything at this time.
80 The queue is really empty if q->q.qlen == 0.
81 For complicated disciplines with multiple queues, q->q is not the
82 real packet queue, but q->q.qlen must still be kept valid.
83
84 ---enqueue
85
86 enqueue returns 0 if the packet was enqueued successfully.
87 If a packet (this one or another one) was dropped, it returns
88 a non-zero error code.
89 NET_XMIT_DROP - this packet was dropped.
90 Expected action: do not back off, but wait until the queue clears.
91 NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
92 Expected action: back off or ignore.
93
94 Auxiliary routines:
95
96 ---peek
97
98 like dequeue but without removing a packet from the queue
99
100 ---reset
101
102 returns the qdisc to its initial state: purges all buffers, clears all
103 timers, counters (except statistics), etc.
104
105 ---init
106
107 initializes a newly created qdisc.
108
109 ---destroy
110
111 destroys resources allocated by init and during the lifetime of the qdisc.
112
113 ---change
114
115 changes qdisc parameters.
116 */
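
/* A minimal sketch of how a caller reacts to the enqueue return codes
 * described above, assuming the three-argument ->enqueue() signature;
 * illustrative only, "q", "skb" and "to_free" are hypothetical locals,
 * not names from this file:
 *
 *	int err = q->enqueue(skb, q, &to_free);
 *
 *	if (err == NET_XMIT_SUCCESS)
 *		...		- the qdisc now owns the skb
 *	else if (err == NET_XMIT_CN)
 *		...		- congestion; this skb may still have been queued
 *	else
 *		...		- e.g. NET_XMIT_DROP: the skb was dropped
 */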
117
118 /* Protects the list of registered TC modules. It is a pure SMP lock. */
119 static DEFINE_RWLOCK(qdisc_mod_lock);
120
121
122 /************************************************
123 * Queueing disciplines manipulation. *
124 ************************************************/
125
126
127 /* The list of all installed queueing disciplines. */
128
129 static struct Qdisc_ops *qdisc_base;
130
131 /* Register/unregister queueing discipline */
132
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135 struct Qdisc_ops *q, **qp;
136 int rc = -EEXIST;
137
138 write_lock(&qdisc_mod_lock);
139 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140 if (!strcmp(qops->id, q->id))
141 goto out;
142
143 if (qops->enqueue == NULL)
144 qops->enqueue = noop_qdisc_ops.enqueue;
145 if (qops->peek == NULL) {
146 if (qops->dequeue == NULL)
147 qops->peek = noop_qdisc_ops.peek;
148 else
149 goto out_einval;
150 }
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
154 if (qops->cl_ops) {
155 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
157 if (!(cops->find && cops->walk && cops->leaf))
158 goto out_einval;
159
160 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161 goto out_einval;
162 }
163
164 qops->next = NULL;
165 *qp = qops;
166 rc = 0;
167 out:
168 write_unlock(&qdisc_mod_lock);
169 return rc;
170
171 out_einval:
172 rc = -EINVAL;
173 goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
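
/* A hedged example of the registration side (not from this file): a
 * qdisc module fills in a struct Qdisc_ops and registers it from its
 * module init. "example_qdisc_ops" and the enqueue/dequeue callbacks
 * are hypothetical placeholders:
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 * Note how register_qdisc() fills in noop defaults for a missing
 * enqueue/dequeue, but rejects a missing ->peek when ->dequeue is set.
 */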
176
177 void unregister_qdisc(struct Qdisc_ops *qops)
178 {
179 struct Qdisc_ops *q, **qp;
180 int err = -ENOENT;
181
182 write_lock(&qdisc_mod_lock);
183 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184 if (q == qops)
185 break;
186 if (q) {
187 *qp = q->next;
188 q->next = NULL;
189 err = 0;
190 }
191 write_unlock(&qdisc_mod_lock);
192
193 WARN(err, "unregister qdisc(%s) failed\n", qops->id);
194 }
195 EXPORT_SYMBOL(unregister_qdisc);
196
197 /* Get default qdisc if not otherwise specified */
198 void qdisc_get_default(char *name, size_t len)
199 {
200 read_lock(&qdisc_mod_lock);
201 strscpy(name, default_qdisc_ops->id, len);
202 read_unlock(&qdisc_mod_lock);
203 }
204
205 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
206 {
207 struct Qdisc_ops *q = NULL;
208
209 for (q = qdisc_base; q; q = q->next) {
210 if (!strcmp(name, q->id)) {
211 if (!bpf_try_module_get(q, q->owner))
212 q = NULL;
213 break;
214 }
215 }
216
217 return q;
218 }
219
220 /* Set new default qdisc to use */
221 int qdisc_set_default(const char *name)
222 {
223 const struct Qdisc_ops *ops;
224
225 if (!capable(CAP_NET_ADMIN))
226 return -EPERM;
227
228 write_lock(&qdisc_mod_lock);
229 ops = qdisc_lookup_default(name);
230 if (!ops) {
231 /* Not found, drop lock and try to load module */
232 write_unlock(&qdisc_mod_lock);
233 request_module(NET_SCH_ALIAS_PREFIX "%s", name);
234 write_lock(&qdisc_mod_lock);
235
236 ops = qdisc_lookup_default(name);
237 }
238
239 if (ops) {
240 /* Set new default */
241 bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
242 default_qdisc_ops = ops;
243 }
244 write_unlock(&qdisc_mod_lock);
245
246 return ops ? 0 : -ENOENT;
247 }
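
/* Usage note (a sketch, not defined in this file): the
 * net.core.default_qdisc sysctl handler ends up calling
 * qdisc_set_default(), so e.g.
 *
 *	sysctl -w net.core.default_qdisc=fq_codel
 *
 * loads the module if necessary and makes it the default qdisc for
 * newly attached tx queues.
 */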
248
249 #ifdef CONFIG_NET_SCH_DEFAULT
250 /* Set default value from kernel config */
251 static int __init sch_default_qdisc(void)
252 {
253 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
254 }
255 late_initcall(sch_default_qdisc);
256 #endif
257
258 /* We know the handle. Find the qdisc among all qdiscs attached to the
259 * device (the root qdisc, all its children, children of children, etc.)
260 * Note: the caller either holds rtnl or rcu_read_lock()
261 */
262
263 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
264 {
265 struct Qdisc *q;
266
267 if (!qdisc_dev(root))
268 return (root->handle == handle ? root : NULL);
269
270 if (!(root->flags & TCQ_F_BUILTIN) &&
271 root->handle == handle)
272 return root;
273
274 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
275 lockdep_rtnl_is_held()) {
276 if (q->handle == handle)
277 return q;
278 }
279 return NULL;
280 }
281
282 void qdisc_hash_add(struct Qdisc *q, bool invisible)
283 {
284 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
285 ASSERT_RTNL();
286 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
287 if (invisible)
288 q->flags |= TCQ_F_INVISIBLE;
289 }
290 }
291 EXPORT_SYMBOL(qdisc_hash_add);
292
293 void qdisc_hash_del(struct Qdisc *q)
294 {
295 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
296 ASSERT_RTNL();
297 hash_del_rcu(&q->hash);
298 }
299 }
300 EXPORT_SYMBOL(qdisc_hash_del);
301
302 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
303 {
304 struct Qdisc *q;
305
306 if (!handle)
307 return NULL;
308 q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
309 if (q)
310 goto out;
311
312 if (dev_ingress_queue(dev))
313 q = qdisc_match_from_root(
314 rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
315 handle);
316 out:
317 return q;
318 }
319
320 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
321 {
322 struct netdev_queue *nq;
323 struct Qdisc *q;
324
325 if (!handle)
326 return NULL;
327 q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
328 if (q)
329 goto out;
330
331 nq = dev_ingress_queue_rcu(dev);
332 if (nq)
333 q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
334 handle);
335 out:
336 return q;
337 }
338
339 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
340 {
341 unsigned long cl;
342 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
343
344 if (cops == NULL)
345 return NULL;
346 cl = cops->find(p, classid);
347
348 if (cl == 0)
349 return NULL;
350 return cops->leaf(p, cl);
351 }
352
353 /* Find queueing discipline by name */
354
355 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
356 {
357 struct Qdisc_ops *q = NULL;
358
359 if (kind) {
360 read_lock(&qdisc_mod_lock);
361 for (q = qdisc_base; q; q = q->next) {
362 if (nla_strcmp(kind, q->id) == 0) {
363 if (!bpf_try_module_get(q, q->owner))
364 q = NULL;
365 break;
366 }
367 }
368 read_unlock(&qdisc_mod_lock);
369 }
370 return q;
371 }
372
373 /* The linklayer setting was not transferred from iproute2, in older
374 * versions, and the rate table lookup system has been dropped from
375 * the kernel. To stay backward compatible with older iproute2 tc
376 * utils, we detect the linklayer setting by checking whether the rate
377 * table was modified.
378 *
379 * For linklayer ATM table entries, the rate table will be aligned to
380 * 48 bytes, thus some table entries will contain the same value. The
381 * mpu (min packet unit) is also encoded into the old rate table, thus
382 * starting from the mpu, we find the low and high table entries for
383 * mapping this cell. If these entries contain the same value, then
384 * the rate table has been modified for linklayer ATM.
385 *
386 * This is done by rounding the mpu up to the nearest 48-byte cell/entry,
387 * then rounding up to the next cell, calculating the table entry one
388 * below that, and comparing the two.
389 */
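
/* A worked example of the test above (illustrative numbers): with
 * mpu = 0 and cell_log = 3, low = roundup(0, 48) = 0 and
 * high = roundup(1, 48) = 48, giving cell_low = 0 and cell_high = 5.
 * Sizes 0..47 all occupy a single 48-byte ATM cell, so an ATM-built
 * table has rtab[0] == rtab[5], while an Ethernet-built table
 * normally does not.
 */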
390 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
391 {
392 int low = roundup(r->mpu, 48);
393 int high = roundup(low+1, 48);
394 int cell_low = low >> r->cell_log;
395 int cell_high = (high >> r->cell_log) - 1;
396
397 /* rtab is too inaccurate at rates > 100Mbit/s */
398 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
399 pr_debug("TC linklayer: Giving up ATM detection\n");
400 return TC_LINKLAYER_ETHERNET;
401 }
402
403 if ((cell_high > cell_low) && (cell_high < 256)
404 && (rtab[cell_low] == rtab[cell_high])) {
405 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
406 cell_low, cell_high, rtab[cell_high]);
407 return TC_LINKLAYER_ATM;
408 }
409 return TC_LINKLAYER_ETHERNET;
410 }
411
412 static struct qdisc_rate_table *qdisc_rtab_list;
413
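/* Rate tables are reference-counted and shared: the lookup below returns
 * an existing table when both the tc_ratespec and the 1024-byte payload
 * match, and qdisc_put_rtab() drops the reference again.
 */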
414 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
415 struct nlattr *tab,
416 struct netlink_ext_ack *extack)
417 {
418 struct qdisc_rate_table *rtab;
419
420 if (tab == NULL || r->rate == 0 ||
421 r->cell_log == 0 || r->cell_log >= 32 ||
422 nla_len(tab) != TC_RTAB_SIZE) {
423 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
424 return NULL;
425 }
426
427 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
428 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
429 !memcmp(&rtab->data, nla_data(tab), 1024)) {
430 rtab->refcnt++;
431 return rtab;
432 }
433 }
434
435 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
436 if (rtab) {
437 rtab->rate = *r;
438 rtab->refcnt = 1;
439 memcpy(rtab->data, nla_data(tab), 1024);
440 if (r->linklayer == TC_LINKLAYER_UNAWARE)
441 r->linklayer = __detect_linklayer(r, rtab->data);
442 rtab->next = qdisc_rtab_list;
443 qdisc_rtab_list = rtab;
444 } else {
445 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
446 }
447 return rtab;
448 }
449 EXPORT_SYMBOL(qdisc_get_rtab);
450
451 void qdisc_put_rtab(struct qdisc_rate_table *tab)
452 {
453 struct qdisc_rate_table *rtab, **rtabp;
454
455 if (!tab || --tab->refcnt)
456 return;
457
458 for (rtabp = &qdisc_rtab_list;
459 (rtab = *rtabp) != NULL;
460 rtabp = &rtab->next) {
461 if (rtab == tab) {
462 *rtabp = rtab->next;
463 kfree(rtab);
464 return;
465 }
466 }
467 }
468 EXPORT_SYMBOL(qdisc_put_rtab);
469
470 static LIST_HEAD(qdisc_stab_list);
471
472 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
473 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
474 [TCA_STAB_DATA] = { .type = NLA_BINARY },
475 };
476
477 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
478 struct netlink_ext_ack *extack)
479 {
480 struct nlattr *tb[TCA_STAB_MAX + 1];
481 struct qdisc_size_table *stab;
482 struct tc_sizespec *s;
483 unsigned int tsize = 0;
484 u16 *tab = NULL;
485 int err;
486
487 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
488 extack);
489 if (err < 0)
490 return ERR_PTR(err);
491 if (!tb[TCA_STAB_BASE]) {
492 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
493 return ERR_PTR(-EINVAL);
494 }
495
496 s = nla_data(tb[TCA_STAB_BASE]);
497
498 if (s->tsize > 0) {
499 if (!tb[TCA_STAB_DATA]) {
500 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
501 return ERR_PTR(-EINVAL);
502 }
503 tab = nla_data(tb[TCA_STAB_DATA]);
504 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
505 }
506
507 if (tsize != s->tsize || (!tab && tsize > 0)) {
508 NL_SET_ERR_MSG(extack, "Invalid size of size table");
509 return ERR_PTR(-EINVAL);
510 }
511
512 list_for_each_entry(stab, &qdisc_stab_list, list) {
513 if (memcmp(&stab->szopts, s, sizeof(*s)))
514 continue;
515 if (tsize > 0 &&
516 memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
517 continue;
518 stab->refcnt++;
519 return stab;
520 }
521
522 if (s->size_log > STAB_SIZE_LOG_MAX ||
523 s->cell_log > STAB_SIZE_LOG_MAX) {
524 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
525 return ERR_PTR(-EINVAL);
526 }
527
528 stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
529 if (!stab)
530 return ERR_PTR(-ENOMEM);
531
532 stab->refcnt = 1;
533 stab->szopts = *s;
534 if (tsize > 0)
535 memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
536
537 list_add_tail(&stab->list, &qdisc_stab_list);
538
539 return stab;
540 }
541
542 void qdisc_put_stab(struct qdisc_size_table *tab)
543 {
544 if (!tab)
545 return;
546
547 if (--tab->refcnt == 0) {
548 list_del(&tab->list);
549 kfree_rcu(tab, rcu);
550 }
551 }
552 EXPORT_SYMBOL(qdisc_put_stab);
553
554 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
555 {
556 struct nlattr *nest;
557
558 nest = nla_nest_start_noflag(skb, TCA_STAB);
559 if (nest == NULL)
560 goto nla_put_failure;
561 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
562 goto nla_put_failure;
563 nla_nest_end(skb, nest);
564
565 return skb->len;
566
567 nla_put_failure:
568 return -1;
569 }
570
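/* Map the skb's real length through the size table: add the configured
 * overhead, index the table by (len + cell_align) >> cell_log, and
 * extrapolate from the last entry for slots beyond the end. The result
 * is scaled up by size_log and clamped to a minimum of one.
 */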
571 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
572 const struct qdisc_size_table *stab)
573 {
574 int pkt_len, slot;
575
576 pkt_len = skb->len + stab->szopts.overhead;
577 if (unlikely(!stab->szopts.tsize))
578 goto out;
579
580 slot = pkt_len + stab->szopts.cell_align;
581 if (unlikely(slot < 0))
582 slot = 0;
583
584 slot >>= stab->szopts.cell_log;
585 if (likely(slot < stab->szopts.tsize))
586 pkt_len = stab->data[slot];
587 else
588 pkt_len = stab->data[stab->szopts.tsize - 1] *
589 (slot / stab->szopts.tsize) +
590 stab->data[slot % stab->szopts.tsize];
591
592 pkt_len <<= stab->szopts.size_log;
593 out:
594 if (unlikely(pkt_len < 1))
595 pkt_len = 1;
596 qdisc_skb_cb(skb)->pkt_len = pkt_len;
597 }
598
599 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
600 {
601 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
602 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
603 txt, qdisc->ops->id, qdisc->handle >> 16);
604 qdisc->flags |= TCQ_F_WARN_NONWC;
605 }
606 }
607 EXPORT_SYMBOL(qdisc_warn_nonwc);
608
609 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
610 {
611 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
612 timer);
613
614 rcu_read_lock();
615 __netif_schedule(qdisc_root(wd->qdisc));
616 rcu_read_unlock();
617
618 return HRTIMER_NORESTART;
619 }
620
621 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
622 clockid_t clockid)
623 {
624 hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
625 wd->qdisc = qdisc;
626 }
627 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
628
629 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
630 {
631 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
632 }
633 EXPORT_SYMBOL(qdisc_watchdog_init);
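
/* Typical use (a hedged sketch, not from this file): a shaping qdisc
 * embeds a struct qdisc_watchdog, initializes it from its ->init(),
 * and when ->dequeue() has nothing eligible to send it arms the timer
 * for the next transmission time, e.g.
 *
 *	qdisc_watchdog_schedule_ns(&q->watchdog, next_tx_time_ns);
 *
 * The expiry handler below simply reschedules the root qdisc, which
 * causes ->dequeue() to be retried.
 */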
634
635 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
636 u64 delta_ns)
637 {
638 bool deactivated;
639
640 rcu_read_lock();
641 deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
642 &qdisc_root_sleeping(wd->qdisc)->state);
643 rcu_read_unlock();
644 if (deactivated)
645 return;
646
647 if (hrtimer_is_queued(&wd->timer)) {
648 u64 softexpires;
649
650 softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
651 /* If timer is already set in [expires, expires + delta_ns],
652 * do not reprogram it.
653 */
654 if (softexpires - expires <= delta_ns)
655 return;
656 }
657
658 hrtimer_start_range_ns(&wd->timer,
659 ns_to_ktime(expires),
660 delta_ns,
661 HRTIMER_MODE_ABS_PINNED);
662 }
663 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
664
665 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
666 {
667 hrtimer_cancel(&wd->timer);
668 }
669 EXPORT_SYMBOL(qdisc_watchdog_cancel);
670
671 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
672 {
673 struct hlist_head *h;
674 unsigned int i;
675
676 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
677
678 if (h != NULL) {
679 for (i = 0; i < n; i++)
680 INIT_HLIST_HEAD(&h[i]);
681 }
682 return h;
683 }
684
685 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
686 {
687 struct Qdisc_class_common *cl;
688 struct hlist_node *next;
689 struct hlist_head *nhash, *ohash;
690 unsigned int nsize, nmask, osize;
691 unsigned int i, h;
692
693 /* Rehash when load factor exceeds 0.75 */
694 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
695 return;
696 nsize = clhash->hashsize * 2;
697 nmask = nsize - 1;
698 nhash = qdisc_class_hash_alloc(nsize);
699 if (nhash == NULL)
700 return;
701
702 ohash = clhash->hash;
703 osize = clhash->hashsize;
704
705 sch_tree_lock(sch);
706 for (i = 0; i < osize; i++) {
707 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
708 h = qdisc_class_hash(cl->classid, nmask);
709 hlist_add_head(&cl->hnode, &nhash[h]);
710 }
711 }
712 clhash->hash = nhash;
713 clhash->hashsize = nsize;
714 clhash->hashmask = nmask;
715 sch_tree_unlock(sch);
716
717 kvfree(ohash);
718 }
719 EXPORT_SYMBOL(qdisc_class_hash_grow);
720
721 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
722 {
723 unsigned int size = 4;
724
725 clhash->hash = qdisc_class_hash_alloc(size);
726 if (!clhash->hash)
727 return -ENOMEM;
728 clhash->hashsize = size;
729 clhash->hashmask = size - 1;
730 clhash->hashelems = 0;
731 return 0;
732 }
733 EXPORT_SYMBOL(qdisc_class_hash_init);
734
735 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
736 {
737 kvfree(clhash->hash);
738 }
739 EXPORT_SYMBOL(qdisc_class_hash_destroy);
740
741 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
742 struct Qdisc_class_common *cl)
743 {
744 unsigned int h;
745
746 INIT_HLIST_NODE(&cl->hnode);
747 h = qdisc_class_hash(cl->classid, clhash->hashmask);
748 hlist_add_head(&cl->hnode, &clhash->hash[h]);
749 clhash->hashelems++;
750 }
751 EXPORT_SYMBOL(qdisc_class_hash_insert);
752
753 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
754 struct Qdisc_class_common *cl)
755 {
756 hlist_del(&cl->hnode);
757 clhash->hashelems--;
758 }
759 EXPORT_SYMBOL(qdisc_class_hash_remove);
760
761 /* Allocate a unique handle from the space managed by the kernel.
762 * The possible range is [8000-FFFF]:0000 (0x8000 values)
763 */
764 static u32 qdisc_alloc_handle(struct net_device *dev)
765 {
766 int i = 0x8000;
767 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
768
769 do {
770 autohandle += TC_H_MAKE(0x10000U, 0);
771 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
772 autohandle = TC_H_MAKE(0x80000000U, 0);
773 if (!qdisc_lookup(dev, autohandle))
774 return autohandle;
775 cond_resched();
776 } while (--i > 0);
777
778 return 0;
779 }
780
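/* Propagate a queue length change up the qdisc tree: walk from @sch
 * towards the root, subtracting @n packets and @len bytes at every
 * ancestor. A parent's qlen_notify() is invoked only when the child
 * has become empty, so the parent can deactivate the class.
 */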
781 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
782 {
783 const struct Qdisc_class_ops *cops;
784 unsigned long cl;
785 u32 parentid;
786 bool notify;
787 int drops;
788
789 drops = max_t(int, n, 0);
790 rcu_read_lock();
791 while ((parentid = sch->parent)) {
792 if (parentid == TC_H_ROOT)
793 break;
794
795 if (sch->flags & TCQ_F_NOPARENT)
796 break;
797 /* Notify parent qdisc only if child qdisc becomes empty. */
798 notify = !sch->q.qlen;
799 /* TODO: perform the search on a per txq basis */
800 sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
801 if (sch == NULL) {
802 WARN_ON_ONCE(parentid != TC_H_ROOT);
803 break;
804 }
805 cops = sch->ops->cl_ops;
806 if (notify && cops->qlen_notify) {
807 /* Note that qlen_notify must be idempotent as it may get called
808 * multiple times.
809 */
810 cl = cops->find(sch, parentid);
811 cops->qlen_notify(sch, cl);
812 }
813 sch->q.qlen -= n;
814 sch->qstats.backlog -= len;
815 __qdisc_qstats_drop(sch, drops);
816 }
817 rcu_read_unlock();
818 }
819 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
820
821 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
822 void *type_data)
823 {
824 struct net_device *dev = qdisc_dev(sch);
825 int err;
826
827 sch->flags &= ~TCQ_F_OFFLOADED;
828 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
829 return 0;
830
831 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
832 if (err == -EOPNOTSUPP)
833 return 0;
834
835 if (!err)
836 sch->flags |= TCQ_F_OFFLOADED;
837
838 return err;
839 }
840 EXPORT_SYMBOL(qdisc_offload_dump_helper);
841
842 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
843 struct Qdisc *new, struct Qdisc *old,
844 enum tc_setup_type type, void *type_data,
845 struct netlink_ext_ack *extack)
846 {
847 bool any_qdisc_is_offloaded;
848 int err;
849
850 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
851 return;
852
853 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
854
855 /* Don't report error if the graft is part of a destroy operation. */
856 if (!err || !new || new == &noop_qdisc)
857 return;
858
859 /* Don't report error if the parent, the old child and the new
860 * one are not offloaded.
861 */
862 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
863 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
864 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
865
866 if (any_qdisc_is_offloaded)
867 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
868 }
869 EXPORT_SYMBOL(qdisc_offload_graft_helper);
870
871 void qdisc_offload_query_caps(struct net_device *dev,
872 enum tc_setup_type type,
873 void *caps, size_t caps_len)
874 {
875 const struct net_device_ops *ops = dev->netdev_ops;
876 struct tc_query_caps_base base = {
877 .type = type,
878 .caps = caps,
879 };
880
881 memset(caps, 0, caps_len);
882
883 if (ops->ndo_setup_tc)
884 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
885 }
886 EXPORT_SYMBOL(qdisc_offload_query_caps);
887
888 static void qdisc_offload_graft_root(struct net_device *dev,
889 struct Qdisc *new, struct Qdisc *old,
890 struct netlink_ext_ack *extack)
891 {
892 struct tc_root_qopt_offload graft_offload = {
893 .command = TC_ROOT_GRAFT,
894 .handle = new ? new->handle : 0,
895 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
896 (old && old->flags & TCQ_F_INGRESS),
897 };
898
899 qdisc_offload_graft_helper(dev, NULL, new, old,
900 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
901 }
902
903 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
904 u32 portid, u32 seq, u16 flags, int event,
905 struct netlink_ext_ack *extack)
906 {
907 struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
908 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
909 struct tcmsg *tcm;
910 struct nlmsghdr *nlh;
911 unsigned char *b = skb_tail_pointer(skb);
912 struct gnet_dump d;
913 struct qdisc_size_table *stab;
914 u32 block_index;
915 __u32 qlen;
916
917 cond_resched();
918 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
919 if (!nlh)
920 goto out_nlmsg_trim;
921 tcm = nlmsg_data(nlh);
922 tcm->tcm_family = AF_UNSPEC;
923 tcm->tcm__pad1 = 0;
924 tcm->tcm__pad2 = 0;
925 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
926 tcm->tcm_parent = clid;
927 tcm->tcm_handle = q->handle;
928 tcm->tcm_info = refcount_read(&q->refcnt);
929 if (nla_put_string(skb, TCA_KIND, q->ops->id))
930 goto nla_put_failure;
931 if (q->ops->ingress_block_get) {
932 block_index = q->ops->ingress_block_get(q);
933 if (block_index &&
934 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
935 goto nla_put_failure;
936 }
937 if (q->ops->egress_block_get) {
938 block_index = q->ops->egress_block_get(q);
939 if (block_index &&
940 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
941 goto nla_put_failure;
942 }
943 if (q->ops->dump && q->ops->dump(q, skb) < 0)
944 goto nla_put_failure;
945 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
946 goto nla_put_failure;
947 qlen = qdisc_qlen_sum(q);
948
949 stab = rtnl_dereference(q->stab);
950 if (stab && qdisc_dump_stab(skb, stab) < 0)
951 goto nla_put_failure;
952
953 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
954 NULL, &d, TCA_PAD) < 0)
955 goto nla_put_failure;
956
957 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
958 goto nla_put_failure;
959
960 if (qdisc_is_percpu_stats(q)) {
961 cpu_bstats = q->cpu_bstats;
962 cpu_qstats = q->cpu_qstats;
963 }
964
965 if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
966 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
967 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
968 goto nla_put_failure;
969
970 if (gnet_stats_finish_copy(&d) < 0)
971 goto nla_put_failure;
972
973 if (extack && extack->_msg &&
974 nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
975 goto out_nlmsg_trim;
976
977 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
978
979 return skb->len;
980
981 out_nlmsg_trim:
982 nla_put_failure:
983 nlmsg_trim(skb, b);
984 return -1;
985 }
986
987 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
988 {
989 if (q->flags & TCQ_F_BUILTIN)
990 return true;
991 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
992 return true;
993
994 return false;
995 }
996
997 static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
998 struct nlmsghdr *n, u32 clid, struct Qdisc *q,
999 struct netlink_ext_ack *extack)
1000 {
1001 struct sk_buff *skb;
1002 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1003
1004 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1005 if (!skb)
1006 return -ENOBUFS;
1007
1008 if (!tc_qdisc_dump_ignore(q, false)) {
1009 if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
1010 RTM_NEWQDISC, extack) < 0)
1011 goto err_out;
1012 }
1013
1014 if (skb->len)
1015 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1016 n->nlmsg_flags & NLM_F_ECHO);
1017
1018 err_out:
1019 kfree_skb(skb);
1020 return -EINVAL;
1021 }
1022
1023 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1024 struct nlmsghdr *n, u32 clid,
1025 struct Qdisc *old, struct Qdisc *new,
1026 struct netlink_ext_ack *extack)
1027 {
1028 struct sk_buff *skb;
1029 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1030
1031 if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
1032 return 0;
1033
1034 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1035 if (!skb)
1036 return -ENOBUFS;
1037
1038 if (old && !tc_qdisc_dump_ignore(old, false)) {
1039 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1040 0, RTM_DELQDISC, extack) < 0)
1041 goto err_out;
1042 }
1043 if (new && !tc_qdisc_dump_ignore(new, false)) {
1044 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1045 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
1046 goto err_out;
1047 }
1048
1049 if (skb->len)
1050 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1051 n->nlmsg_flags & NLM_F_ECHO);
1052
1053 err_out:
1054 kfree_skb(skb);
1055 return -EINVAL;
1056 }
1057
1058 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1059 struct nlmsghdr *n, u32 clid,
1060 struct Qdisc *old, struct Qdisc *new,
1061 struct netlink_ext_ack *extack)
1062 {
1063 if (new || old)
1064 qdisc_notify(net, skb, n, clid, old, new, extack);
1065
1066 if (old)
1067 qdisc_put(old);
1068 }
1069
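/* Demote a lockless qdisc to locked operation: clear TCQ_F_NOLOCK and,
 * if per-CPU stats were allocated for it, release them and fall back
 * to the regular root-lock protected counters.
 */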
1070 static void qdisc_clear_nolock(struct Qdisc *sch)
1071 {
1072 sch->flags &= ~TCQ_F_NOLOCK;
1073 if (!(sch->flags & TCQ_F_CPUSTATS))
1074 return;
1075
1076 free_percpu(sch->cpu_bstats);
1077 free_percpu(sch->cpu_qstats);
1078 sch->cpu_bstats = NULL;
1079 sch->cpu_qstats = NULL;
1080 sch->flags &= ~TCQ_F_CPUSTATS;
1081 }
1082
1083 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1084 * to device "dev".
1085 *
1086 * When appropriate, send a netlink notification using "skb"
1087 * and "n".
1088 *
1089 * On success, destroy the old qdisc.
1090 */
1091
1092 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1093 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1094 struct Qdisc *new, struct Qdisc *old,
1095 struct netlink_ext_ack *extack)
1096 {
1097 struct Qdisc *q = old;
1098 struct net *net = dev_net(dev);
1099
1100 if (parent == NULL) {
1101 unsigned int i, num_q, ingress;
1102 struct netdev_queue *dev_queue;
1103
1104 ingress = 0;
1105 num_q = dev->num_tx_queues;
1106 if ((q && q->flags & TCQ_F_INGRESS) ||
1107 (new && new->flags & TCQ_F_INGRESS)) {
1108 ingress = 1;
1109 dev_queue = dev_ingress_queue(dev);
1110 if (!dev_queue) {
1111 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1112 return -ENOENT;
1113 }
1114
1115 q = rtnl_dereference(dev_queue->qdisc_sleeping);
1116
1117 /* This is the counterpart of that qdisc_refcount_inc_nz() call in
1118 * __tcf_qdisc_find() for filter requests.
1119 */
1120 if (!qdisc_refcount_dec_if_one(q)) {
1121 NL_SET_ERR_MSG(extack,
1122 "Current ingress or clsact Qdisc has ongoing filter requests");
1123 return -EBUSY;
1124 }
1125 }
1126
1127 if (dev->flags & IFF_UP)
1128 dev_deactivate(dev);
1129
1130 qdisc_offload_graft_root(dev, new, old, extack);
1131
1132 if (new && new->ops->attach && !ingress)
1133 goto skip;
1134
1135 if (!ingress) {
1136 for (i = 0; i < num_q; i++) {
1137 dev_queue = netdev_get_tx_queue(dev, i);
1138 old = dev_graft_qdisc(dev_queue, new);
1139
1140 if (new && i > 0)
1141 qdisc_refcount_inc(new);
1142 qdisc_put(old);
1143 }
1144 } else {
1145 old = dev_graft_qdisc(dev_queue, NULL);
1146
1147 /* {ingress,clsact}_destroy() @old before grafting @new to avoid
1148 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
1149 * pointer(s) in mini_qdisc_pair_swap().
1150 */
1151 qdisc_notify(net, skb, n, classid, old, new, extack);
1152 qdisc_destroy(old);
1153
1154 dev_graft_qdisc(dev_queue, new);
1155 }
1156
1157 skip:
1158 if (!ingress) {
1159 old = rtnl_dereference(dev->qdisc);
1160 if (new && !new->ops->attach)
1161 qdisc_refcount_inc(new);
1162 rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1163
1164 notify_and_destroy(net, skb, n, classid, old, new, extack);
1165
1166 if (new && new->ops->attach)
1167 new->ops->attach(new);
1168 }
1169
1170 if (dev->flags & IFF_UP)
1171 dev_activate(dev);
1172 } else {
1173 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1174 unsigned long cl;
1175 int err;
1176
1177 /* Only support running class lockless if parent is lockless */
1178 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1179 qdisc_clear_nolock(new);
1180
1181 if (!cops || !cops->graft)
1182 return -EOPNOTSUPP;
1183
1184 cl = cops->find(parent, classid);
1185 if (!cl) {
1186 NL_SET_ERR_MSG(extack, "Specified class not found");
1187 return -ENOENT;
1188 }
1189
1190 if (new && new->ops == &noqueue_qdisc_ops) {
1191 NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
1192 return -EINVAL;
1193 }
1194
1195 if (new &&
1196 !(parent->flags & TCQ_F_MQROOT) &&
1197 rcu_access_pointer(new->stab)) {
1198 NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
1199 return -EINVAL;
1200 }
1201 err = cops->graft(parent, cl, new, &old, extack);
1202 if (err)
1203 return err;
1204 notify_and_destroy(net, skb, n, classid, old, new, extack);
1205 }
1206 return 0;
1207 }
1208
1209 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1210 struct netlink_ext_ack *extack)
1211 {
1212 u32 block_index;
1213
1214 if (tca[TCA_INGRESS_BLOCK]) {
1215 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1216
1217 if (!block_index) {
1218 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1219 return -EINVAL;
1220 }
1221 if (!sch->ops->ingress_block_set) {
1222 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1223 return -EOPNOTSUPP;
1224 }
1225 sch->ops->ingress_block_set(sch, block_index);
1226 }
1227 if (tca[TCA_EGRESS_BLOCK]) {
1228 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1229
1230 if (!block_index) {
1231 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1232 return -EINVAL;
1233 }
1234 if (!sch->ops->egress_block_set) {
1235 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1236 return -EOPNOTSUPP;
1237 }
1238 sch->ops->egress_block_set(sch, block_index);
1239 }
1240 return 0;
1241 }
1242
1243 /*
1244 Allocate and initialize a new qdisc.
1245
1246 Parameters are passed via opt.
1247 */
1248
1249 static struct Qdisc *qdisc_create(struct net_device *dev,
1250 struct netdev_queue *dev_queue,
1251 u32 parent, u32 handle,
1252 struct nlattr **tca, int *errp,
1253 struct netlink_ext_ack *extack)
1254 {
1255 int err;
1256 struct nlattr *kind = tca[TCA_KIND];
1257 struct Qdisc *sch;
1258 struct Qdisc_ops *ops;
1259 struct qdisc_size_table *stab;
1260
1261 ops = qdisc_lookup_ops(kind);
1262 if (!ops) {
1263 err = -ENOENT;
1264 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1265 goto err_out;
1266 }
1267
1268 sch = qdisc_alloc(dev_queue, ops, extack);
1269 if (IS_ERR(sch)) {
1270 err = PTR_ERR(sch);
1271 goto err_out2;
1272 }
1273
1274 sch->parent = parent;
1275
1276 if (handle == TC_H_INGRESS) {
1277 if (!(sch->flags & TCQ_F_INGRESS)) {
1278 NL_SET_ERR_MSG(extack,
1279 "Specified parent ID is reserved for ingress and clsact Qdiscs");
1280 err = -EINVAL;
1281 goto err_out3;
1282 }
1283 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1284 } else {
1285 if (handle == 0) {
1286 handle = qdisc_alloc_handle(dev);
1287 if (handle == 0) {
1288 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1289 err = -ENOSPC;
1290 goto err_out3;
1291 }
1292 }
1293 if (!netif_is_multiqueue(dev))
1294 sch->flags |= TCQ_F_ONETXQUEUE;
1295 }
1296
1297 sch->handle = handle;
1298
1299 /* This exists to stay backward compatible with a userspace
1300 * loophole, which allowed userspace to get the IFF_NO_QUEUE
1301 * facility on older kernels by setting tx_queue_len=0 (prior
1302 * to qdisc init) and then forgetting to reinit tx_queue_len
1303 * before attaching a qdisc again.
1304 */
1305 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1306 WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
1307 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1308 }
1309
1310 err = qdisc_block_indexes_set(sch, tca, extack);
1311 if (err)
1312 goto err_out3;
1313
1314 if (tca[TCA_STAB]) {
1315 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1316 if (IS_ERR(stab)) {
1317 err = PTR_ERR(stab);
1318 goto err_out3;
1319 }
1320 rcu_assign_pointer(sch->stab, stab);
1321 }
1322
1323 if (ops->init) {
1324 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1325 if (err != 0)
1326 goto err_out4;
1327 }
1328
1329 if (tca[TCA_RATE]) {
1330 err = -EOPNOTSUPP;
1331 if (sch->flags & TCQ_F_MQROOT) {
1332 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1333 goto err_out4;
1334 }
1335
1336 err = gen_new_estimator(&sch->bstats,
1337 sch->cpu_bstats,
1338 &sch->rate_est,
1339 NULL,
1340 true,
1341 tca[TCA_RATE]);
1342 if (err) {
1343 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1344 goto err_out4;
1345 }
1346 }
1347
1348 qdisc_hash_add(sch, false);
1349 trace_qdisc_create(ops, dev, parent);
1350
1351 return sch;
1352
1353 err_out4:
1354 /* Even if ops->init() failed, we call ops->destroy()
1355 * like qdisc_create_dflt().
1356 */
1357 if (ops->destroy)
1358 ops->destroy(sch);
1359 qdisc_put_stab(rtnl_dereference(sch->stab));
1360 err_out3:
1361 lockdep_unregister_key(&sch->root_lock_key);
1362 netdev_put(dev, &sch->dev_tracker);
1363 qdisc_free(sch);
1364 err_out2:
1365 bpf_module_put(ops, ops->owner);
1366 err_out:
1367 *errp = err;
1368 return NULL;
1369 }
1370
1371 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1372 struct netlink_ext_ack *extack)
1373 {
1374 struct qdisc_size_table *ostab, *stab = NULL;
1375 int err = 0;
1376
1377 if (tca[TCA_OPTIONS]) {
1378 if (!sch->ops->change) {
1379 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1380 return -EINVAL;
1381 }
1382 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1383 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1384 return -EOPNOTSUPP;
1385 }
1386 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1387 if (err)
1388 return err;
1389 }
1390
1391 if (tca[TCA_STAB]) {
1392 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1393 if (IS_ERR(stab))
1394 return PTR_ERR(stab);
1395 }
1396
1397 ostab = rtnl_dereference(sch->stab);
1398 rcu_assign_pointer(sch->stab, stab);
1399 qdisc_put_stab(ostab);
1400
1401 if (tca[TCA_RATE]) {
1402 /* NB: ignores errors from replace_estimator
1403 because the change can't be undone. */
1404 if (sch->flags & TCQ_F_MQROOT)
1405 goto out;
1406 gen_replace_estimator(&sch->bstats,
1407 sch->cpu_bstats,
1408 &sch->rate_est,
1409 NULL,
1410 true,
1411 tca[TCA_RATE]);
1412 }
1413 out:
1414 return 0;
1415 }
1416
1417 struct check_loop_arg {
1418 struct qdisc_walker w;
1419 struct Qdisc *p;
1420 int depth;
1421 };
1422
1423 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1424 struct qdisc_walker *w);
1425
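/* Walk every class reachable from @q and return -ELOOP if @p shows up
 * as a descendant (grafting @p there would create a cycle). Recursion
 * depth is capped at eight levels.
 */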
1426 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1427 {
1428 struct check_loop_arg arg;
1429
1430 if (q->ops->cl_ops == NULL)
1431 return 0;
1432
1433 arg.w.stop = arg.w.skip = arg.w.count = 0;
1434 arg.w.fn = check_loop_fn;
1435 arg.depth = depth;
1436 arg.p = p;
1437 q->ops->cl_ops->walk(q, &arg.w);
1438 return arg.w.stop ? -ELOOP : 0;
1439 }
1440
1441 static int
1442 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1443 {
1444 struct Qdisc *leaf;
1445 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1446 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1447
1448 leaf = cops->leaf(q, cl);
1449 if (leaf) {
1450 if (leaf == arg->p || arg->depth > 7)
1451 return -ELOOP;
1452 return check_loop(leaf, arg->p, arg->depth + 1);
1453 }
1454 return 0;
1455 }
1456
1457 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1458 [TCA_KIND] = { .type = NLA_STRING },
1459 [TCA_RATE] = { .type = NLA_BINARY,
1460 .len = sizeof(struct tc_estimator) },
1461 [TCA_STAB] = { .type = NLA_NESTED },
1462 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
1463 [TCA_CHAIN] = { .type = NLA_U32 },
1464 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
1465 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
1466 };
1467
1468 /*
1469 * Delete/get qdisc.
1470 */
1471
1472 static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1473 struct netlink_ext_ack *extack,
1474 struct net_device *dev,
1475 struct nlattr *tca[TCA_MAX + 1],
1476 struct tcmsg *tcm)
1477 {
1478 struct net *net = sock_net(skb->sk);
1479 struct Qdisc *q = NULL;
1480 struct Qdisc *p = NULL;
1481 u32 clid;
1482 int err;
1483
1484 clid = tcm->tcm_parent;
1485 if (clid) {
1486 if (clid != TC_H_ROOT) {
1487 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1488 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1489 if (!p) {
1490 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1491 return -ENOENT;
1492 }
1493 q = qdisc_leaf(p, clid);
1494 } else if (dev_ingress_queue(dev)) {
1495 q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
1496 }
1497 } else {
1498 q = rtnl_dereference(dev->qdisc);
1499 }
1500 if (!q) {
1501 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1502 return -ENOENT;
1503 }
1504
1505 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1506 NL_SET_ERR_MSG(extack, "Invalid handle");
1507 return -EINVAL;
1508 }
1509 } else {
1510 q = qdisc_lookup(dev, tcm->tcm_handle);
1511 if (!q) {
1512 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1513 return -ENOENT;
1514 }
1515 }
1516
1517 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1518 NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1519 return -EINVAL;
1520 }
1521
1522 if (n->nlmsg_type == RTM_DELQDISC) {
1523 if (!clid) {
1524 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1525 return -EINVAL;
1526 }
1527 if (q->handle == 0) {
1528 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1529 return -ENOENT;
1530 }
1531 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1532 if (err != 0)
1533 return err;
1534 } else {
1535 qdisc_get_notify(net, skb, n, clid, q, NULL);
1536 }
1537 return 0;
1538 }
1539
1540 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1541 struct netlink_ext_ack *extack)
1542 {
1543 struct net *net = sock_net(skb->sk);
1544 struct tcmsg *tcm = nlmsg_data(n);
1545 struct nlattr *tca[TCA_MAX + 1];
1546 struct net_device *dev;
1547 int err;
1548
1549 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1550 rtm_tca_policy, extack);
1551 if (err < 0)
1552 return err;
1553
1554 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1555 if (!dev)
1556 return -ENODEV;
1557
1558 netdev_lock_ops(dev);
1559 err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
1560 netdev_unlock_ops(dev);
1561
1562 return err;
1563 }
1564
1565 static bool req_create_or_replace(struct nlmsghdr *n)
1566 {
1567 return (n->nlmsg_flags & NLM_F_CREATE &&
1568 n->nlmsg_flags & NLM_F_REPLACE);
1569 }
1570
1571 static bool req_create_exclusive(struct nlmsghdr *n)
1572 {
1573 return (n->nlmsg_flags & NLM_F_CREATE &&
1574 n->nlmsg_flags & NLM_F_EXCL);
1575 }
1576
1577 static bool req_change(struct nlmsghdr *n)
1578 {
1579 return (!(n->nlmsg_flags & NLM_F_CREATE) &&
1580 !(n->nlmsg_flags & NLM_F_REPLACE) &&
1581 !(n->nlmsg_flags & NLM_F_EXCL));
1582 }
1583
1584 static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1585 struct netlink_ext_ack *extack,
1586 struct net_device *dev,
1587 struct nlattr *tca[TCA_MAX + 1],
1588 struct tcmsg *tcm)
1589 {
1590 struct Qdisc *q = NULL;
1591 struct Qdisc *p = NULL;
1592 u32 clid;
1593 int err;
1594
1595 clid = tcm->tcm_parent;
1596
1597 if (clid) {
1598 if (clid != TC_H_ROOT) {
1599 if (clid != TC_H_INGRESS) {
1600 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1601 if (!p) {
1602 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1603 return -ENOENT;
1604 }
1605 q = qdisc_leaf(p, clid);
1606 } else if (dev_ingress_queue_create(dev)) {
1607 q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
1608 }
1609 } else {
1610 q = rtnl_dereference(dev->qdisc);
1611 }
1612
1613 /* It may be the default qdisc; ignore it */
1614 if (q && q->handle == 0)
1615 q = NULL;
1616
1617 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1618 if (tcm->tcm_handle) {
1619 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1620 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1621 return -EEXIST;
1622 }
1623 if (TC_H_MIN(tcm->tcm_handle)) {
1624 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1625 return -EINVAL;
1626 }
1627 q = qdisc_lookup(dev, tcm->tcm_handle);
1628 if (!q)
1629 goto create_n_graft;
1630 if (q->parent != tcm->tcm_parent) {
1631 NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
1632 return -EINVAL;
1633 }
1634 if (n->nlmsg_flags & NLM_F_EXCL) {
1635 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1636 return -EEXIST;
1637 }
1638 if (tca[TCA_KIND] &&
1639 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1640 NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1641 return -EINVAL;
1642 }
1643 if (q->flags & TCQ_F_INGRESS) {
1644 NL_SET_ERR_MSG(extack,
1645 "Cannot regraft ingress or clsact Qdiscs");
1646 return -EINVAL;
1647 }
1648 if (q == p ||
1649 (p && check_loop(q, p, 0))) {
1650 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1651 return -ELOOP;
1652 }
1653 if (clid == TC_H_INGRESS) {
1654 NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
1655 return -EINVAL;
1656 }
1657 qdisc_refcount_inc(q);
1658 goto graft;
1659 } else {
1660 if (!q)
1661 goto create_n_graft;
1662
1663 /* This magic test requires explanation.
1664 *
1665 * We know that some child q is already
1666 * attached to this parent and we have a choice:
1667 * 1) change it or 2) create/graft a new one.
1668 * If the requested qdisc kind is different
1669 * from the existing one, then we choose to graft.
1670 * If they are the same, then this is a "change"
1671 * operation - just let it fall through.
1672 *
1673 * 1. We are allowed to create/graft only
1674 * if the request explicitly states
1675 * "please create if it doesn't exist".
1676 *
1677 * 2. If the request is an exclusive create,
1678 * then the qdisc tcm_handle is not expected
1679 * to exist, so we choose create/graft too.
1680 *
1681 * 3. The last case is when no flags are set.
1682 * This will happen when, for example, the tc
1683 * utility issues a "change" command.
1684 * Alas, it is sort of a hole in the API; we
1685 * cannot decide what to do unambiguously.
1686 * For now we select create/graft.
1687 */
1688 if (tca[TCA_KIND] &&
1689 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1690 if (req_create_or_replace(n) ||
1691 req_create_exclusive(n))
1692 goto create_n_graft;
1693 else if (req_change(n))
1694 goto create_n_graft2;
1695 }
1696 }
1697 }
1698 } else {
1699 if (!tcm->tcm_handle) {
1700 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1701 return -EINVAL;
1702 }
1703 q = qdisc_lookup(dev, tcm->tcm_handle);
1704 }
1705
1706 /* Change qdisc parameters */
1707 if (!q) {
1708 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1709 return -ENOENT;
1710 }
1711 if (n->nlmsg_flags & NLM_F_EXCL) {
1712 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1713 return -EEXIST;
1714 }
1715 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1716 NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1717 return -EINVAL;
1718 }
1719 err = qdisc_change(q, tca, extack);
1720 if (err == 0)
1721 qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
1722 return err;
1723
1724 create_n_graft:
1725 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1726 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1727 return -ENOENT;
1728 }
1729 create_n_graft2:
1730 if (clid == TC_H_INGRESS) {
1731 if (dev_ingress_queue(dev)) {
1732 q = qdisc_create(dev, dev_ingress_queue(dev),
1733 tcm->tcm_parent, tcm->tcm_parent,
1734 tca, &err, extack);
1735 } else {
1736 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1737 err = -ENOENT;
1738 }
1739 } else {
1740 struct netdev_queue *dev_queue;
1741
1742 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1743 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1744 else if (p)
1745 dev_queue = p->dev_queue;
1746 else
1747 dev_queue = netdev_get_tx_queue(dev, 0);
1748
1749 q = qdisc_create(dev, dev_queue,
1750 tcm->tcm_parent, tcm->tcm_handle,
1751 tca, &err, extack);
1752 }
1753 if (!q)
1754 return err;
1755
1756 graft:
1757 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1758 if (err) {
1759 if (q)
1760 qdisc_put(q);
1761 return err;
1762 }
1763
1764 return 0;
1765 }
1766
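/* Make sure the module backing a qdisc kind is loaded. The rtnl lock
 * is dropped around request_module(), so callers must not rely on any
 * state looked up before this call.
 */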
1767 static void request_qdisc_module(struct nlattr *kind)
1768 {
1769 struct Qdisc_ops *ops;
1770 char name[IFNAMSIZ];
1771
1772 if (!kind)
1773 return;
1774
1775 ops = qdisc_lookup_ops(kind);
1776 if (ops) {
1777 bpf_module_put(ops, ops->owner);
1778 return;
1779 }
1780
1781 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1782 rtnl_unlock();
1783 request_module(NET_SCH_ALIAS_PREFIX "%s", name);
1784 rtnl_lock();
1785 }
1786 }
1787
1788 /*
1789 * Create/change qdisc.
1790 */
1791 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1792 struct netlink_ext_ack *extack)
1793 {
1794 struct net *net = sock_net(skb->sk);
1795 struct nlattr *tca[TCA_MAX + 1];
1796 struct net_device *dev;
1797 struct tcmsg *tcm;
1798 int err;
1799
1800 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1801 rtm_tca_policy, extack);
1802 if (err < 0)
1803 return err;
1804
1805 request_qdisc_module(tca[TCA_KIND]);
1806
1807 tcm = nlmsg_data(n);
1808 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1809 if (!dev)
1810 return -ENODEV;
1811
1812 netdev_lock_ops(dev);
1813 err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
1814 netdev_unlock_ops(dev);
1815
1816 return err;
1817 }
1818
1819 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1820 struct netlink_callback *cb,
1821 int *q_idx_p, int s_q_idx, bool recur,
1822 bool dump_invisible)
1823 {
1824 int ret = 0, q_idx = *q_idx_p;
1825 struct Qdisc *q;
1826 int b;
1827
1828 if (!root)
1829 return 0;
1830
1831 q = root;
1832 if (q_idx < s_q_idx) {
1833 q_idx++;
1834 } else {
1835 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1836 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1837 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1838 RTM_NEWQDISC, NULL) <= 0)
1839 goto done;
1840 q_idx++;
1841 }
1842
1843 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1844 * itself has already been dumped.
1845 *
1846 * If we've already dumped the top-level (ingress) qdisc above, we don't
1847 * want to hit the global qdisc hashtable again.
1848 */
1849 if (!qdisc_dev(root) || !recur)
1850 goto out;
1851
1852 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1853 if (q_idx < s_q_idx) {
1854 q_idx++;
1855 continue;
1856 }
1857 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1858 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1859 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1860 RTM_NEWQDISC, NULL) <= 0)
1861 goto done;
1862 q_idx++;
1863 }
1864
1865 out:
1866 *q_idx_p = q_idx;
1867 return ret;
1868 done:
1869 ret = -1;
1870 goto out;
1871 }
1872
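/* Dump all qdiscs in the netns: cb->args[0] resumes at the device index
 * and cb->args[1] at the per-device qdisc index, so a dump that filled
 * the skb can continue where it left off.
 */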
1873 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1874 {
1875 struct net *net = sock_net(skb->sk);
1876 int idx, q_idx;
1877 int s_idx, s_q_idx;
1878 struct net_device *dev;
1879 const struct nlmsghdr *nlh = cb->nlh;
1880 struct nlattr *tca[TCA_MAX + 1];
1881 int err;
1882
1883 s_idx = cb->args[0];
1884 s_q_idx = q_idx = cb->args[1];
1885
1886 idx = 0;
1887 ASSERT_RTNL();
1888
1889 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1890 rtm_tca_policy, cb->extack);
1891 if (err < 0)
1892 return err;
1893
	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		netdev_lock_ops(dev);
		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
				       skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				       skb, cb, &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}
		netdev_unlock_ops(dev);

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}


/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

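	/* Also called from dump walkers that may visit many classes in one
	 * request; yield the CPU between entries.
	 */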
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event,
			 struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
			     struct nlmsghdr *n, struct Qdisc *q,
			     unsigned long cl, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
			   extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl,
			     struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

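	/* Fill the RTM_DELTCLASS notification before the class is deleted,
	 * while its attributes can still be dumped; it is only sent once the
	 * delete itself has succeeded.
	 */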
	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
		if (!skb)
			return -ENOBUFS;

		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
				   RTM_DELTCLASS, extack) < 0) {
			kfree_skb(skb);
			return -EINVAL;
		}
	} else {
		skb = NULL;
	}

	err = cops->delete(q, cl, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
				   n->nlmsg_flags & NLM_F_ECHO);
	return err;
}

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (n && tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL);
		     tp; tp = tcf_get_next_proto(chain, tp)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

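/* Re-bind every filter that references classid @clid to the class instance
 * @new_cl: each class of @q walks its filter blocks, chains and protos and
 * lets the filter's bind_class op update its cached class pointer. Called
 * after a class is created (bind) or deleted (@new_cl == 0, unbind).
 */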
static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack,
			   struct net_device *dev,
			   struct nlattr *tca[TCA_MAX + 1],
			   struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	const struct Qdisc_class_ops *cops;
	struct Qdisc *q = NULL;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - fully specified handle, use as given.
	   handle == X:0         - root class.
	 */
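	/* Worked example (hypothetical handles): "tc class add dev eth0
	 * parent 1:1 classid 1:10 ..." arrives with tcm_parent == 1:1 and
	 * tcm_handle == 1:10. Both majors agree, so qid resolves to 1:0
	 * below and the class is created under parent 1:1.
	 */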

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
			/* Unbind the class from its filters by rebinding to 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_get_notify(net, skb, n, q, cl, extack);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	/* Prevent creation of traffic classes with classid TC_H_ROOT */
	if (clid == TC_H_ROOT) {
		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
		return -EINVAL;
	}

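	/* Create the class (cl == 0 after an NLM_F_CREATE request) or
	 * reconfigure an existing one through the qdisc's change op.
	 */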
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
		/* We just created a new class; do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS, NULL);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
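	/* Moving past the qdisc where the previous dump stopped: reset the
	 * per-qdisc class resume state kept in cb->args[1] and beyond.
	 */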
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

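	/* When the request names a specific parent, dump only the classes of
	 * that one qdisc; otherwise walk every qdisc hashed on the device.
	 */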
	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tcmsg *tcm, struct net_device *dev)
{
	struct netdev_queue *dev_queue;
	int t, s_t;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
				skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				skb, tcm, cb, &t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	return skb->len;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;

	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	netdev_lock_ops(dev);
	err = __tc_dump_tclass(skb, cb, tcm, dev);
	netdev_unlock_ops(dev);

	dev_put(dev);

	return err;
}

#ifdef CONFIG_PROC_FS
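/* /proc/net/psched exports four hex words that user space (notably tc(8))
 * has historically read to convert between its time units and kernel PSCHED
 * ticks: nanoseconds per microsecond, nanoseconds per PSCHED tick, a legacy
 * 1 MHz clock-resolution constant, and the hrtimer resolution expressed as
 * a frequency.
 */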
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif

static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
	{.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
	{.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
	 .dumpit = tc_dump_qdisc},
	{.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
	 .dumpit = tc_dump_tclass},
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: cannot initialize per netns operations\n");
		return err;
	}

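	/* Register the always-built-in qdiscs used as device defaults. */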
	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register_many(psched_rtnl_msg_handlers);

	tc_wrapper_init();

	return 0;
}

subsys_initcall(pktsched_init);