// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <linux/bpf.h>

#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in the order and at the
   times determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue, but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers and counters (except for statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
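
/* Illustrative sketch (not part of this file; the "minimal_*" names are
 * made up): a minimal FIFO-like discipline wires the hooks described
 * above into a struct Qdisc_ops and registers it via register_qdisc():
 *
 *	static struct Qdisc_ops minimal_qdisc_ops __read_mostly = {
 *		.id		= "minimal",
 *		.priv_size	= 0,
 *		.enqueue	= minimal_enqueue,	(returns NET_XMIT_*)
 *		.dequeue	= qdisc_dequeue_head,
 *		.peek		= qdisc_peek_head,
 *		.init		= minimal_init,
 *		.reset		= qdisc_reset_queue,
 *		.change		= minimal_change,
 *		.owner		= THIS_MODULE,
 *	};
 */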

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

void unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);

	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);
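
/* Typical usage (sketch; "foo" is a hypothetical scheduler): a qdisc
 * module registers its ops on load and unregisters them on unload:
 *
 *	static int __init foo_module_init(void)
 *	{
 *		return register_qdisc(&foo_qdisc_ops);
 *	}
 *
 *	static void __exit foo_module_exit(void)
 *	{
 *		unregister_qdisc(&foo_qdisc_ops);
 *	}
 *
 *	module_init(foo_module_init)
 *	module_exit(foo_module_exit)
 *
 * together with MODULE_ALIAS_NET_SCH("foo"), which lets qdisc_set_default()
 * and tc_modify_qdisc() autoload the module via request_module().
 */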

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strscpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!bpf_try_module_get(q, q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children, etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
					  handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
				struct netlink_ext_ack *extack)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL) {
		NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
		return ERR_PTR(-EOPNOTSUPP);
	}
	cl = cops->find(p, classid);

	if (cl == 0) {
		NL_SET_ERR_MSG(extack, "Specified class not found");
		return ERR_PTR(-ENOENT);
	}
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!bpf_try_module_get(q, q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
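/* Worked example (numbers are illustrative only): with mpu = 0 and
 * cell_log = 3, low = roundup(0, 48) = 0 and high = roundup(1, 48) = 48,
 * giving cell_low = 0 and cell_high = (48 >> 3) - 1 = 5. Because 48-byte
 * ATM alignment makes all packet sizes in [1, 48] cost the same number
 * of cells, rtab[0] == rtab[5] identifies an ATM-adjusted table.
 */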
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
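
/* Note (sketch; the exact lookup helpers live elsewhere and vary by
 * consumer): a rate table maps a packet length, shifted right by
 * cell_log, to the time needed to transmit that many bytes at the
 * configured rate, so a consumer conceptually computes something like
 *
 *	transmit_time = rtab->data[pkt_len >> rtab->rate.cell_log];
 *
 * which is why cell_log and the 1024-byte (256-entry) table shape are
 * validated in qdisc_get_rtab() above.
 */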

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 &&
		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
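
/* Worked example (illustrative values): with overhead = 24,
 * cell_align = -1, cell_log = 6 and size_log = 0, a 100-byte skb yields
 * pkt_len = 124, slot = (124 - 1) >> 6 = 1, and the packet is billed as
 * stab->data[1] bytes. Slots beyond tsize extrapolate linearly from the
 * last table entry, as the else branch above shows.
 */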

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		u64 softexpires;

		softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (softexpires - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
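
/* Usage sketch (hypothetical rate-limiting qdisc): init() calls
 * qdisc_watchdog_init(&q->watchdog, sch); when dequeue() finds that the
 * head packet is not yet allowed to leave, it arms the timer instead of
 * returning the skb, e.g.
 *
 *	qdisc_watchdog_schedule_range_ns(&q->watchdog, next_send_time, 0);
 *	return NULL;
 *
 * The expired timer reschedules the root qdisc so dequeue() runs again
 * near next_send_time. destroy() must call qdisc_watchdog_cancel().
 */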

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
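
/* Usage sketch (the pattern used by classful qdiscs such as HTB/HFSC):
 * init() calls qdisc_class_hash_init(&q->clhash); when a class is
 * created, embed a struct Qdisc_class_common in it, set ->classid,
 * then under sch_tree_lock():
 *
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *
 * followed by qdisc_class_hash_grow(sch, &q->clhash) after dropping the
 * lock. Lookup goes through qdisc_class_find(&q->clhash, classid).
 */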

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (parentid == TC_H_ROOT)
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty. */
		notify = !sch->q.qlen;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* Note that qlen_notify must be idempotent as it may get called
			 * multiple times.
			 */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
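
/* Usage sketch: a qdisc that drops packets outside its own enqueue path
 * (e.g. from a timer or a change() handler) must propagate the queue
 * length and backlog decrease to its ancestors; after dropping two
 * packets totalling 3000 bytes it would call
 *
 *	qdisc_tree_reduce_backlog(sch, 2, 3000);
 *
 * so that the parents' q.qlen/backlog stay consistent and an emptied
 * child is reported upward via ->qlen_notify().
 */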

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
			    struct nlmsghdr *n, u32 clid, struct Qdisc *q,
			    struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (!tc_qdisc_dump_ignore(q, false)) {
		if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
				  RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		if (new &&
		    !(parent->flags & TCQ_F_MQROOT) &&
		    rcu_access_pointer(new->stab)) {
			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
			return -EINVAL;
		}
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
	if (!ops) {
		err = -ENOENT;
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get IFF_NO_QUEUE
	 * behaviour on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before again attaching a qdisc.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	lockdep_unregister_key(&sch->root_lock_key);
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	bpf_module_put(ops, ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		 * because the change can't be undone.
		 */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack,
			  struct net_device *dev,
			  struct nlattr *tca[TCA_MAX + 1],
			  struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_get_notify(net, skb, n, clid, q, NULL);
	}
	return 0;
}

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}
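
/* For reference, the iproute2 tc subcommands map to these flag
 * combinations roughly as follows (assumption based on common
 * rtnetlink usage, not enforced here):
 *
 *	tc qdisc add     -> NLM_F_CREATE | NLM_F_EXCL	(req_create_exclusive)
 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE (req_create_or_replace)
 *	tc qdisc change  -> none of the three		(req_change)
 */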

static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			     struct netlink_ext_ack *extack,
			     struct net_device *dev,
			     struct nlattr *tca[TCA_MAX + 1],
			     struct tcmsg *tcm)
{
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
				if (IS_ERR(q))
					return PTR_ERR(q);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (q->parent != tcm->tcm_parent) {
					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
					return -EINVAL;
				}
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child qdisc is already
				 * attached to this parent and have a choice:
				 * 1) change it or 2) create/graft a new one.
				 * If the requested qdisc kind is different
				 * from the existing one, then we choose graft.
				 * If they are the same, then this is a "change"
				 * operation - just let it fall through.
				 *
				 * 1. We are allowed to create/graft only
				 * if the request explicitly states
				 * "please create if it doesn't exist".
				 *
				 * 2. If the request is for an exclusive create,
				 * then the qdisc tcm_handle is not expected
				 * to exist, so we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * This will happen when, for example, the tc
				 * utility issues a "change" command.
				 * Alas, it is sort of a hole in the API; we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (!q)
		return err;

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}

static void request_qdisc_module(struct nlattr *kind)
{
	struct Qdisc_ops *ops;
	char name[IFNAMSIZ];

	if (!kind)
		return;

	ops = qdisc_lookup_ops(kind);
	if (ops) {
		bpf_module_put(ops, ops->owner);
		return;
	}

	if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
		rtnl_unlock();
		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
		rtnl_lock();
	}
}

/*
 * Create/change qdisc.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct tcmsg *tcm;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	request_qdisc_module(tca[TCA_KIND]);

	tcm = nlmsg_data(n);
	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC, NULL) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the
	 * global qdisc hashtable, we don't want to hit it again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC, NULL) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		netdev_lock_ops(dev);
		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
				       skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				       skb, cb, &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}
		netdev_unlock_ops(dev);

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}

/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
			     struct nlmsghdr *n, struct Qdisc *q,
			     unsigned long cl, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
			   extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl,
			     struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
		if (!skb)
			return -ENOBUFS;

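		/* Fill the notification before calling ->delete below: once
		 * the class is gone, its data can no longer be dumped.
		 */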
		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
				   RTM_DELTCLASS, extack) < 0) {
			kfree_skb(skb);
			return -EINVAL;
		}
	} else {
		skb = NULL;
	}

	err = cops->delete(q, cl, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
				   n->nlmsg_flags & NLM_F_ECHO);
	return err;
}

#ifdef CONFIG_NET_CLS

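/* When a class comes or goes, filters attached to the qdisc may still hold
 * a cached binding to it via their classid. The walkers below visit every
 * filter on every chain of the qdisc's blocks and ask it to re-resolve that
 * binding: to the new class on creation, or to classid 0 on deletion.
 */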
struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (n && tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL);
		     tp; tp = tcf_get_next_proto(chain, tp)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
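	/* Without CONFIG_NET_CLS there are no filters to rebind. */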
}

#endif

static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack,
			   struct net_device *dev,
			   struct nlattr *tca[TCA_MAX + 1],
			   struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	const struct Qdisc_class_ops *cops;
	struct Qdisc *q = NULL;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - both parts given explicitly; use as-is.
	   handle == X:0         - root class.
	 */
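	/* Worked example (editor's addition): a handle packs a 16-bit major
	 * and a 16-bit minor into one u32, and tc(8) prints both halves in
	 * hex, so the classid "1:10" is 0x00010010:
	 *
	 *	TC_H_MAJ(0x00010010)              == 0x00010000  ("1:")
	 *	TC_H_MIN(0x00010010)              == 0x00000010  (":10")
	 *	TC_H_MAKE(0x00010000, 0x00000010) == 0x00010010  ("1:10")
	 *
	 * The steps below use exactly this arithmetic to complete partially
	 * specified parent/handle values against the qdisc's major number.
	 */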

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;

		/* Now qid is a genuine qdisc handle, consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) may still be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;
	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
			/* Unbind the class from its filters: rebind them to classid 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_get_notify(net, skb, n, q, cl, extack);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	/* Prevent creation of traffic classes with classid TC_H_ROOT */
	if (clid == TC_H_ROOT) {
		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
		return -EINVAL;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
		/* We just created a new class; do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS, NULL);
}

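/* Class dumps can span several netlink messages. cb->args[1] records how many
 * classes of the current qdisc were already emitted; the walker below skips
 * that many on the next pass, and w.stop signals a full skb so the netlink
 * core calls us again with the updated skip count.
 */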
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tcmsg *tcm, struct net_device *dev)
{
	struct netdev_queue *dev_queue;
	int t, s_t;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
				skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				skb, tcm, cb, &t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	return skb->len;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;

	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	netdev_lock_ops(dev);
	err = __tc_dump_tclass(skb, cb, tcm, dev);
	netdev_unlock_ops(dev);

	dev_put(dev);

	return err;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
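/* Editor's note: /proc/net/psched exposes four hex words that userspace (tc)
 * uses to convert between its time units and kernel ticks: nanoseconds per
 * "microsecond" unit, nanoseconds per psched tick, the legacy constant
 * 1000000, and the clock frequency implied by the hrtimer resolution. On a
 * typical system with high-resolution timers it reads:
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 */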

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif

static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
	{.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
	{.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
	 .dumpit = tc_dump_qdisc},
	{.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
	 .dumpit = tc_dump_tclass},
};
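/* Each .doit handles a single targeted request; .dumpit serves iterative
 * NLM_F_DUMP queries, with cb->args[] carrying the resume state between
 * successive skbs of the same dump.
 */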

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register_many(psched_rtnl_msg_handlers);

	tc_wrapper_init();

	return 0;
}

subsys_initcall(pktsched_init);