// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <linux/bpf.h>

#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   that is able to enqueue packets and to dequeue them (when the
   device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate
   the information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to make some sanity
   checks and do the part of the work that is common to all qdiscs,
   and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean that the queue is empty; it just means that
   the discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not
   a real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a nonzero error code.
   NET_XMIT_DROP - this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
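
/*
 * To make the enqueue/dequeue contract above concrete, here is a minimal
 * sketch of a trivial FIFO-style discipline (illustrative only, not part
 * of this file; "example_enqueue"/"example_dequeue" are hypothetical
 * names, loosely modelled on what sch_fifo.c does):
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 *			return qdisc_enqueue_tail(skb, sch);
 *		// NET_XMIT_DROP: this very packet was dropped
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		// Fancier disciplines may return NULL here even when
 *		// q->q.qlen != 0; for a plain FIFO the two coincide.
 *		return qdisc_dequeue_head(sch);
 *	}
 *
 * A real discipline would wrap these in a struct Qdisc_ops and register
 * it with register_qdisc() below.
 */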

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

void unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);

	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strscpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!bpf_try_module_get(q, q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif

/* We know the handle. Find the qdisc among all qdiscs attached to the device
 * (the root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
					  handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid,
				struct netlink_ext_ack *extack)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL) {
		NL_SET_ERR_MSG(extack, "Parent qdisc is not classful");
		return ERR_PTR(-EOPNOTSUPP);
	}
	cl = cops->find(p, classid);

	if (cl == 0) {
		NL_SET_ERR_MSG(extack, "Specified class not found");
		return ERR_PTR(-ENOENT);
	}
	return cops->leaf(p, cl);
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!bpf_try_module_get(q, q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2, in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing. For example, with mpu = 0 and cell_log = 3,
 * low = 0 and high = 48, so we compare rtab[0] against rtab[5].
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low = roundup(r->mpu, 48);
	int high = roundup(low+1, 48);
	int cell_low = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), TC_RTAB_SIZE)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), TC_RTAB_SIZE);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
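
/*
 * A shaping qdisc typically pairs qdisc_get_rtab() in its init/change
 * path with qdisc_put_rtab() on destroy or on a failed change. A rough
 * sketch (attribute names hypothetical, loosely modelled on sch_tbf):
 *
 *	struct qdisc_rate_table *rtab;
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_EXAMPLE_RTAB], extack);
 *	if (!rtab)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(rtab);
 */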

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 &&
		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
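
/*
 * Worked example for the lookup above (numbers purely illustrative):
 * with overhead = 0, cell_align = 0, cell_log = 6, size_log = 0 and
 * tsize = 512, a 100-byte skb yields slot = 100 >> 6 = 1, so pkt_len
 * becomes stab->data[1], i.e. the user-supplied cost of the second
 * 64-byte-wide size bucket. Slots beyond tsize extrapolate linearly
 * from the last table entry.
 */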

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		u64 softexpires;

		softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
		/* If timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (softexpires - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
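
/*
 * The watchdog is how a shaping discipline wakes the device when the
 * next packet becomes eligible to send. A rough usage sketch ("q" being
 * the private data of a hypothetical qdisc):
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		// in ->init()
 *	qdisc_watchdog_schedule_ns(&q->watchdog,	// in ->dequeue() when
 *				   next_send_time);	// throttled
 *	qdisc_watchdog_cancel(&q->watchdog);		// in ->reset()/->destroy()
 */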

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize = size;
	clhash->hashmask = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
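
/*
 * Classful qdiscs keep their classes in one of these hash tables. The
 * usual life cycle, sketched with hypothetical names (compare sch_htb
 * or sch_hfsc for real users):
 *
 *	qdisc_class_hash_init(&q->clhash);		// in ->init()
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);		// after inserting
 *	...
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	qdisc_class_hash_destroy(&q->clhash);		// in ->destroy()
 */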

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (parentid == TC_H_ROOT)
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify parent qdisc only if child qdisc becomes empty. */
		notify = !sch->q.qlen;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* Note that qlen_notify must be idempotent as it may get called
			 * multiple times.
			 */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
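
/*
 * qdisc_tree_reduce_backlog() is meant for paths that remove packets
 * from a qdisc behind its parent's back (timers, GC, limit changes), so
 * that the qlen/backlog counters of all ancestors stay consistent. A
 * sketch of the common pattern (names hypothetical):
 *
 *	unsigned int len = qdisc_pkt_len(skb);
 *
 *	qdisc_drop(skb, sch, to_free);
 *	qdisc_tree_reduce_backlog(sch, 1, len);
 */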

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report error if the graft is part of destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report error if the parent, the old child and the new
	 * one are not offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
			    struct nlmsghdr *n, u32 clid, struct Qdisc *q,
			    struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (!tc_qdisc_dump_ignore(q, false)) {
		if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
				  RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running class lockless if parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		if (new &&
		    !(parent->flags & TCQ_F_MQROOT) &&
		    rcu_access_pointer(new->stab)) {
			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
			return -EINVAL;
		}
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
	if (!ops) {
		err = -ENOENT;
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	lockdep_unregister_key(&sch->root_lock_key);
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	bpf_module_put(ops, ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack,
			  struct net_device *dev,
			  struct nlattr *tca[TCA_MAX + 1],
			  struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid, extack);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}
		if (IS_ERR(q))
			return PTR_ERR(q);

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_get_notify(net, skb, n, clid, q, NULL);
	}
	return 0;
}

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}

static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			     struct netlink_ext_ack *extack,
			     struct net_device *dev,
			     struct nlattr *tca[TCA_MAX + 1],
			     struct tcmsg *tcm)
{
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				if (p->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot add children to ingress/clsact qdisc");
					return -EOPNOTSUPP;
				}
				q = qdisc_leaf(p, clid, extack);
				if (IS_ERR(q))
					return PTR_ERR(q);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be the default qdisc; ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (q->parent != tcm->tcm_parent) {
					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
					return -EINVAL;
				}
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know that some child q is already
				 * attached to this parent and have a choice:
				 * 1) change it or 2) create/graft a new one.
				 * If the requested qdisc kind is different
				 * from the existing one, then we choose graft.
				 * If they are the same, then this is a "change"
				 * operation - just let it fall through..
				 *
				 * 1. We are allowed to create/graft only
				 * if the request explicitly states
				 * "please create if it doesn't exist".
				 *
				 * 2. If the request is to exclusive create
				 * then the qdisc tcm_handle is not expected
				 * to exist, so we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * This will happen when, for example, the tc
				 * utility issues a "change" command.
				 * Alas, it is sort of a hole in the API; we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft.
				 */
1692 if (tca[TCA_KIND] &&
1693 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1694 if (req_create_or_replace(n) ||
1695 req_create_exclusive(n))
1696 goto create_n_graft;
1697 else if (req_change(n))
1698 goto create_n_graft2;
1699 }
1700 }
1701 }
1702 } else {
1703 if (!tcm->tcm_handle) {
1704 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1705 return -EINVAL;
1706 }
1707 q = qdisc_lookup(dev, tcm->tcm_handle);
1708 }
1709
1710 /* Change qdisc parameters */
1711 if (!q) {
1712 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1713 return -ENOENT;
1714 }
1715 if (n->nlmsg_flags & NLM_F_EXCL) {
1716 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1717 return -EEXIST;
1718 }
1719 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1720 NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1721 return -EINVAL;
1722 }
1723 err = qdisc_change(q, tca, extack);
1724 if (err == 0)
1725 qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
1726 return err;
1727
1728 create_n_graft:
1729 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1730 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1731 return -ENOENT;
1732 }
1733 create_n_graft2:
1734 if (clid == TC_H_INGRESS) {
1735 if (dev_ingress_queue(dev)) {
1736 q = qdisc_create(dev, dev_ingress_queue(dev),
1737 tcm->tcm_parent, tcm->tcm_parent,
1738 tca, &err, extack);
1739 } else {
1740 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1741 err = -ENOENT;
1742 }
1743 } else {
1744 struct netdev_queue *dev_queue;
1745
1746 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1747 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1748 else if (p)
1749 dev_queue = p->dev_queue;
1750 else
1751 dev_queue = netdev_get_tx_queue(dev, 0);
1752
1753 q = qdisc_create(dev, dev_queue,
1754 tcm->tcm_parent, tcm->tcm_handle,
1755 tca, &err, extack);
1756 }
1757 if (!q)
1758 return err;
1759
1760 graft:
1761 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1762 if (err) {
1763 if (q)
1764 qdisc_put(q);
1765 return err;
1766 }
1767
1768 return 0;
1769 }
1770
request_qdisc_module(struct nlattr * kind)1771 static void request_qdisc_module(struct nlattr *kind)
1772 {
1773 struct Qdisc_ops *ops;
1774 char name[IFNAMSIZ];
1775
1776 if (!kind)
1777 return;
1778
1779 ops = qdisc_lookup_ops(kind);
1780 if (ops) {
1781 bpf_module_put(ops, ops->owner);
1782 return;
1783 }
1784
1785 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1786 rtnl_unlock();
1787 request_module(NET_SCH_ALIAS_PREFIX "%s", name);
1788 rtnl_lock();
1789 }
1790 }
1791
1792 /*
1793 * Create/change qdisc.
1794 */
tc_modify_qdisc(struct sk_buff * skb,struct nlmsghdr * n,struct netlink_ext_ack * extack)1795 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1796 struct netlink_ext_ack *extack)
1797 {
1798 struct net *net = sock_net(skb->sk);
1799 struct nlattr *tca[TCA_MAX + 1];
1800 struct net_device *dev;
1801 struct tcmsg *tcm;
1802 int err;
1803
1804 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1805 rtm_tca_policy, extack);
1806 if (err < 0)
1807 return err;
1808
1809 request_qdisc_module(tca[TCA_KIND]);
1810
1811 tcm = nlmsg_data(n);
1812 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1813 if (!dev)
1814 return -ENODEV;
1815
1816 netdev_lock_ops(dev);
1817 err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
1818 netdev_unlock_ops(dev);
1819
1820 return err;
1821 }
1822
tc_dump_qdisc_root(struct Qdisc * root,struct sk_buff * skb,struct netlink_callback * cb,int * q_idx_p,int s_q_idx,bool recur,bool dump_invisible)1823 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1824 struct netlink_callback *cb,
1825 int *q_idx_p, int s_q_idx, bool recur,
1826 bool dump_invisible)
1827 {
1828 int ret = 0, q_idx = *q_idx_p;
1829 struct Qdisc *q;
1830 int b;
1831
1832 if (!root)
1833 return 0;
1834
1835 q = root;
1836 if (q_idx < s_q_idx) {
1837 q_idx++;
1838 } else {
1839 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1840 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1841 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1842 RTM_NEWQDISC, NULL) <= 0)
1843 goto done;
1844 q_idx++;
1845 }
1846
1847 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1848 * itself has already been dumped.
1849 *
1850 * If we've already dumped the top-level (ingress) qdisc above and the global
1851 * qdisc hashtable, we don't want to hit it again
1852 */
1853 if (!qdisc_dev(root) || !recur)
1854 goto out;
1855
1856 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1857 if (q_idx < s_q_idx) {
1858 q_idx++;
1859 continue;
1860 }
1861 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1862 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1863 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1864 RTM_NEWQDISC, NULL) <= 0)
1865 goto done;
1866 q_idx++;
1867 }
1868
1869 out:
1870 *q_idx_p = q_idx;
1871 return ret;
1872 done:
1873 ret = -1;
1874 goto out;
1875 }

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
				     rtm_tca_policy, cb->extack);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		netdev_lock_ops(dev);
		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
				       skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				       skb, cb, &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0) {
			netdev_unlock_ops(dev);
			goto done;
		}
		netdev_unlock_ops(dev);

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
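
/* Dump cursor sketch: cb->args[0] is the index of the device being
 * dumped and cb->args[1] the number of its qdiscs already emitted.
 * If, say, the skb fills up on the third qdisc of the second device,
 * we return with args = {1, 2} and the next callback resumes at
 * exactly that point.  (Example values for illustration only.)
 */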


/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/

static int tc_fill_tclass(struct sk_buff *skb, strustruct Qdisc *q,
			  unsigned long cl, u32 portid, u32 seq, u16 flags,
			  int event, struct netlink_ext_ack *extack)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
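
/* Resulting message layout (sketch):
 *
 *   struct nlmsghdr          @event, NLM_F_MULTI when dumping
 *   struct tcmsg             ifindex/parent filled here; handle and
 *                            parent overwritten by cl_ops->dump()
 *   TCA_KIND                 string, e.g. "htb"
 *   <class-specific attrs>   from cl_ops->dump()
 *   TCA_STATS2 / TCA_STATS   from cl_ops->dump_stats() via gnet_dump
 *   TCA_EXT_WARN_MSG         optional extack warning string
 *
 * "htb" above is just an example of a classful qdisc id.
 */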

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
			     struct nlmsghdr *n, struct Qdisc *q,
			     unsigned long cl, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
			   extack) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

static int tclass_del_notify(struct net *net,
			     const struct Qdisc_class_ops *cops,
			     struct sk_buff *oskb, struct nlmsghdr *n,
			     struct Qdisc *q, unsigned long cl,
			     struct netlink_ext_ack *extack)
{
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
	struct sk_buff *skb;
	int err = 0;

	if (!cops->delete)
		return -EOPNOTSUPP;

	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
		if (!skb)
			return -ENOBUFS;

		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
				   RTM_DELTCLASS, extack) < 0) {
			kfree_skb(skb);
			return -EINVAL;
		}
	} else {
		skb = NULL;
	}

	err = cops->delete(q, cl, extack);
	if (err) {
		kfree_skb(skb);
		return err;
	}

	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
				   n->nlmsg_flags & NLM_F_ECHO);
	return err;
}
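
/* Note the ordering above: the RTM_DELTCLASS notification skb is
 * filled in while the class still exists, because after a successful
 * cops->delete() the @cl reference is dead and can no longer be
 * dumped.  rtnetlink_maybe_send() tolerates a NULL skb for the case
 * where no listener needed a notification in the first place.
 */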

#ifdef CONFIG_NET_CLS

struct tcf_bind_args {
	struct tcf_walker w;
	unsigned long base;
	unsigned long cl;
	u32 classid;
};

static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
{
	struct tcf_bind_args *a = (void *)arg;

	if (n && tp->ops->bind_class) {
		struct Qdisc *q = tcf_block_q(tp->chain->block);

		sch_tree_lock(q);
		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
		sch_tree_unlock(q);
	}
	return 0;
}

struct tc_bind_class_args {
	struct qdisc_walker w;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
};

static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
				struct qdisc_walker *w)
{
	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tcf_block *block;
	struct tcf_chain *chain;

	block = cops->tcf_block(q, cl, NULL);
	if (!block)
		return 0;
	for (chain = tcf_get_next_chain(block, NULL);
	     chain;
	     chain = tcf_get_next_chain(block, chain)) {
		struct tcf_proto *tp;

		for (tp = tcf_get_next_proto(chain, NULL);
		     tp; tp = tcf_get_next_proto(chain, tp)) {
			struct tcf_bind_args arg = {};

			arg.w.fn = tcf_node_bind;
			arg.classid = a->clid;
			arg.base = cl;
			arg.cl = a->new_cl;
			tp->ops->walk(tp, &arg.w, true);
		}
	}

	return 0;
}

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct tc_bind_class_args args = {};

	if (!cops->tcf_block)
		return;
	args.portid = portid;
	args.clid = clid;
	args.new_cl = new_cl;
	args.w.fn = tc_bind_class_walker;
	q->ops->cl_ops->walk(q, &args.w);
}
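
/* The walk above is four levels deep: every class of @q -> its
 * tcf_block -> every filter chain -> every tcf_proto -> every filter
 * node, with tcf_node_bind() re-binding nodes whose classid matches
 * @clid to @new_cl.  Deleting a class passes new_cl == 0 so that
 * filters pointing at the dead class are unbound rather than left
 * dangling.
 */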

#else

static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
			   unsigned long new_cl)
{
}

#endif

static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack,
			   struct net_device *dev,
			   struct nlattr *tca[TCA_MAX + 1],
			   struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	const struct Qdisc_class_ops *cops;
	struct Qdisc *q = NULL;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0         - parent is root class.
	   parent == X:Y         - parent is a node in hierarchy.
	   parent == 0:Y         - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0         - generate handle from kernel pool.
	   handle == 0:Y         - class is X:Y, where X:0 is qdisc.
	   handle == X:Y         - fully specified handle.
	   handle == X:0         - root class.
	 */
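
	/* Concrete example (values illustrative): "tc class add dev eth0
	 * parent 1: classid 1:10" arrives with tcm_parent == 0x00010000
	 * (1:0) and tcm_handle == 0x0001000a (1:10); the majors agree, so
	 * qid resolves to the qdisc 1:0 below.
	 */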

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = rtnl_dereference(dev->qdisc)->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
			/* Unbind the class from its filters by re-binding to 0 */
			tc_bind_tclass(q, portid, clid, 0);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_get_notify(net, skb, n, q, cl, extack);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
		return -EOPNOTSUPP;
	}

	/* Prevent creation of traffic classes with classid TC_H_ROOT */
	if (clid == TC_H_ROOT) {
		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
		return -EINVAL;
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl, extack);
	if (err == 0) {
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
		/* We just created a new class; do the reverse binding. */
		if (cl != new_cl)
			tc_bind_tclass(q, portid, clid, new_cl);
	}
out:
	return err;
}

static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

struct qdisc_dump_args {
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
			    struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			      RTM_NEWTCLASS, NULL);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
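
/* Resume state here mirrors the qdisc dump: cb->args[0] counts qdiscs
 * visited (*t_p versus s_t), cb->args[1] counts classes already dumped
 * within the current qdisc and becomes the walker's skip value; the
 * remaining cb->args slots are scratch for the class walker and are
 * zeroed whenever we move on to a fresh qdisc.
 */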

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t, bool recur)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root) || !recur)
		return 0;

	if (tcm->tcm_parent) {
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && q != root &&
		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
			    struct tcmsg *tcm, struct net_device *dev)
{
	struct netdev_queue *dev_queue;
	int t, s_t;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
				skb, tcm, cb, &t, s_t, true) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
				skb, tcm, cb, &t, s_t, false) < 0)
		goto done;

done:
	cb->args[0] = t;

	return skb->len;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int err;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;

	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	netdev_lock_ops(dev);
	err = __tc_dump_tclass(skb, cb, tcm, dev);
	netdev_unlock_ops(dev);

	dev_put(dev);

	return err;
}

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
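
/* /proc/net/psched thus holds four hex words: nanoseconds per
 * microsecond, nanoseconds per psched tick, a compatibility constant
 * (1000000), and the hrtimer clock resolution expressed in Hz.  A
 * high-resolution-timer system would typically show something like
 * "000003e8 00000040 000f4240 3b9aca00" (values illustrative);
 * userspace tc reads this file to convert between ticks and time.
 */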

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create_single("psched", 0, net->proc_net, psched_show);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

#if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
#endif

static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
	{.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
	{.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
	 .dumpit = tc_dump_qdisc},
	{.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
	{.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
	 .dumpit = tc_dump_tclass},
};
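
/* Rough mapping to the tc(8) command line (informal, iproute2 view):
 * RTM_NEWQDISC  <- "tc qdisc add/change/replace",
 * RTM_DELQDISC  <- "tc qdisc del",
 * RTM_GETQDISC  <- "tc qdisc show" (the dumpit path),
 * with the *TCLASS equivalents serving "tc class ...".
 */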

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register_many(psched_rtnl_msg_handlers);

	tc_wrapper_init();

	return 0;
}

subsys_initcall(pktsched_init);