xref: /linux/net/sched/sch_api.c (revision de5ca699bc3f7fe9f90ba927d8a6e7783cd7311d)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_api.c	Packet scheduler API.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Fixes:
8  *
9  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12  */
13 
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/string.h>
18 #include <linux/errno.h>
19 #include <linux/skbuff.h>
20 #include <linux/init.h>
21 #include <linux/proc_fs.h>
22 #include <linux/seq_file.h>
23 #include <linux/kmod.h>
24 #include <linux/list.h>
25 #include <linux/hrtimer.h>
26 #include <linux/slab.h>
27 #include <linux/hashtable.h>
28 
29 #include <net/netdev_lock.h>
30 #include <net/net_namespace.h>
31 #include <net/sock.h>
32 #include <net/netlink.h>
33 #include <net/pkt_sched.h>
34 #include <net/pkt_cls.h>
35 #include <net/tc_wrapper.h>
36 
37 #include <trace/events/qdisc.h>
38 
39 /*
40 
41    Short review.
42    -------------
43 
44    This file consists of two interrelated parts:
45 
46    1. queueing disciplines manager frontend.
47    2. traffic classes manager frontend.
48 
49    Generally, a queueing discipline ("qdisc") is a black box
50    that can enqueue packets and dequeue them (when the device
51    is ready to send something) in an order and at times
52    determined by the algorithm hidden inside it.
53 
54    qdiscs are divided into two categories:
55    - "queues", which have no internal structure visible from outside.
56    - "schedulers", which split all the packets into "traffic classes"
57      using "packet classifiers" (see cls_api.c).
58 
59    In turn, classes may have child qdiscs (as a rule, queues)
60    attached to them, and so on.
61 
62    The goal of the routines in this file is to translate
63    the information supplied by the user in the form of handles
64    into a form more intelligible to the kernel, to perform
65    sanity checks and the parts of the work common to all qdiscs,
66    and to provide rtnetlink notifications.
67 
68    All real intelligent work is done inside qdisc modules.
69 
70 
71 
72    Every discipline has two major routines: enqueue and dequeue.
73 
74    ---dequeue
75 
76    dequeue usually returns an skb to send. It is allowed to return NULL,
77    but that does not mean the queue is empty; it just means that the
78    discipline does not want to send anything this time.
79    The queue is really empty only if q->q.qlen == 0.
80    For complicated disciplines with multiple queues, q->q is not the
81    real packet queue, but q->q.qlen must nevertheless be valid.
82 
83    ---enqueue
84 
85    enqueue returns 0 if the packet was enqueued successfully.
86    If a packet (this one or another one) was dropped, it returns
87    a non-zero error code.
88    NET_XMIT_DROP 	- this packet was dropped
89      Expected action: do not back off, but wait until the queue clears.
90    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
91      Expected action: back off or ignore
92 
93    Auxiliary routines:
94 
95    ---peek
96 
97    like dequeue but without removing a packet from the queue
98 
99    ---reset
100 
101    returns the qdisc to its initial state: purges all buffers, clears
102    all timers and counters (except for statistics), etc.
103 
104    ---init
105 
106    initializes newly created qdisc.
107 
108    ---destroy
109 
110    destroys the resources allocated by init and during the lifetime of the qdisc.
111 
112    ---change
113 
114    changes qdisc parameters.
115  */
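
/* A minimal sketch of how the routines above map onto struct Qdisc_ops.
 * This is illustrative only; "example_fifo" and its callbacks are
 * hypothetical and are not part of this file:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < READ_ONCE(sch->limit)))
 *			return qdisc_enqueue_tail(skb, sch);	// 0 on success
 *		return qdisc_drop(skb, sch, to_free);		// NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		// May return NULL even when sch->q.qlen != 0, if the
 *		// discipline does not want to send anything right now.
 *		return qdisc_dequeue_head(sch);
 *	}
 *
 *	static struct Qdisc_ops example_fifo_ops __read_mostly = {
 *		.id	 = "example_fifo",
 *		.enqueue = example_enqueue,
 *		.dequeue = example_dequeue,
 *		.peek	 = qdisc_peek_head,
 *		.owner	 = THIS_MODULE,
 *	};
 */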
116 
117 /* Protects the list of registered TC modules. It is a pure SMP lock. */
118 static DEFINE_RWLOCK(qdisc_mod_lock);
119 
120 
121 /************************************************
122  *	Queueing disciplines manipulation.	*
123  ************************************************/
124 
125 
126 /* The list of all installed queueing disciplines. */
127 
128 static struct Qdisc_ops *qdisc_base;
129 
130 /* Register/unregister queueing discipline */
131 
132 int register_qdisc(struct Qdisc_ops *qops)
133 {
134 	struct Qdisc_ops *q, **qp;
135 	int rc = -EEXIST;
136 
137 	write_lock(&qdisc_mod_lock);
138 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
139 		if (!strcmp(qops->id, q->id))
140 			goto out;
141 
142 	if (qops->enqueue == NULL)
143 		qops->enqueue = noop_qdisc_ops.enqueue;
144 	if (qops->peek == NULL) {
145 		if (qops->dequeue == NULL)
146 			qops->peek = noop_qdisc_ops.peek;
147 		else
148 			goto out_einval;
149 	}
150 	if (qops->dequeue == NULL)
151 		qops->dequeue = noop_qdisc_ops.dequeue;
152 
153 	if (qops->cl_ops) {
154 		const struct Qdisc_class_ops *cops = qops->cl_ops;
155 
156 		if (!(cops->find && cops->walk && cops->leaf))
157 			goto out_einval;
158 
159 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
160 			goto out_einval;
161 	}
162 
163 	qops->next = NULL;
164 	*qp = qops;
165 	rc = 0;
166 out:
167 	write_unlock(&qdisc_mod_lock);
168 	return rc;
169 
170 out_einval:
171 	rc = -EINVAL;
172 	goto out;
173 }
174 EXPORT_SYMBOL(register_qdisc);
175 
176 void unregister_qdisc(struct Qdisc_ops *qops)
177 {
178 	struct Qdisc_ops *q, **qp;
179 	int err = -ENOENT;
180 
181 	write_lock(&qdisc_mod_lock);
182 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
183 		if (q == qops)
184 			break;
185 	if (q) {
186 		*qp = q->next;
187 		q->next = NULL;
188 		err = 0;
189 	}
190 	write_unlock(&qdisc_mod_lock);
191 
192 	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
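
/* A sketch of the typical register/unregister pairing in a qdisc module,
 * using the hypothetical example_fifo_ops from the sketch above:
 *
 *	static int __init example_fifo_module_init(void)
 *	{
 *		return register_qdisc(&example_fifo_ops);
 *	}
 *
 *	static void __exit example_fifo_module_exit(void)
 *	{
 *		unregister_qdisc(&example_fifo_ops);
 *	}
 *	module_init(example_fifo_module_init);
 *	module_exit(example_fifo_module_exit);
 */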
195 
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199 	read_lock(&qdisc_mod_lock);
200 	strscpy(name, default_qdisc_ops->id, len);
201 	read_unlock(&qdisc_mod_lock);
202 }
203 
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206 	struct Qdisc_ops *q = NULL;
207 
208 	for (q = qdisc_base; q; q = q->next) {
209 		if (!strcmp(name, q->id)) {
210 			if (!try_module_get(q->owner))
211 				q = NULL;
212 			break;
213 		}
214 	}
215 
216 	return q;
217 }
218 
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222 	const struct Qdisc_ops *ops;
223 
224 	if (!capable(CAP_NET_ADMIN))
225 		return -EPERM;
226 
227 	write_lock(&qdisc_mod_lock);
228 	ops = qdisc_lookup_default(name);
229 	if (!ops) {
230 		/* Not found, drop lock and try to load module */
231 		write_unlock(&qdisc_mod_lock);
232 		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
233 		write_lock(&qdisc_mod_lock);
234 
235 		ops = qdisc_lookup_default(name);
236 	}
237 
238 	if (ops) {
239 		/* Set new default */
240 		module_put(default_qdisc_ops->owner);
241 		default_qdisc_ops = ops;
242 	}
243 	write_unlock(&qdisc_mod_lock);
244 
245 	return ops ? 0 : -ENOENT;
246 }
247 
248 #ifdef CONFIG_NET_SCH_DEFAULT
249 /* Set default value from kernel config */
250 static int __init sch_default_qdisc(void)
251 {
252 	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
253 }
254 late_initcall(sch_default_qdisc);
255 #endif
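
/* Besides the kernel config default above, qdisc_set_default() is also
 * reached at run time through the net.core.default_qdisc sysctl, e.g.
 * from userspace (illustrative):
 *
 *	# sysctl -w net.core.default_qdisc=fq_codel
 *
 * Subsequently attached root qdiscs then use default_qdisc_ops.
 */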
256 
257 /* We know the handle. Find the qdisc among all qdiscs attached to the device
258  * (the root qdisc, all its children, children of children, etc.).
259  * Note: the caller must hold either the RTNL lock or rcu_read_lock().
260  */
261 
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264 	struct Qdisc *q;
265 
266 	if (!qdisc_dev(root))
267 		return (root->handle == handle ? root : NULL);
268 
269 	if (!(root->flags & TCQ_F_BUILTIN) &&
270 	    root->handle == handle)
271 		return root;
272 
273 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
274 				   lockdep_rtnl_is_held()) {
275 		if (q->handle == handle)
276 			return q;
277 	}
278 	return NULL;
279 }
280 
281 void qdisc_hash_add(struct Qdisc *q, bool invisible)
282 {
283 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
284 		ASSERT_RTNL();
285 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
286 		if (invisible)
287 			q->flags |= TCQ_F_INVISIBLE;
288 	}
289 }
290 EXPORT_SYMBOL(qdisc_hash_add);
291 
292 void qdisc_hash_del(struct Qdisc *q)
293 {
294 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
295 		ASSERT_RTNL();
296 		hash_del_rcu(&q->hash);
297 	}
298 }
299 EXPORT_SYMBOL(qdisc_hash_del);
300 
301 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
302 {
303 	struct Qdisc *q;
304 
305 	if (!handle)
306 		return NULL;
307 	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
308 	if (q)
309 		goto out;
310 
311 	if (dev_ingress_queue(dev))
312 		q = qdisc_match_from_root(
313 			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
314 			handle);
315 out:
316 	return q;
317 }
318 
319 struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
320 {
321 	struct netdev_queue *nq;
322 	struct Qdisc *q;
323 
324 	if (!handle)
325 		return NULL;
326 	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
327 	if (q)
328 		goto out;
329 
330 	nq = dev_ingress_queue_rcu(dev);
331 	if (nq)
332 		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
333 					  handle);
334 out:
335 	return q;
336 }
337 
338 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
339 {
340 	unsigned long cl;
341 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
342 
343 	if (cops == NULL)
344 		return NULL;
345 	cl = cops->find(p, classid);
346 
347 	if (cl == 0)
348 		return NULL;
349 	return cops->leaf(p, cl);
350 }
351 
352 /* Find queueing discipline by name */
353 
354 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
355 {
356 	struct Qdisc_ops *q = NULL;
357 
358 	if (kind) {
359 		read_lock(&qdisc_mod_lock);
360 		for (q = qdisc_base; q; q = q->next) {
361 			if (nla_strcmp(kind, q->id) == 0) {
362 				if (!try_module_get(q->owner))
363 					q = NULL;
364 				break;
365 			}
366 		}
367 		read_unlock(&qdisc_mod_lock);
368 	}
369 	return q;
370 }
371 
372 /* In older versions, the linklayer setting was not transferred from
373  * iproute2, and the rate table lookup system has been dropped from
374  * the kernel. To stay backward compatible with older iproute2 tc
375  * utilities, we detect the linklayer setting by checking whether the
376  * rate table was modified.
377  *
378  * For linklayer ATM table entries, the rate table will be aligned to
379  * 48 bytes, thus some table entries will contain the same value.  The
380  * mpu (min packet unit) is also encoded into the old rate table, thus
381  * starting from the mpu, we find the low and high table entries for
382  * mapping this cell.  If these entries contain the same value, then
383  * the rate table has been modified for linklayer ATM.
384  *
385  * This is done by rounding the mpu up to the nearest 48-byte cell/entry,
386  * then rounding up to the next cell, calculating the table entry one
387  * below, and comparing.
388  */
389 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
390 {
391 	int low       = roundup(r->mpu, 48);
392 	int high      = roundup(low+1, 48);
393 	int cell_low  = low >> r->cell_log;
394 	int cell_high = (high >> r->cell_log) - 1;
395 
396 	/* rtab is too inaccurate at rates > 100Mbit/s */
397 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
398 		pr_debug("TC linklayer: Giving up ATM detection\n");
399 		return TC_LINKLAYER_ETHERNET;
400 	}
401 
402 	if ((cell_high > cell_low) && (cell_high < 256)
403 	    && (rtab[cell_low] == rtab[cell_high])) {
404 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
405 			 cell_low, cell_high, rtab[cell_high]);
406 		return TC_LINKLAYER_ATM;
407 	}
408 	return TC_LINKLAYER_ETHERNET;
409 }
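
/* A worked example of the detection arithmetic above, with hypothetical
 * parameters mpu = 0 and cell_log = 3: low = roundup(0, 48) = 0 and
 * high = roundup(1, 48) = 48, so cell_low = 0 >> 3 = 0 and
 * cell_high = (48 >> 3) - 1 = 5.  An ATM-aligned table maps every size
 * within the same 48-byte cell to one value, so rtab[0] == rtab[5] and
 * we return TC_LINKLAYER_ATM; an unmodified Ethernet table normally
 * differs between those two slots.
 */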
410 
411 static struct qdisc_rate_table *qdisc_rtab_list;
412 
413 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
414 					struct nlattr *tab,
415 					struct netlink_ext_ack *extack)
416 {
417 	struct qdisc_rate_table *rtab;
418 
419 	if (tab == NULL || r->rate == 0 ||
420 	    r->cell_log == 0 || r->cell_log >= 32 ||
421 	    nla_len(tab) != TC_RTAB_SIZE) {
422 		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
423 		return NULL;
424 	}
425 
426 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
427 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
428 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
429 			rtab->refcnt++;
430 			return rtab;
431 		}
432 	}
433 
434 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
435 	if (rtab) {
436 		rtab->rate = *r;
437 		rtab->refcnt = 1;
438 		memcpy(rtab->data, nla_data(tab), 1024);
439 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
440 			r->linklayer = __detect_linklayer(r, rtab->data);
441 		rtab->next = qdisc_rtab_list;
442 		qdisc_rtab_list = rtab;
443 	} else {
444 		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
445 	}
446 	return rtab;
447 }
448 EXPORT_SYMBOL(qdisc_get_rtab);
449 
450 void qdisc_put_rtab(struct qdisc_rate_table *tab)
451 {
452 	struct qdisc_rate_table *rtab, **rtabp;
453 
454 	if (!tab || --tab->refcnt)
455 		return;
456 
457 	for (rtabp = &qdisc_rtab_list;
458 	     (rtab = *rtabp) != NULL;
459 	     rtabp = &rtab->next) {
460 		if (rtab == tab) {
461 			*rtabp = rtab->next;
462 			kfree(rtab);
463 			return;
464 		}
465 	}
466 }
467 EXPORT_SYMBOL(qdisc_put_rtab);
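
/* A sketch of how a rate-table based qdisc consumes this get/put pair,
 * loosely modeled on sch_tbf ("q", "qopt" and TCA_TBF_RTAB belong to
 * that caller, not to this file):
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB], extack);
 *	if (!rtab)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(rtab);	// in ->destroy() or after a change
 */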
468 
469 static LIST_HEAD(qdisc_stab_list);
470 
471 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
472 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
473 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
474 };
475 
476 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
477 					       struct netlink_ext_ack *extack)
478 {
479 	struct nlattr *tb[TCA_STAB_MAX + 1];
480 	struct qdisc_size_table *stab;
481 	struct tc_sizespec *s;
482 	unsigned int tsize = 0;
483 	u16 *tab = NULL;
484 	int err;
485 
486 	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
487 					  extack);
488 	if (err < 0)
489 		return ERR_PTR(err);
490 	if (!tb[TCA_STAB_BASE]) {
491 		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
492 		return ERR_PTR(-EINVAL);
493 	}
494 
495 	s = nla_data(tb[TCA_STAB_BASE]);
496 
497 	if (s->tsize > 0) {
498 		if (!tb[TCA_STAB_DATA]) {
499 			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
500 			return ERR_PTR(-EINVAL);
501 		}
502 		tab = nla_data(tb[TCA_STAB_DATA]);
503 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
504 	}
505 
506 	if (tsize != s->tsize || (!tab && tsize > 0)) {
507 		NL_SET_ERR_MSG(extack, "Invalid size of size table");
508 		return ERR_PTR(-EINVAL);
509 	}
510 
511 	list_for_each_entry(stab, &qdisc_stab_list, list) {
512 		if (memcmp(&stab->szopts, s, sizeof(*s)))
513 			continue;
514 		if (tsize > 0 &&
515 		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
516 			continue;
517 		stab->refcnt++;
518 		return stab;
519 	}
520 
521 	if (s->size_log > STAB_SIZE_LOG_MAX ||
522 	    s->cell_log > STAB_SIZE_LOG_MAX) {
523 		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
524 		return ERR_PTR(-EINVAL);
525 	}
526 
527 	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
528 	if (!stab)
529 		return ERR_PTR(-ENOMEM);
530 
531 	stab->refcnt = 1;
532 	stab->szopts = *s;
533 	if (tsize > 0)
534 		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
535 
536 	list_add_tail(&stab->list, &qdisc_stab_list);
537 
538 	return stab;
539 }
540 
541 void qdisc_put_stab(struct qdisc_size_table *tab)
542 {
543 	if (!tab)
544 		return;
545 
546 	if (--tab->refcnt == 0) {
547 		list_del(&tab->list);
548 		kfree_rcu(tab, rcu);
549 	}
550 }
551 EXPORT_SYMBOL(qdisc_put_stab);
552 
553 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
554 {
555 	struct nlattr *nest;
556 
557 	nest = nla_nest_start_noflag(skb, TCA_STAB);
558 	if (nest == NULL)
559 		goto nla_put_failure;
560 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
561 		goto nla_put_failure;
562 	nla_nest_end(skb, nest);
563 
564 	return skb->len;
565 
566 nla_put_failure:
567 	return -1;
568 }
569 
570 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
571 			       const struct qdisc_size_table *stab)
572 {
573 	int pkt_len, slot;
574 
575 	pkt_len = skb->len + stab->szopts.overhead;
576 	if (unlikely(!stab->szopts.tsize))
577 		goto out;
578 
579 	slot = pkt_len + stab->szopts.cell_align;
580 	if (unlikely(slot < 0))
581 		slot = 0;
582 
583 	slot >>= stab->szopts.cell_log;
584 	if (likely(slot < stab->szopts.tsize))
585 		pkt_len = stab->data[slot];
586 	else
587 		pkt_len = stab->data[stab->szopts.tsize - 1] *
588 				(slot / stab->szopts.tsize) +
589 				stab->data[slot % stab->szopts.tsize];
590 
591 	pkt_len <<= stab->szopts.size_log;
592 out:
593 	if (unlikely(pkt_len < 1))
594 		pkt_len = 1;
595 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
596 }
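
/* A worked example of the lookup above, with hypothetical stab values
 * overhead = 24, cell_align = -1, cell_log = 6, size_log = 6 and
 * tsize = 512: a 1400-byte skb gives pkt_len = 1400 + 24 = 1424,
 * slot = (1424 - 1) >> 6 = 22, and the final pkt_len is
 * stab->data[22] << 6.  A slot beyond the table tail is extrapolated
 * from the last entry, as in the else branch above.
 */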
597 
598 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
599 {
600 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
601 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
602 			txt, qdisc->ops->id, qdisc->handle >> 16);
603 		qdisc->flags |= TCQ_F_WARN_NONWC;
604 	}
605 }
606 EXPORT_SYMBOL(qdisc_warn_nonwc);
607 
608 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
609 {
610 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
611 						 timer);
612 
613 	rcu_read_lock();
614 	__netif_schedule(qdisc_root(wd->qdisc));
615 	rcu_read_unlock();
616 
617 	return HRTIMER_NORESTART;
618 }
619 
620 void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
621 				 clockid_t clockid)
622 {
623 	hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
624 	wd->timer.function = qdisc_watchdog;
625 	wd->qdisc = qdisc;
626 }
627 EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
628 
629 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
630 {
631 	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
632 }
633 EXPORT_SYMBOL(qdisc_watchdog_init);
634 
635 void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
636 				      u64 delta_ns)
637 {
638 	bool deactivated;
639 
640 	rcu_read_lock();
641 	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
642 			       &qdisc_root_sleeping(wd->qdisc)->state);
643 	rcu_read_unlock();
644 	if (deactivated)
645 		return;
646 
647 	if (hrtimer_is_queued(&wd->timer)) {
648 		u64 softexpires;
649 
650 		softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
651 		/* If timer is already set in [expires, expires + delta_ns],
652 		 * do not reprogram it.
653 		 */
654 		if (softexpires - expires <= delta_ns)
655 			return;
656 	}
657 
658 	hrtimer_start_range_ns(&wd->timer,
659 			       ns_to_ktime(expires),
660 			       delta_ns,
661 			       HRTIMER_MODE_ABS_PINNED);
662 }
663 EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
664 
665 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
666 {
667 	hrtimer_cancel(&wd->timer);
668 }
669 EXPORT_SYMBOL(qdisc_watchdog_cancel);
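
/* A sketch of the usual watchdog pattern in a shaping qdisc's ->dequeue()
 * ("q" and its next_send_time are hypothetical):
 *
 *	u64 now = ktime_get_ns();
 *
 *	if (q->next_send_time > now) {
 *		// Nothing is eligible yet: arm the timer and send nothing.
 *		qdisc_watchdog_schedule_ns(&q->watchdog, q->next_send_time);
 *		return NULL;
 *	}
 *
 * When the hrtimer fires, qdisc_watchdog() above reschedules the root
 * qdisc so that ->dequeue() runs again.
 */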
670 
671 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
672 {
673 	struct hlist_head *h;
674 	unsigned int i;
675 
676 	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
677 
678 	if (h != NULL) {
679 		for (i = 0; i < n; i++)
680 			INIT_HLIST_HEAD(&h[i]);
681 	}
682 	return h;
683 }
684 
685 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
686 {
687 	struct Qdisc_class_common *cl;
688 	struct hlist_node *next;
689 	struct hlist_head *nhash, *ohash;
690 	unsigned int nsize, nmask, osize;
691 	unsigned int i, h;
692 
693 	/* Rehash when load factor exceeds 0.75 */
694 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
695 		return;
696 	nsize = clhash->hashsize * 2;
697 	nmask = nsize - 1;
698 	nhash = qdisc_class_hash_alloc(nsize);
699 	if (nhash == NULL)
700 		return;
701 
702 	ohash = clhash->hash;
703 	osize = clhash->hashsize;
704 
705 	sch_tree_lock(sch);
706 	for (i = 0; i < osize; i++) {
707 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
708 			h = qdisc_class_hash(cl->classid, nmask);
709 			hlist_add_head(&cl->hnode, &nhash[h]);
710 		}
711 	}
712 	clhash->hash     = nhash;
713 	clhash->hashsize = nsize;
714 	clhash->hashmask = nmask;
715 	sch_tree_unlock(sch);
716 
717 	kvfree(ohash);
718 }
719 EXPORT_SYMBOL(qdisc_class_hash_grow);
720 
721 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
722 {
723 	unsigned int size = 4;
724 
725 	clhash->hash = qdisc_class_hash_alloc(size);
726 	if (!clhash->hash)
727 		return -ENOMEM;
728 	clhash->hashsize  = size;
729 	clhash->hashmask  = size - 1;
730 	clhash->hashelems = 0;
731 	return 0;
732 }
733 EXPORT_SYMBOL(qdisc_class_hash_init);
734 
735 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
736 {
737 	kvfree(clhash->hash);
738 }
739 EXPORT_SYMBOL(qdisc_class_hash_destroy);
740 
741 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
742 			     struct Qdisc_class_common *cl)
743 {
744 	unsigned int h;
745 
746 	INIT_HLIST_NODE(&cl->hnode);
747 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
748 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
749 	clhash->hashelems++;
750 }
751 EXPORT_SYMBOL(qdisc_class_hash_insert);
752 
753 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
754 			     struct Qdisc_class_common *cl)
755 {
756 	hlist_del(&cl->hnode);
757 	clhash->hashelems--;
758 }
759 EXPORT_SYMBOL(qdisc_class_hash_remove);
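
/* A sketch of how a classful qdisc drives this class hash over its
 * lifetime, patterned after sch_htb (names and error handling are the
 * caller's, not this file's):
 *
 *	err = qdisc_class_hash_init(&q->clhash);	// in ->init()
 *	...
 *	cl->common.classid = classid;			// new class created
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);
 *	...
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	qdisc_class_hash_destroy(&q->clhash);		// in ->destroy()
 */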
760 
761 /* Allocate a unique handle from the space managed by the kernel.
762  * The possible range is [8000-FFFF]:0000 (0x8000 values).
763  */
764 static u32 qdisc_alloc_handle(struct net_device *dev)
765 {
766 	int i = 0x8000;
767 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
768 
769 	do {
770 		autohandle += TC_H_MAKE(0x10000U, 0);
771 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
772 			autohandle = TC_H_MAKE(0x80000000U, 0);
773 		if (!qdisc_lookup(dev, autohandle))
774 			return autohandle;
775 		cond_resched();
776 	} while	(--i > 0);
777 
778 	return 0;
779 }
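
/* Handles are 32-bit <major>:<minor> pairs, so TC_H_MAKE(0x10000U, 0)
 * steps the major number by one.  For example, the auto-allocated
 * handle "8001:" is 0x80010000: TC_H_MAJ() of it is 0x80010000 and
 * TC_H_MIN() is 0.  The loop above therefore probes up to 0x8000
 * distinct major numbers before giving up.
 */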
780 
781 void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
782 {
783 	bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
784 	const struct Qdisc_class_ops *cops;
785 	unsigned long cl;
786 	u32 parentid;
787 	bool notify;
788 	int drops;
789 
790 	if (n == 0 && len == 0)
791 		return;
792 	drops = max_t(int, n, 0);
793 	rcu_read_lock();
794 	while ((parentid = sch->parent)) {
795 		if (parentid == TC_H_ROOT)
796 			break;
797 
798 		if (sch->flags & TCQ_F_NOPARENT)
799 			break;
800 		/* Notify the parent qdisc only if the child qdisc becomes empty.
801 		 *
802 		 * If the child was empty even before the update, then the backlog
803 		 * counter is screwed and we skip the notification because the
804 		 * parent class is already passive.
805 		 *
806 		 * If the original child was offloaded, then it is allowed
807 		 * to be seen as empty, so the parent is notified anyway.
808 		 */
809 		notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
810 						       !qdisc_is_offloaded);
811 		/* TODO: perform the search on a per txq basis */
812 		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
813 		if (sch == NULL) {
814 			WARN_ON_ONCE(parentid != TC_H_ROOT);
815 			break;
816 		}
817 		cops = sch->ops->cl_ops;
818 		if (notify && cops->qlen_notify) {
819 			cl = cops->find(sch, parentid);
820 			cops->qlen_notify(sch, cl);
821 		}
822 		sch->q.qlen -= n;
823 		sch->qstats.backlog -= len;
824 		__qdisc_qstats_drop(sch, drops);
825 	}
826 	rcu_read_unlock();
827 }
828 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
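
/* A sketch of the expected caller pattern: a qdisc that drops packets
 * outside its own enqueue path (e.g. while timing out stale packets)
 * must tell its ancestors ("pull_expired" is hypothetical):
 *
 *	unsigned int dropped = 0, dropped_len = 0;
 *	struct sk_buff *skb;
 *
 *	while ((skb = pull_expired(q)) != NULL) {
 *		dropped++;
 *		dropped_len += qdisc_pkt_len(skb);
 *		rtnl_kfree_skbs(skb, skb);
 *	}
 *	qdisc_tree_reduce_backlog(sch, dropped, dropped_len);
 */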
829 
830 int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
831 			      void *type_data)
832 {
833 	struct net_device *dev = qdisc_dev(sch);
834 	int err;
835 
836 	sch->flags &= ~TCQ_F_OFFLOADED;
837 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
838 		return 0;
839 
840 	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
841 	if (err == -EOPNOTSUPP)
842 		return 0;
843 
844 	if (!err)
845 		sch->flags |= TCQ_F_OFFLOADED;
846 
847 	return err;
848 }
849 EXPORT_SYMBOL(qdisc_offload_dump_helper);
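
/* A sketch of a ->dump() caller, loosely based on how sch_red dumps
 * offload stats (TC_SETUP_QDISC_FOO, TC_FOO_STATS and the offload
 * struct are placeholders, not real kernel symbols):
 *
 *	struct tc_foo_qopt_offload hw = {
 *		.command = TC_FOO_STATS,
 *		.handle	 = sch->handle,
 *		.parent	 = sch->parent,
 *	};
 *
 *	err = qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_FOO, &hw);
 *	// err == 0 with TCQ_F_OFFLOADED clear simply means "not offloaded".
 */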
850 
851 void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
852 				struct Qdisc *new, struct Qdisc *old,
853 				enum tc_setup_type type, void *type_data,
854 				struct netlink_ext_ack *extack)
855 {
856 	bool any_qdisc_is_offloaded;
857 	int err;
858 
859 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
860 		return;
861 
862 	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
863 
864 	/* Don't report an error if the graft is part of a destroy operation. */
865 	if (!err || !new || new == &noop_qdisc)
866 		return;
867 
868 	/* Don't report error if the parent, the old child and the new
869 	 * one are not offloaded.
870 	 */
871 	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
872 	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
873 	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
874 
875 	if (any_qdisc_is_offloaded)
876 		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
877 }
878 EXPORT_SYMBOL(qdisc_offload_graft_helper);
879 
880 void qdisc_offload_query_caps(struct net_device *dev,
881 			      enum tc_setup_type type,
882 			      void *caps, size_t caps_len)
883 {
884 	const struct net_device_ops *ops = dev->netdev_ops;
885 	struct tc_query_caps_base base = {
886 		.type = type,
887 		.caps = caps,
888 	};
889 
890 	memset(caps, 0, caps_len);
891 
892 	if (ops->ndo_setup_tc)
893 		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
894 }
895 EXPORT_SYMBOL(qdisc_offload_query_caps);
896 
897 static void qdisc_offload_graft_root(struct net_device *dev,
898 				     struct Qdisc *new, struct Qdisc *old,
899 				     struct netlink_ext_ack *extack)
900 {
901 	struct tc_root_qopt_offload graft_offload = {
902 		.command	= TC_ROOT_GRAFT,
903 		.handle		= new ? new->handle : 0,
904 		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
905 				  (old && old->flags & TCQ_F_INGRESS),
906 	};
907 
908 	qdisc_offload_graft_helper(dev, NULL, new, old,
909 				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
910 }
911 
912 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
913 			 u32 portid, u32 seq, u16 flags, int event,
914 			 struct netlink_ext_ack *extack)
915 {
916 	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
917 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
918 	struct tcmsg *tcm;
919 	struct nlmsghdr  *nlh;
920 	unsigned char *b = skb_tail_pointer(skb);
921 	struct gnet_dump d;
922 	struct qdisc_size_table *stab;
923 	u32 block_index;
924 	__u32 qlen;
925 
926 	cond_resched();
927 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
928 	if (!nlh)
929 		goto out_nlmsg_trim;
930 	tcm = nlmsg_data(nlh);
931 	tcm->tcm_family = AF_UNSPEC;
932 	tcm->tcm__pad1 = 0;
933 	tcm->tcm__pad2 = 0;
934 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
935 	tcm->tcm_parent = clid;
936 	tcm->tcm_handle = q->handle;
937 	tcm->tcm_info = refcount_read(&q->refcnt);
938 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
939 		goto nla_put_failure;
940 	if (q->ops->ingress_block_get) {
941 		block_index = q->ops->ingress_block_get(q);
942 		if (block_index &&
943 		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
944 			goto nla_put_failure;
945 	}
946 	if (q->ops->egress_block_get) {
947 		block_index = q->ops->egress_block_get(q);
948 		if (block_index &&
949 		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
950 			goto nla_put_failure;
951 	}
952 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
953 		goto nla_put_failure;
954 	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
955 		goto nla_put_failure;
956 	qlen = qdisc_qlen_sum(q);
957 
958 	stab = rtnl_dereference(q->stab);
959 	if (stab && qdisc_dump_stab(skb, stab) < 0)
960 		goto nla_put_failure;
961 
962 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
963 					 NULL, &d, TCA_PAD) < 0)
964 		goto nla_put_failure;
965 
966 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
967 		goto nla_put_failure;
968 
969 	if (qdisc_is_percpu_stats(q)) {
970 		cpu_bstats = q->cpu_bstats;
971 		cpu_qstats = q->cpu_qstats;
972 	}
973 
974 	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
975 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
976 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
977 		goto nla_put_failure;
978 
979 	if (gnet_stats_finish_copy(&d) < 0)
980 		goto nla_put_failure;
981 
982 	if (extack && extack->_msg &&
983 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
984 		goto out_nlmsg_trim;
985 
986 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
987 
988 	return skb->len;
989 
990 out_nlmsg_trim:
991 nla_put_failure:
992 	nlmsg_trim(skb, b);
993 	return -1;
994 }
995 
996 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
997 {
998 	if (q->flags & TCQ_F_BUILTIN)
999 		return true;
1000 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
1001 		return true;
1002 
1003 	return false;
1004 }
1005 
1006 static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
1007 			    struct nlmsghdr *n, u32 clid, struct Qdisc *q,
1008 			    struct netlink_ext_ack *extack)
1009 {
1010 	struct sk_buff *skb;
1011 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1012 
1013 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1014 	if (!skb)
1015 		return -ENOBUFS;
1016 
1017 	if (!tc_qdisc_dump_ignore(q, false)) {
1018 		if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
1019 				  RTM_NEWQDISC, extack) < 0)
1020 			goto err_out;
1021 	}
1022 
1023 	if (skb->len)
1024 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1025 				      n->nlmsg_flags & NLM_F_ECHO);
1026 
1027 err_out:
1028 	kfree_skb(skb);
1029 	return -EINVAL;
1030 }
1031 
1032 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1033 			struct nlmsghdr *n, u32 clid,
1034 			struct Qdisc *old, struct Qdisc *new,
1035 			struct netlink_ext_ack *extack)
1036 {
1037 	struct sk_buff *skb;
1038 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1039 
1040 	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
1041 		return 0;
1042 
1043 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1044 	if (!skb)
1045 		return -ENOBUFS;
1046 
1047 	if (old && !tc_qdisc_dump_ignore(old, false)) {
1048 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1049 				  0, RTM_DELQDISC, extack) < 0)
1050 			goto err_out;
1051 	}
1052 	if (new && !tc_qdisc_dump_ignore(new, false)) {
1053 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1054 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
1055 			goto err_out;
1056 	}
1057 
1058 	if (skb->len)
1059 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1060 				      n->nlmsg_flags & NLM_F_ECHO);
1061 
1062 err_out:
1063 	kfree_skb(skb);
1064 	return -EINVAL;
1065 }
1066 
1067 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1068 			       struct nlmsghdr *n, u32 clid,
1069 			       struct Qdisc *old, struct Qdisc *new,
1070 			       struct netlink_ext_ack *extack)
1071 {
1072 	if (new || old)
1073 		qdisc_notify(net, skb, n, clid, old, new, extack);
1074 
1075 	if (old)
1076 		qdisc_put(old);
1077 }
1078 
1079 static void qdisc_clear_nolock(struct Qdisc *sch)
1080 {
1081 	sch->flags &= ~TCQ_F_NOLOCK;
1082 	if (!(sch->flags & TCQ_F_CPUSTATS))
1083 		return;
1084 
1085 	free_percpu(sch->cpu_bstats);
1086 	free_percpu(sch->cpu_qstats);
1087 	sch->cpu_bstats = NULL;
1088 	sch->cpu_qstats = NULL;
1089 	sch->flags &= ~TCQ_F_CPUSTATS;
1090 }
1091 
1092 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
1093  * to device "dev".
1094  *
1095  * When appropriate, send a netlink notification using 'skb'
1096  * and "n".
1097  *
1098  * On success, destroy the old qdisc.
1099  */
1100 
1101 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
1102 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
1103 		       struct Qdisc *new, struct Qdisc *old,
1104 		       struct netlink_ext_ack *extack)
1105 {
1106 	struct Qdisc *q = old;
1107 	struct net *net = dev_net(dev);
1108 
1109 	if (parent == NULL) {
1110 		unsigned int i, num_q, ingress;
1111 		struct netdev_queue *dev_queue;
1112 
1113 		ingress = 0;
1114 		num_q = dev->num_tx_queues;
1115 		if ((q && q->flags & TCQ_F_INGRESS) ||
1116 		    (new && new->flags & TCQ_F_INGRESS)) {
1117 			ingress = 1;
1118 			dev_queue = dev_ingress_queue(dev);
1119 			if (!dev_queue) {
1120 				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
1121 				return -ENOENT;
1122 			}
1123 
1124 			q = rtnl_dereference(dev_queue->qdisc_sleeping);
1125 
1126 			/* This is the counterpart of that qdisc_refcount_inc_nz() call in
1127 			 * __tcf_qdisc_find() for filter requests.
1128 			 */
1129 			if (!qdisc_refcount_dec_if_one(q)) {
1130 				NL_SET_ERR_MSG(extack,
1131 					       "Current ingress or clsact Qdisc has ongoing filter requests");
1132 				return -EBUSY;
1133 			}
1134 		}
1135 
1136 		if (dev->flags & IFF_UP)
1137 			dev_deactivate(dev);
1138 
1139 		qdisc_offload_graft_root(dev, new, old, extack);
1140 
1141 		if (new && new->ops->attach && !ingress)
1142 			goto skip;
1143 
1144 		if (!ingress) {
1145 			for (i = 0; i < num_q; i++) {
1146 				dev_queue = netdev_get_tx_queue(dev, i);
1147 				old = dev_graft_qdisc(dev_queue, new);
1148 
1149 				if (new && i > 0)
1150 					qdisc_refcount_inc(new);
1151 				qdisc_put(old);
1152 			}
1153 		} else {
1154 			old = dev_graft_qdisc(dev_queue, NULL);
1155 
1156 			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
1157 			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
1158 			 * pointer(s) in mini_qdisc_pair_swap().
1159 			 */
1160 			qdisc_notify(net, skb, n, classid, old, new, extack);
1161 			qdisc_destroy(old);
1162 
1163 			dev_graft_qdisc(dev_queue, new);
1164 		}
1165 
1166 skip:
1167 		if (!ingress) {
1168 			old = rtnl_dereference(dev->qdisc);
1169 			if (new && !new->ops->attach)
1170 				qdisc_refcount_inc(new);
1171 			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
1172 
1173 			notify_and_destroy(net, skb, n, classid, old, new, extack);
1174 
1175 			if (new && new->ops->attach)
1176 				new->ops->attach(new);
1177 		}
1178 
1179 		if (dev->flags & IFF_UP)
1180 			dev_activate(dev);
1181 	} else {
1182 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1183 		unsigned long cl;
1184 		int err;
1185 
1186 		/* Only support running a class lockless if its parent is lockless */
1187 		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
1188 			qdisc_clear_nolock(new);
1189 
1190 		if (!cops || !cops->graft)
1191 			return -EOPNOTSUPP;
1192 
1193 		cl = cops->find(parent, classid);
1194 		if (!cl) {
1195 			NL_SET_ERR_MSG(extack, "Specified class not found");
1196 			return -ENOENT;
1197 		}
1198 
1199 		if (new && new->ops == &noqueue_qdisc_ops) {
1200 			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
1201 			return -EINVAL;
1202 		}
1203 
1204 		if (new &&
1205 		    !(parent->flags & TCQ_F_MQROOT) &&
1206 		    rcu_access_pointer(new->stab)) {
1207 			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
1208 			return -EINVAL;
1209 		}
1210 		err = cops->graft(parent, cl, new, &old, extack);
1211 		if (err)
1212 			return err;
1213 		notify_and_destroy(net, skb, n, classid, old, new, extack);
1214 	}
1215 	return 0;
1216 }
1217 
1218 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1219 				   struct netlink_ext_ack *extack)
1220 {
1221 	u32 block_index;
1222 
1223 	if (tca[TCA_INGRESS_BLOCK]) {
1224 		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1225 
1226 		if (!block_index) {
1227 			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1228 			return -EINVAL;
1229 		}
1230 		if (!sch->ops->ingress_block_set) {
1231 			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1232 			return -EOPNOTSUPP;
1233 		}
1234 		sch->ops->ingress_block_set(sch, block_index);
1235 	}
1236 	if (tca[TCA_EGRESS_BLOCK]) {
1237 		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1238 
1239 		if (!block_index) {
1240 			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1241 			return -EINVAL;
1242 		}
1243 		if (!sch->ops->egress_block_set) {
1244 			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1245 			return -EOPNOTSUPP;
1246 		}
1247 		sch->ops->egress_block_set(sch, block_index);
1248 	}
1249 	return 0;
1250 }
1251 
1252 /*
1253    Allocate and initialize a new qdisc.
1254 
1255    Parameters are passed via opt.
1256  */
1257 
1258 static struct Qdisc *qdisc_create(struct net_device *dev,
1259 				  struct netdev_queue *dev_queue,
1260 				  u32 parent, u32 handle,
1261 				  struct nlattr **tca, int *errp,
1262 				  struct netlink_ext_ack *extack)
1263 {
1264 	int err;
1265 	struct nlattr *kind = tca[TCA_KIND];
1266 	struct Qdisc *sch;
1267 	struct Qdisc_ops *ops;
1268 	struct qdisc_size_table *stab;
1269 
1270 	ops = qdisc_lookup_ops(kind);
1271 #ifdef CONFIG_MODULES
1272 	if (ops == NULL && kind != NULL) {
1273 		char name[IFNAMSIZ];
1274 		if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1275 			/* We dropped the RTNL semaphore in order to
1276 			 * perform the module load.  So, even if we
1277 			 * succeeded in loading the module we have to
1278 			 * tell the caller to replay the request.  We
1279 			 * indicate this using -EAGAIN.
1280 			 * We replay the request because the device may
1281 			 * go away in the meantime.
1282 			 */
1283 			netdev_unlock_ops(dev);
1284 			rtnl_unlock();
1285 			request_module(NET_SCH_ALIAS_PREFIX "%s", name);
1286 			rtnl_lock();
1287 			netdev_lock_ops(dev);
1288 			ops = qdisc_lookup_ops(kind);
1289 			if (ops != NULL) {
1290 				/* We will call qdisc_lookup_ops() again on replay,
1291 				 * so don't keep a reference.
1292 				 */
1293 				module_put(ops->owner);
1294 				err = -EAGAIN;
1295 				goto err_out;
1296 			}
1297 		}
1298 	}
1299 #endif
1300 
1301 	err = -ENOENT;
1302 	if (!ops) {
1303 		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1304 		goto err_out;
1305 	}
1306 
1307 	sch = qdisc_alloc(dev_queue, ops, extack);
1308 	if (IS_ERR(sch)) {
1309 		err = PTR_ERR(sch);
1310 		goto err_out2;
1311 	}
1312 
1313 	sch->parent = parent;
1314 
1315 	if (handle == TC_H_INGRESS) {
1316 		if (!(sch->flags & TCQ_F_INGRESS)) {
1317 			NL_SET_ERR_MSG(extack,
1318 				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
1319 			err = -EINVAL;
1320 			goto err_out3;
1321 		}
1322 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
1323 	} else {
1324 		if (handle == 0) {
1325 			handle = qdisc_alloc_handle(dev);
1326 			if (handle == 0) {
1327 				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1328 				err = -ENOSPC;
1329 				goto err_out3;
1330 			}
1331 		}
1332 		if (!netif_is_multiqueue(dev))
1333 			sch->flags |= TCQ_F_ONETXQUEUE;
1334 	}
1335 
1336 	sch->handle = handle;
1337 
1338 	/* This exists to stay backward compatible with a userspace
1339 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
1340 	 * facility on older kernels by setting tx_queue_len=0 (prior
1341 	 * to qdisc init) and then forgetting to reinit tx_queue_len
1342 	 * before attaching a qdisc again.
1343 	 */
1344 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1345 		WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
1346 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1347 	}
1348 
1349 	err = qdisc_block_indexes_set(sch, tca, extack);
1350 	if (err)
1351 		goto err_out3;
1352 
1353 	if (tca[TCA_STAB]) {
1354 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
1355 		if (IS_ERR(stab)) {
1356 			err = PTR_ERR(stab);
1357 			goto err_out3;
1358 		}
1359 		rcu_assign_pointer(sch->stab, stab);
1360 	}
1361 
1362 	if (ops->init) {
1363 		err = ops->init(sch, tca[TCA_OPTIONS], extack);
1364 		if (err != 0)
1365 			goto err_out4;
1366 	}
1367 
1368 	if (tca[TCA_RATE]) {
1369 		err = -EOPNOTSUPP;
1370 		if (sch->flags & TCQ_F_MQROOT) {
1371 			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1372 			goto err_out4;
1373 		}
1374 
1375 		err = gen_new_estimator(&sch->bstats,
1376 					sch->cpu_bstats,
1377 					&sch->rate_est,
1378 					NULL,
1379 					true,
1380 					tca[TCA_RATE]);
1381 		if (err) {
1382 			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1383 			goto err_out4;
1384 		}
1385 	}
1386 
1387 	qdisc_hash_add(sch, false);
1388 	trace_qdisc_create(ops, dev, parent);
1389 
1390 	return sch;
1391 
1392 err_out4:
1393 	/* Even if ops->init() failed, we call ops->destroy()
1394 	 * like qdisc_create_dflt().
1395 	 */
1396 	if (ops->destroy)
1397 		ops->destroy(sch);
1398 	qdisc_put_stab(rtnl_dereference(sch->stab));
1399 err_out3:
1400 	lockdep_unregister_key(&sch->root_lock_key);
1401 	netdev_put(dev, &sch->dev_tracker);
1402 	qdisc_free(sch);
1403 err_out2:
1404 	module_put(ops->owner);
1405 err_out:
1406 	*errp = err;
1407 	return NULL;
1408 }
1409 
1410 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1411 			struct netlink_ext_ack *extack)
1412 {
1413 	struct qdisc_size_table *ostab, *stab = NULL;
1414 	int err = 0;
1415 
1416 	if (tca[TCA_OPTIONS]) {
1417 		if (!sch->ops->change) {
1418 			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1419 			return -EINVAL;
1420 		}
1421 		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1422 			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1423 			return -EOPNOTSUPP;
1424 		}
1425 		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1426 		if (err)
1427 			return err;
1428 	}
1429 
1430 	if (tca[TCA_STAB]) {
1431 		stab = qdisc_get_stab(tca[TCA_STAB], extack);
1432 		if (IS_ERR(stab))
1433 			return PTR_ERR(stab);
1434 	}
1435 
1436 	ostab = rtnl_dereference(sch->stab);
1437 	rcu_assign_pointer(sch->stab, stab);
1438 	qdisc_put_stab(ostab);
1439 
1440 	if (tca[TCA_RATE]) {
1441 		/* NB: ignore errors from gen_replace_estimator()
1442 		 * because the change cannot be undone. */
1443 		if (sch->flags & TCQ_F_MQROOT)
1444 			goto out;
1445 		gen_replace_estimator(&sch->bstats,
1446 				      sch->cpu_bstats,
1447 				      &sch->rate_est,
1448 				      NULL,
1449 				      true,
1450 				      tca[TCA_RATE]);
1451 	}
1452 out:
1453 	return 0;
1454 }
1455 
1456 struct check_loop_arg {
1457 	struct qdisc_walker	w;
1458 	struct Qdisc		*p;
1459 	int			depth;
1460 };
1461 
1462 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1463 			 struct qdisc_walker *w);
1464 
1465 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1466 {
1467 	struct check_loop_arg	arg;
1468 
1469 	if (q->ops->cl_ops == NULL)
1470 		return 0;
1471 
1472 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1473 	arg.w.fn = check_loop_fn;
1474 	arg.depth = depth;
1475 	arg.p = p;
1476 	q->ops->cl_ops->walk(q, &arg.w);
1477 	return arg.w.stop ? -ELOOP : 0;
1478 }
1479 
1480 static int
1481 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1482 {
1483 	struct Qdisc *leaf;
1484 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1485 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1486 
1487 	leaf = cops->leaf(q, cl);
1488 	if (leaf) {
1489 		if (leaf == arg->p || arg->depth > 7)
1490 			return -ELOOP;
1491 		return check_loop(leaf, arg->p, arg->depth + 1);
1492 	}
1493 	return 0;
1494 }
1495 
1496 const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
1497 	[TCA_KIND]		= { .type = NLA_STRING },
1498 	[TCA_RATE]		= { .type = NLA_BINARY,
1499 				    .len = sizeof(struct tc_estimator) },
1500 	[TCA_STAB]		= { .type = NLA_NESTED },
1501 	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
1502 	[TCA_CHAIN]		= { .type = NLA_U32 },
1503 	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
1504 	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
1505 };
1506 
1507 /*
1508  * Delete/get qdisc.
1509  */
1510 
1511 static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1512 			  struct netlink_ext_ack *extack,
1513 			  struct net_device *dev,
1514 			  struct nlattr *tca[TCA_MAX + 1],
1515 			  struct tcmsg *tcm)
1516 {
1517 	struct net *net = sock_net(skb->sk);
1518 	struct Qdisc *q = NULL;
1519 	struct Qdisc *p = NULL;
1520 	u32 clid;
1521 	int err;
1522 
1523 	clid = tcm->tcm_parent;
1524 	if (clid) {
1525 		if (clid != TC_H_ROOT) {
1526 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1527 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1528 				if (!p) {
1529 					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1530 					return -ENOENT;
1531 				}
1532 				q = qdisc_leaf(p, clid);
1533 			} else if (dev_ingress_queue(dev)) {
1534 				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
1535 			}
1536 		} else {
1537 			q = rtnl_dereference(dev->qdisc);
1538 		}
1539 		if (!q) {
1540 			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1541 			return -ENOENT;
1542 		}
1543 
1544 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1545 			NL_SET_ERR_MSG(extack, "Invalid handle");
1546 			return -EINVAL;
1547 		}
1548 	} else {
1549 		q = qdisc_lookup(dev, tcm->tcm_handle);
1550 		if (!q) {
1551 			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1552 			return -ENOENT;
1553 		}
1554 	}
1555 
1556 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1557 		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1558 		return -EINVAL;
1559 	}
1560 
1561 	if (n->nlmsg_type == RTM_DELQDISC) {
1562 		if (!clid) {
1563 			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1564 			return -EINVAL;
1565 		}
1566 		if (q->handle == 0) {
1567 			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1568 			return -ENOENT;
1569 		}
1570 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1571 		if (err != 0)
1572 			return err;
1573 	} else {
1574 		qdisc_get_notify(net, skb, n, clid, q, NULL);
1575 	}
1576 	return 0;
1577 }
1578 
1579 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1580 			struct netlink_ext_ack *extack)
1581 {
1582 	struct net *net = sock_net(skb->sk);
1583 	struct tcmsg *tcm = nlmsg_data(n);
1584 	struct nlattr *tca[TCA_MAX + 1];
1585 	struct net_device *dev;
1586 	int err;
1587 
1588 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1589 				     rtm_tca_policy, extack);
1590 	if (err < 0)
1591 		return err;
1592 
1593 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1594 	if (!dev)
1595 		return -ENODEV;
1596 
1597 	netdev_lock_ops(dev);
1598 	err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
1599 	netdev_unlock_ops(dev);
1600 
1601 	return err;
1602 }
1603 
1604 static bool req_create_or_replace(struct nlmsghdr *n)
1605 {
1606 	return (n->nlmsg_flags & NLM_F_CREATE &&
1607 		n->nlmsg_flags & NLM_F_REPLACE);
1608 }
1609 
1610 static bool req_create_exclusive(struct nlmsghdr *n)
1611 {
1612 	return (n->nlmsg_flags & NLM_F_CREATE &&
1613 		n->nlmsg_flags & NLM_F_EXCL);
1614 }
1615 
1616 static bool req_change(struct nlmsghdr *n)
1617 {
1618 	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
1619 		!(n->nlmsg_flags & NLM_F_REPLACE) &&
1620 		!(n->nlmsg_flags & NLM_F_EXCL));
1621 }
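
/* These three predicates mirror how iproute2 sets the netlink header
 * flags (userspace behavior, summarized here for reference):
 *
 *	tc qdisc add ...	-> NLM_F_CREATE | NLM_F_EXCL
 *	tc qdisc replace ...	-> NLM_F_CREATE | NLM_F_REPLACE
 *	tc qdisc change ...	-> none of CREATE/REPLACE/EXCL
 *
 * so req_create_exclusive() corresponds to "add", req_create_or_replace()
 * to "replace", and req_change() to "change".
 */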
1622 
1623 static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1624 			     struct netlink_ext_ack *extack,
1625 			     struct net_device *dev,
1626 			     struct nlattr *tca[TCA_MAX + 1],
1627 			     struct tcmsg *tcm,
1628 			     bool *replay)
1629 {
1630 	struct Qdisc *q = NULL;
1631 	struct Qdisc *p = NULL;
1632 	u32 clid;
1633 	int err;
1634 
1635 	clid = tcm->tcm_parent;
1636 
1637 	if (clid) {
1638 		if (clid != TC_H_ROOT) {
1639 			if (clid != TC_H_INGRESS) {
1640 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1641 				if (!p) {
1642 					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1643 					return -ENOENT;
1644 				}
1645 				q = qdisc_leaf(p, clid);
1646 			} else if (dev_ingress_queue_create(dev)) {
1647 				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
1648 			}
1649 		} else {
1650 			q = rtnl_dereference(dev->qdisc);
1651 		}
1652 
1653 		/* It may be the default qdisc; ignore it */
1654 		if (q && q->handle == 0)
1655 			q = NULL;
1656 
1657 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1658 			if (tcm->tcm_handle) {
1659 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1660 					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1661 					return -EEXIST;
1662 				}
1663 				if (TC_H_MIN(tcm->tcm_handle)) {
1664 					NL_SET_ERR_MSG(extack, "Invalid minor handle");
1665 					return -EINVAL;
1666 				}
1667 				q = qdisc_lookup(dev, tcm->tcm_handle);
1668 				if (!q)
1669 					goto create_n_graft;
1670 				if (q->parent != tcm->tcm_parent) {
1671 					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
1672 					return -EINVAL;
1673 				}
1674 				if (n->nlmsg_flags & NLM_F_EXCL) {
1675 					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1676 					return -EEXIST;
1677 				}
1678 				if (tca[TCA_KIND] &&
1679 				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1680 					NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1681 					return -EINVAL;
1682 				}
1683 				if (q->flags & TCQ_F_INGRESS) {
1684 					NL_SET_ERR_MSG(extack,
1685 						       "Cannot regraft ingress or clsact Qdiscs");
1686 					return -EINVAL;
1687 				}
1688 				if (q == p ||
1689 				    (p && check_loop(q, p, 0))) {
1690 					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1691 					return -ELOOP;
1692 				}
1693 				if (clid == TC_H_INGRESS) {
1694 					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
1695 					return -EINVAL;
1696 				}
1697 				qdisc_refcount_inc(q);
1698 				goto graft;
1699 			} else {
1700 				if (!q)
1701 					goto create_n_graft;
1702 
1703 				/* This magic test requires explanation.
1704 				 *
1705 				 *   We know that some child q is already
1706 				 *   attached to this parent and we have a choice:
1707 				 *   1) change it or 2) create/graft a new one.
1708 				 *   If the requested qdisc kind is different
1709 				 *   from the existing one, then we choose graft.
1710 				 *   If they are the same, then this is a "change"
1711 				 *   operation - just let it fall through.
1712 				 *
1713 				 *   1. We are allowed to create/graft only
1714 				 *   if the request explicitly states
1715 				 *   "please create if it doesn't exist".
1716 				 *
1717 				 *   2. If the request is for an exclusive create,
1718 				 *   then the qdisc tcm_handle is not expected
1719 				 *   to exist, so we choose create/graft too.
1720 				 *
1721 				 *   3. The last case is when no flags are set.
1722 				 *   This will happen when, for example, the tc
1723 				 *   utility issues a "change" command.
1724 				 *   Alas, it is sort of a hole in the API; we
1725 				 *   cannot decide what to do unambiguously.
1726 				 *   For now we select create/graft.
1727 				 */
1728 				if (tca[TCA_KIND] &&
1729 				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1730 					if (req_create_or_replace(n) ||
1731 					    req_create_exclusive(n))
1732 						goto create_n_graft;
1733 					else if (req_change(n))
1734 						goto create_n_graft2;
1735 				}
1736 			}
1737 		}
1738 	} else {
1739 		if (!tcm->tcm_handle) {
1740 			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1741 			return -EINVAL;
1742 		}
1743 		q = qdisc_lookup(dev, tcm->tcm_handle);
1744 	}
1745 
1746 	/* Change qdisc parameters */
1747 	if (!q) {
1748 		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1749 		return -ENOENT;
1750 	}
1751 	if (n->nlmsg_flags & NLM_F_EXCL) {
1752 		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1753 		return -EEXIST;
1754 	}
1755 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1756 		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
1757 		return -EINVAL;
1758 	}
1759 	err = qdisc_change(q, tca, extack);
1760 	if (err == 0)
1761 		qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
1762 	return err;
1763 
1764 create_n_graft:
1765 	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1766 		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1767 		return -ENOENT;
1768 	}
1769 create_n_graft2:
1770 	if (clid == TC_H_INGRESS) {
1771 		if (dev_ingress_queue(dev)) {
1772 			q = qdisc_create(dev, dev_ingress_queue(dev),
1773 					 tcm->tcm_parent, tcm->tcm_parent,
1774 					 tca, &err, extack);
1775 		} else {
1776 			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1777 			err = -ENOENT;
1778 		}
1779 	} else {
1780 		struct netdev_queue *dev_queue;
1781 
1782 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1783 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1784 		else if (p)
1785 			dev_queue = p->dev_queue;
1786 		else
1787 			dev_queue = netdev_get_tx_queue(dev, 0);
1788 
1789 		q = qdisc_create(dev, dev_queue,
1790 				 tcm->tcm_parent, tcm->tcm_handle,
1791 				 tca, &err, extack);
1792 	}
1793 	if (q == NULL) {
1794 		if (err == -EAGAIN) {
1795 			*replay = true;
1796 			return 0;
1797 		}
1798 		return err;
1799 	}
1800 
1801 graft:
1802 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1803 	if (err) {
1804 		if (q)
1805 			qdisc_put(q);
1806 		return err;
1807 	}
1808 
1809 	return 0;
1810 }
1811 
1812 /*
1813  * Create/change qdisc.
1814  */
1815 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1816 			   struct netlink_ext_ack *extack)
1817 {
1818 	struct net *net = sock_net(skb->sk);
1819 	struct nlattr *tca[TCA_MAX + 1];
1820 	struct net_device *dev;
1821 	struct tcmsg *tcm;
1822 	bool replay;
1823 	int err;
1824 
1825 replay:
1826 	/* Reinit, just in case something touches this. */
1827 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1828 				     rtm_tca_policy, extack);
1829 	if (err < 0)
1830 		return err;
1831 
1832 	tcm = nlmsg_data(n);
1833 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1834 	if (!dev)
1835 		return -ENODEV;
1836 
1837 	replay = false;
1838 	netdev_lock_ops(dev);
1839 	err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm, &replay);
1840 	netdev_unlock_ops(dev);
1841 	if (replay)
1842 		goto replay;
1843 
1844 	return err;
1845 }
1846 
1847 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1848 			      struct netlink_callback *cb,
1849 			      int *q_idx_p, int s_q_idx, bool recur,
1850 			      bool dump_invisible)
1851 {
1852 	int ret = 0, q_idx = *q_idx_p;
1853 	struct Qdisc *q;
1854 	int b;
1855 
1856 	if (!root)
1857 		return 0;
1858 
1859 	q = root;
1860 	if (q_idx < s_q_idx) {
1861 		q_idx++;
1862 	} else {
1863 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1864 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1865 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1866 				  RTM_NEWQDISC, NULL) <= 0)
1867 			goto done;
1868 		q_idx++;
1869 	}
1870 
1871 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1872 	 * itself has already been dumped.
1873 	 *
1874 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1875 	 * qdisc hashtable, we don't want to hit it again.
1876 	 */
1877 	if (!qdisc_dev(root) || !recur)
1878 		goto out;
1879 
1880 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1881 		if (q_idx < s_q_idx) {
1882 			q_idx++;
1883 			continue;
1884 		}
1885 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1886 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1887 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1888 				  RTM_NEWQDISC, NULL) <= 0)
1889 			goto done;
1890 		q_idx++;
1891 	}
1892 
1893 out:
1894 	*q_idx_p = q_idx;
1895 	return ret;
1896 done:
1897 	ret = -1;
1898 	goto out;
1899 }
1900 
1901 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1902 {
1903 	struct net *net = sock_net(skb->sk);
1904 	int idx, q_idx;
1905 	int s_idx, s_q_idx;
1906 	struct net_device *dev;
1907 	const struct nlmsghdr *nlh = cb->nlh;
1908 	struct nlattr *tca[TCA_MAX + 1];
1909 	int err;
1910 
1911 	s_idx = cb->args[0];
1912 	s_q_idx = q_idx = cb->args[1];
1913 
1914 	idx = 0;
1915 	ASSERT_RTNL();
1916 
1917 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1918 				     rtm_tca_policy, cb->extack);
1919 	if (err < 0)
1920 		return err;
1921 
1922 	for_each_netdev(net, dev) {
1923 		struct netdev_queue *dev_queue;
1924 
1925 		if (idx < s_idx)
1926 			goto cont;
1927 		if (idx > s_idx)
1928 			s_q_idx = 0;
1929 		q_idx = 0;
1930 
1931 		netdev_lock_ops(dev);
1932 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1933 				       skb, cb, &q_idx, s_q_idx,
1934 				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
1935 			netdev_unlock_ops(dev);
1936 			goto done;
1937 		}
1938 
1939 		dev_queue = dev_ingress_queue(dev);
1940 		if (dev_queue &&
1941 		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1942 				       skb, cb, &q_idx, s_q_idx, false,
1943 				       tca[TCA_DUMP_INVISIBLE]) < 0) {
1944 			netdev_unlock_ops(dev);
1945 			goto done;
1946 		}
1947 		netdev_unlock_ops(dev);
1948 
1949 cont:
1950 		idx++;
1951 	}
1952 
1953 done:
1954 	cb->args[0] = idx;
1955 	cb->args[1] = q_idx;
1956 
1957 	return skb->len;
1958 }
1959 
1962 /************************************************
1963  *	Traffic classes manipulation.		*
1964  ************************************************/
1965 
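/*
 * tc_fill_tclass() lays out a single RTM_*TCLASS message: an nlmsghdr, a
 * tcmsg pre-filled with the qdisc's ifindex and handle (cl_ops->dump()
 * overwrites tcm_handle with the real class id), a TCA_KIND string and the
 * class statistics. On failure the skb is trimmed back to 'b', leaving no
 * partial message behind.
 */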
1966 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1967 			  unsigned long cl, u32 portid, u32 seq, u16 flags,
1968 			  int event, struct netlink_ext_ack *extack)
1969 {
1970 	struct tcmsg *tcm;
1971 	struct nlmsghdr  *nlh;
1972 	unsigned char *b = skb_tail_pointer(skb);
1973 	struct gnet_dump d;
1974 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1975 
1976 	cond_resched();
1977 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1978 	if (!nlh)
1979 		goto out_nlmsg_trim;
1980 	tcm = nlmsg_data(nlh);
1981 	tcm->tcm_family = AF_UNSPEC;
1982 	tcm->tcm__pad1 = 0;
1983 	tcm->tcm__pad2 = 0;
1984 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1985 	tcm->tcm_parent = q->handle;
1986 	tcm->tcm_handle = q->handle;
1987 	tcm->tcm_info = 0;
1988 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1989 		goto nla_put_failure;
1990 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1991 		goto nla_put_failure;
1992 
1993 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1994 					 NULL, &d, TCA_PAD) < 0)
1995 		goto nla_put_failure;
1996 
1997 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1998 		goto nla_put_failure;
1999 
2000 	if (gnet_stats_finish_copy(&d) < 0)
2001 		goto nla_put_failure;
2002 
2003 	if (extack && extack->_msg &&
2004 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
2005 		goto out_nlmsg_trim;
2006 
2007 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2008 
2009 	return skb->len;
2010 
2011 out_nlmsg_trim:
2012 nla_put_failure:
2013 	nlmsg_trim(skb, b);
2014 	return -1;
2015 }
2016 
2017 static int tclass_notify(struct net *net, struct sk_buff *oskb,
2018 			 struct nlmsghdr *n, struct Qdisc *q,
2019 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
2020 {
2021 	struct sk_buff *skb;
2022 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2023 
2024 	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
2025 		return 0;
2026 
2027 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2028 	if (!skb)
2029 		return -ENOBUFS;
2030 
2031 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
2032 		kfree_skb(skb);
2033 		return -EINVAL;
2034 	}
2035 
2036 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2037 			      n->nlmsg_flags & NLM_F_ECHO);
2038 }
2039 
2040 static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
2041 			     struct nlmsghdr *n, struct Qdisc *q,
2042 			     unsigned long cl, struct netlink_ext_ack *extack)
2043 {
2044 	struct sk_buff *skb;
2045 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2046 
2047 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2048 	if (!skb)
2049 		return -ENOBUFS;
2050 
2051 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
2052 			   extack) < 0) {
2053 		kfree_skb(skb);
2054 		return -EINVAL;
2055 	}
2056 
2057 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2058 			      n->nlmsg_flags & NLM_F_ECHO);
2059 }
2060 
2061 static int tclass_del_notify(struct net *net,
2062 			     const struct Qdisc_class_ops *cops,
2063 			     struct sk_buff *oskb, struct nlmsghdr *n,
2064 			     struct Qdisc *q, unsigned long cl,
2065 			     struct netlink_ext_ack *extack)
2066 {
2067 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2068 	struct sk_buff *skb;
2069 	int err = 0;
2070 
2071 	if (!cops->delete)
2072 		return -EOPNOTSUPP;
2073 
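	/*
	 * Build the notification before deleting: once cops->delete() has
	 * run, the class state that tc_fill_tclass() needs is gone. The
	 * skb is only sent if the delete actually succeeds.
	 */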
2074 	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
2075 		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2076 		if (!skb)
2077 			return -ENOBUFS;
2078 
2079 		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
2080 				   RTM_DELTCLASS, extack) < 0) {
2081 			kfree_skb(skb);
2082 			return -EINVAL;
2083 		}
2084 	} else {
2085 		skb = NULL;
2086 	}
2087 
2088 	err = cops->delete(q, cl, extack);
2089 	if (err) {
2090 		kfree_skb(skb);
2091 		return err;
2092 	}
2093 
2094 	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
2095 				   n->nlmsg_flags & NLM_F_ECHO);
2096 	return err;
2097 }
2098 
2099 #ifdef CONFIG_NET_CLS
2100 
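/*
 * Reverse binding: filters attached to a qdisc may hold references to a
 * class by classid. When a class is created or deleted, every filter on
 * the qdisc is walked and its bind_class op rebinds matching entries to
 * the new class (or to 0 on delete), so no stale class references remain.
 */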
2101 struct tcf_bind_args {
2102 	struct tcf_walker w;
2103 	unsigned long base;
2104 	unsigned long cl;
2105 	u32 classid;
2106 };
2107 
2108 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2109 {
2110 	struct tcf_bind_args *a = (void *)arg;
2111 
2112 	if (n && tp->ops->bind_class) {
2113 		struct Qdisc *q = tcf_block_q(tp->chain->block);
2114 
2115 		sch_tree_lock(q);
2116 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
2117 		sch_tree_unlock(q);
2118 	}
2119 	return 0;
2120 }
2121 
2122 struct tc_bind_class_args {
2123 	struct qdisc_walker w;
2124 	unsigned long new_cl;
2125 	u32 portid;
2126 	u32 clid;
2127 };
2128 
2129 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2130 				struct qdisc_walker *w)
2131 {
2132 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
2133 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2134 	struct tcf_block *block;
2135 	struct tcf_chain *chain;
2136 
2137 	block = cops->tcf_block(q, cl, NULL);
2138 	if (!block)
2139 		return 0;
2140 	for (chain = tcf_get_next_chain(block, NULL);
2141 	     chain;
2142 	     chain = tcf_get_next_chain(block, chain)) {
2143 		struct tcf_proto *tp;
2144 
2145 		for (tp = tcf_get_next_proto(chain, NULL);
2146 		     tp; tp = tcf_get_next_proto(chain, tp)) {
2147 			struct tcf_bind_args arg = {};
2148 
2149 			arg.w.fn = tcf_node_bind;
2150 			arg.classid = a->clid;
2151 			arg.base = cl;
2152 			arg.cl = a->new_cl;
2153 			tp->ops->walk(tp, &arg.w, true);
2154 		}
2155 	}
2156 
2157 	return 0;
2158 }
2159 
2160 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2161 			   unsigned long new_cl)
2162 {
2163 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2164 	struct tc_bind_class_args args = {};
2165 
2166 	if (!cops->tcf_block)
2167 		return;
2168 	args.portid = portid;
2169 	args.clid = clid;
2170 	args.new_cl = new_cl;
2171 	args.w.fn = tc_bind_class_walker;
2172 	cops->walk(q, &args.w);
2173 }
2174 
2175 #else
2176 
2177 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2178 			   unsigned long new_cl)
2179 {
2180 }
2181 
2182 #endif
2183 
2184 static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2185 			   struct netlink_ext_ack *extack,
2186 			   struct net_device *dev,
2187 			   struct nlattr *tca[TCA_MAX + 1],
2188 			   struct tcmsg *tcm)
2189 {
2190 	struct net *net = sock_net(skb->sk);
2191 	const struct Qdisc_class_ops *cops;
2192 	struct Qdisc *q = NULL;
2193 	unsigned long cl = 0;
2194 	unsigned long new_cl;
2195 	u32 portid;
2196 	u32 clid;
2197 	u32 qid;
2198 	int err;
2199 
2200 	/*
2201 	   parent == TC_H_UNSPEC - unspecified parent.
2202 	   parent == TC_H_ROOT   - class is root, which has no parent.
2203 	   parent == X:0	 - parent is root class.
2204 	   parent == X:Y	 - parent is a node in hierarchy.
2205 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2206 
2207 	   handle == 0:0	 - generate handle from kernel pool.
2208 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2209 	   handle == X:Y	 - class is X:Y, fully specified.
2210 	   handle == X:0	 - root class.
2211 	 */
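
	/*
	 * Example: "tc class add dev eth0 parent 1: classid 1:10 ..." arrives
	 * with tcm_parent == 1:0 and tcm_handle == 1:10, so both majors agree
	 * and qid resolves to 1:0; "parent 1:1 classid 1:10" instead makes
	 * 1:10 a child of class 1:1 of the same qdisc.
	 */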
2212 
2213 	/* Step 1. Determine qdisc handle X:0 */
2214 
2215 	portid = tcm->tcm_parent;
2216 	clid = tcm->tcm_handle;
2217 	qid = TC_H_MAJ(clid);
2218 
2219 	if (portid != TC_H_ROOT) {
2220 		u32 qid1 = TC_H_MAJ(portid);
2221 
2222 		if (qid && qid1) {
2223 			/* If both majors are known, they must be identical. */
2224 			if (qid != qid1)
2225 				return -EINVAL;
2226 		} else if (qid1) {
2227 			qid = qid1;
2228 		} else if (qid == 0) {
2229 			qid = rtnl_dereference(dev->qdisc)->handle;
		}
2230 
2231 		/* Now qid is a genuine qdisc handle consistent with
2232 		 * both parent and child.
2233 		 *
2234 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
2235 		 */
2236 		if (portid)
2237 			portid = TC_H_MAKE(qid, portid);
2238 	} else {
2239 		if (qid == 0)
2240 			qid = rtnl_dereference(dev->qdisc)->handle;
2241 	}
2242 
2243 	/* OK. Locate qdisc */
2244 	q = qdisc_lookup(dev, qid);
2245 	if (!q)
2246 		return -ENOENT;
2247 
2248 	/* And check that it supports classes */
2249 	cops = q->ops->cl_ops;
2250 	if (!cops)
2251 		return -EINVAL;
2252 
2253 	/* Now try to get class */
2254 	if (clid == 0) {
2255 		if (portid == TC_H_ROOT)
2256 			clid = qid;
2257 	} else {
2258 		clid = TC_H_MAKE(qid, clid);
	}
2259 
2260 	if (clid)
2261 		cl = cops->find(q, clid);
2262 
2263 	if (cl == 0) {
2264 		err = -ENOENT;
2265 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2266 		    !(n->nlmsg_flags & NLM_F_CREATE))
2267 			goto out;
2268 	} else {
2269 		switch (n->nlmsg_type) {
2270 		case RTM_NEWTCLASS:
2271 			err = -EEXIST;
2272 			if (n->nlmsg_flags & NLM_F_EXCL)
2273 				goto out;
2274 			break;
2275 		case RTM_DELTCLASS:
2276 			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2277 			/* Unbind the class from its filters by rebinding them to 0 */
2278 			tc_bind_tclass(q, portid, clid, 0);
2279 			goto out;
2280 		case RTM_GETTCLASS:
2281 			err = tclass_get_notify(net, skb, n, q, cl, extack);
2282 			goto out;
2283 		default:
2284 			err = -EINVAL;
2285 			goto out;
2286 		}
2287 	}
2288 
2289 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2290 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2291 		return -EOPNOTSUPP;
2292 	}
2293 
2294 	/* Prevent creation of traffic classes with classid TC_H_ROOT */
2295 	if (clid == TC_H_ROOT) {
2296 		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
2297 		return -EINVAL;
2298 	}
2299 
2300 	new_cl = cl;
2301 	err = -EOPNOTSUPP;
2302 	if (cops->change)
2303 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2304 	if (err == 0) {
2305 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
2306 		/* We just created a new class; do the reverse binding. */
2307 		if (cl != new_cl)
2308 			tc_bind_tclass(q, portid, clid, new_cl);
2309 	}
2310 out:
2311 	return err;
2312 }
2313 
2314 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2315 			 struct netlink_ext_ack *extack)
2316 {
2317 	struct net *net = sock_net(skb->sk);
2318 	struct tcmsg *tcm = nlmsg_data(n);
2319 	struct nlattr *tca[TCA_MAX + 1];
2320 	struct net_device *dev;
2321 	int err;
2322 
2323 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2324 				     rtm_tca_policy, extack);
2325 	if (err < 0)
2326 		return err;
2327 
2328 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2329 	if (!dev)
2330 		return -ENODEV;
2331 
2332 	netdev_lock_ops(dev);
2333 	err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
2334 	netdev_unlock_ops(dev);
2335 
2336 	return err;
2337 }
2338 
2339 struct qdisc_dump_args {
2340 	struct qdisc_walker	w;
2341 	struct sk_buff		*skb;
2342 	struct netlink_callback	*cb;
2343 };
2344 
2345 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2346 			    struct qdisc_walker *arg)
2347 {
2348 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2349 
2350 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2351 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2352 			      RTM_NEWTCLASS, NULL);
2353 }
2354 
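/*
 * Dump the classes of one qdisc. cb->args[1] carries the per-qdisc class
 * cursor: it is preloaded into the walker's 'skip' count and updated from
 * 'count' afterwards, so an skb overflow (walker stop) resumes at the
 * right class on the next dump call.
 */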
2355 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2356 				struct tcmsg *tcm, struct netlink_callback *cb,
2357 				int *t_p, int s_t)
2358 {
2359 	struct qdisc_dump_args arg;
2360 
2361 	if (tc_qdisc_dump_ignore(q, false) ||
2362 	    *t_p < s_t || !q->ops->cl_ops ||
2363 	    (tcm->tcm_parent &&
2364 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2365 		(*t_p)++;
2366 		return 0;
2367 	}
2368 	if (*t_p > s_t)
2369 		memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0]));
2370 	arg.w.fn = qdisc_class_dump;
2371 	arg.skb = skb;
2372 	arg.cb = cb;
2373 	arg.w.stop  = 0;
2374 	arg.w.skip = cb->args[1];
2375 	arg.w.count = 0;
2376 	q->ops->cl_ops->walk(q, &arg.w);
2377 	cb->args[1] = arg.w.count;
2378 	if (arg.w.stop)
2379 		return -1;
2380 	(*t_p)++;
2381 	return 0;
2382 }
2383 
2384 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2385 			       struct tcmsg *tcm, struct netlink_callback *cb,
2386 			       int *t_p, int s_t, bool recur)
2387 {
2388 	struct Qdisc *q;
2389 	int b;
2390 
2391 	if (!root)
2392 		return 0;
2393 
2394 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2395 		return -1;
2396 
2397 	if (!qdisc_dev(root) || !recur)
2398 		return 0;
2399 
2400 	if (tcm->tcm_parent) {
2401 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2402 		if (q && q != root &&
2403 		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2404 			return -1;
2405 		return 0;
2406 	}
2407 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2408 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2409 			return -1;
2410 	}
2411 
2412 	return 0;
2413 }
2414 
2415 static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
2416 			    struct tcmsg *tcm, struct net_device *dev)
2417 {
2418 	struct netdev_queue *dev_queue;
2419 	int t, s_t;
2420 
2421 	s_t = cb->args[0];
2422 	t = 0;
2423 
2424 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2425 				skb, tcm, cb, &t, s_t, true) < 0)
2426 		goto done;
2427 
2428 	dev_queue = dev_ingress_queue(dev);
2429 	if (dev_queue &&
2430 	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2431 				skb, tcm, cb, &t, s_t, false) < 0)
2432 		goto done;
2433 
2434 done:
2435 	cb->args[0] = t;
2436 
2437 	return skb->len;
2438 }
2439 
2440 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2441 {
2442 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2443 	struct net *net = sock_net(skb->sk);
2444 	struct net_device *dev;
2445 	int err;
2446 
2447 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2448 		return 0;
2449 
2450 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2451 	if (!dev)
2452 		return 0;
2453 
2454 	netdev_lock_ops(dev);
2455 	err = __tc_dump_tclass(skb, cb, tcm, dev);
2456 	netdev_unlock_ops(dev);
2457 
2458 	dev_put(dev);
2459 
2460 	return err;
2461 }
2462 
2463 #ifdef CONFIG_PROC_FS
2464 static int psched_show(struct seq_file *seq, void *v)
2465 {
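	/*
	 * Shown as /proc/net/psched: four hex words (psched tick units and
	 * clock resolution) that iproute2 parses at startup to derive its
	 * tick<->time conversion factors.
	 */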
2466 	seq_printf(seq, "%08x %08x %08x %08x\n",
2467 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2468 		   1000000,
2469 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2470 
2471 	return 0;
2472 }
2473 
2474 static int __net_init psched_net_init(struct net *net)
2475 {
2476 	struct proc_dir_entry *e;
2477 
2478 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2479 	if (!e)
2480 		return -ENOMEM;
2481 
2482 	return 0;
2483 }
2484 
2485 static void __net_exit psched_net_exit(struct net *net)
2486 {
2487 	remove_proc_entry("psched", net->proc_net);
2488 }
2489 #else
2490 static int __net_init psched_net_init(struct net *net)
2491 {
2492 	return 0;
2493 }
2494 
2495 static void __net_exit psched_net_exit(struct net *net)
2496 {
2497 }
2498 #endif
2499 
2500 static struct pernet_operations psched_net_ops = {
2501 	.init = psched_net_init,
2502 	.exit = psched_net_exit,
2503 };
2504 
2505 #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
2506 DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
2507 #endif
2508 
2509 static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
2510 	{.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
2511 	{.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
2512 	{.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
2513 	 .dumpit = tc_dump_qdisc},
2514 	{.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
2515 	{.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
2516 	{.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
2517 	 .dumpit = tc_dump_tclass},
2518 };
2519 
2520 static int __init pktsched_init(void)
2521 {
2522 	int err;
2523 
2524 	err = register_pernet_subsys(&psched_net_ops);
2525 	if (err) {
2526 		pr_err("pktsched_init: cannot initialize per netns operations\n");
2528 		return err;
2529 	}
2530 
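	/*
	 * Only the always-built-in qdiscs are registered here; other
	 * disciplines live in modules and register themselves on load
	 * (qdisc_create() requests missing modules on demand).
	 */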
2531 	register_qdisc(&pfifo_fast_ops);
2532 	register_qdisc(&pfifo_qdisc_ops);
2533 	register_qdisc(&bfifo_qdisc_ops);
2534 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2535 	register_qdisc(&mq_qdisc_ops);
2536 	register_qdisc(&noqueue_qdisc_ops);
2537 
2538 	rtnl_register_many(psched_rtnl_msg_handlers);
2539 
2540 	tc_wrapper_init();
2541 
2542 	return 0;
2543 }
2544 
2545 subsys_initcall(pktsched_init);
2546