xref: /linux/net/sched/sch_api.c (revision 83a37b3292f4aca799b355179ad6fbdd78a08e10)
1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39 
40 /*
41 
42    Short review.
43    -------------
44 
45    This file consists of two interrelated parts:
46 
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49 
50    Generally, a queueing discipline ("qdisc") is a black box,
51    able to enqueue packets and to dequeue them (when the
52    device is ready to send something) in an order and at times
53    determined by the algorithm hidden inside it.
54 
55    qdiscs are divided into two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets into "traffic classes",
58      using "packet classifiers" (see cls_api.c).
59 
60    In turn, classes may have child qdiscs (as a rule, queues)
61    attached to them, and so on recursively.
62 
63    The goal of the routines in this file is to translate
64    the information supplied by the user in the form of handles
65    into a form more intelligible to the kernel, to perform the
66    sanity checks and the part of the work common to all qdiscs,
67    and to provide rtnetlink notifications.
68 
69    All the really intelligent work is done inside the qdisc modules.
70 
71 
72 
73    Every discipline has two major routines: enqueue and dequeue.
74 
75    ---dequeue
76 
77    dequeue usually returns an skb to send. It is allowed to return NULL,
78    but that does not mean the queue is empty; it just means that the
79    discipline does not want to send anything at this time.
80    The queue is really empty only if q->q.qlen == 0.
81    For complicated disciplines with multiple queues, q->q is not the
82    real packet queue; nevertheless, q->q.qlen must be valid.
83 
84    ---enqueue
85 
86    enqueue returns 0 if the packet was enqueued successfully.
87    If a packet (this one or another one) was dropped, it returns
88    a non-zero error code.
89    NET_XMIT_DROP 	- this packet was dropped
90      Expected action: do not back off, but wait until the queue clears.
91    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
92      Expected action: back off or ignore
93 
94    Auxiliary routines:
95 
96    ---peek
97 
98    like dequeue but without removing a packet from the queue
99 
100    ---reset
101 
102    returns the qdisc to its initial state: purges all buffers, clears
103    all timers and counters (except for statistics), etc.
104 
105    ---init
106 
107    initializes a newly created qdisc.
108 
109    ---destroy
110 
111    destroys resources allocated by init and during the lifetime of the qdisc.
112 
113    ---change
114 
115    changes qdisc parameters.
116  */
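/* Below is a minimal sketch of the enqueue/dequeue contract described
 * above.  It is illustrative only (hence the #if 0): "example_enqueue"
 * and "example_dequeue" are hypothetical and registered nowhere; they
 * merely show the return-code conventions of a FIFO-style qdisc.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			   struct sk_buff **to_free)
{
	if (unlikely(sch->q.qlen >= qdisc_dev(sch)->tx_queue_len))
		return qdisc_drop(skb, sch, to_free);	/* NET_XMIT_DROP */
	return qdisc_enqueue_tail(skb, sch);		/* 0 on success */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL means "nothing to send right now", not "queue empty";
	 * emptiness is signalled by sch->q.qlen == 0.
	 */
	return qdisc_dequeue_head(sch);
}
#endif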
117 
118 /* Protects the list of registered TC modules. It is a pure SMP lock. */
119 static DEFINE_RWLOCK(qdisc_mod_lock);
120 
121 
122 /************************************************
123  *	Queueing disciplines manipulation.	*
124  ************************************************/
125 
126 
127 /* The list of all installed queueing disciplines. */
128 
129 static struct Qdisc_ops *qdisc_base;
130 
131 /* Register/unregister queueing discipline */
132 
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135 	struct Qdisc_ops *q, **qp;
136 	int rc = -EEXIST;
137 
138 	write_lock(&qdisc_mod_lock);
139 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140 		if (!strcmp(qops->id, q->id))
141 			goto out;
142 
143 	if (qops->enqueue == NULL)
144 		qops->enqueue = noop_qdisc_ops.enqueue;
145 	if (qops->peek == NULL) {
146 		if (qops->dequeue == NULL)
147 			qops->peek = noop_qdisc_ops.peek;
148 		else
149 			goto out_einval;
150 	}
151 	if (qops->dequeue == NULL)
152 		qops->dequeue = noop_qdisc_ops.dequeue;
153 
154 	if (qops->cl_ops) {
155 		const struct Qdisc_class_ops *cops = qops->cl_ops;
156 
157 		if (!(cops->find && cops->walk && cops->leaf))
158 			goto out_einval;
159 
160 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161 			goto out_einval;
162 	}
163 
164 	qops->next = NULL;
165 	*qp = qops;
166 	rc = 0;
167 out:
168 	write_unlock(&qdisc_mod_lock);
169 	return rc;
170 
171 out_einval:
172 	rc = -EINVAL;
173 	goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
176 
177 int unregister_qdisc(struct Qdisc_ops *qops)
178 {
179 	struct Qdisc_ops *q, **qp;
180 	int err = -ENOENT;
181 
182 	write_lock(&qdisc_mod_lock);
183 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184 		if (q == qops)
185 			break;
186 	if (q) {
187 		*qp = q->next;
188 		q->next = NULL;
189 		err = 0;
190 	}
191 	write_unlock(&qdisc_mod_lock);
192 	return err;
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
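/* Hedged usage sketch: a qdisc module typically registers its ops from
 * module init and unregisters them on exit.  "example_qdisc_ops" and
 * the example_* handlers are hypothetical names, not part of this file:
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 */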
195 
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199 	read_lock(&qdisc_mod_lock);
200 	strlcpy(name, default_qdisc_ops->id, len);
201 	read_unlock(&qdisc_mod_lock);
202 }
203 
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206 	struct Qdisc_ops *q = NULL;
207 
208 	for (q = qdisc_base; q; q = q->next) {
209 		if (!strcmp(name, q->id)) {
210 			if (!try_module_get(q->owner))
211 				q = NULL;
212 			break;
213 		}
214 	}
215 
216 	return q;
217 }
218 
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222 	const struct Qdisc_ops *ops;
223 
224 	if (!capable(CAP_NET_ADMIN))
225 		return -EPERM;
226 
227 	write_lock(&qdisc_mod_lock);
228 	ops = qdisc_lookup_default(name);
229 	if (!ops) {
230 		/* Not found; drop the lock and try to load the module */
231 		write_unlock(&qdisc_mod_lock);
232 		request_module("sch_%s", name);
233 		write_lock(&qdisc_mod_lock);
234 
235 		ops = qdisc_lookup_default(name);
236 	}
237 
238 	if (ops) {
239 		/* Set new default */
240 		module_put(default_qdisc_ops->owner);
241 		default_qdisc_ops = ops;
242 	}
243 	write_unlock(&qdisc_mod_lock);
244 
245 	return ops ? 0 : -ENOENT;
246 }
247 
248 #ifdef CONFIG_NET_SCH_DEFAULT
249 /* Set default value from kernel config */
250 static int __init sch_default_qdisc(void)
251 {
252 	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
253 }
254 late_initcall(sch_default_qdisc);
255 #endif
256 
257 /* We know the handle. Find the qdisc among all qdiscs attached to the
258  * device (the root qdisc, all its children, children of children, etc.).
259  * Note: the caller either holds rtnl or rcu_read_lock()
260  */
261 
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264 	struct Qdisc *q;
265 
266 	if (!qdisc_dev(root))
267 		return (root->handle == handle ? root : NULL);
268 
269 	if (!(root->flags & TCQ_F_BUILTIN) &&
270 	    root->handle == handle)
271 		return root;
272 
273 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
274 		if (q->handle == handle)
275 			return q;
276 	}
277 	return NULL;
278 }
279 
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283 		ASSERT_RTNL();
284 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285 		if (invisible)
286 			q->flags |= TCQ_F_INVISIBLE;
287 	}
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290 
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 		ASSERT_RTNL();
295 		hash_del_rcu(&q->hash);
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299 
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302 	struct Qdisc *q;
303 
304 	q = qdisc_match_from_root(dev->qdisc, handle);
305 	if (q)
306 		goto out;
307 
308 	if (dev_ingress_queue(dev))
309 		q = qdisc_match_from_root(
310 			dev_ingress_queue(dev)->qdisc_sleeping,
311 			handle);
312 out:
313 	return q;
314 }
315 
316 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
317 {
318 	unsigned long cl;
319 	struct Qdisc *leaf;
320 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
321 
322 	if (cops == NULL)
323 		return NULL;
324 	cl = cops->find(p, classid);
325 
326 	if (cl == 0)
327 		return NULL;
328 	leaf = cops->leaf(p, cl);
329 	return leaf;
330 }
331 
332 /* Find queueing discipline by name */
333 
334 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
335 {
336 	struct Qdisc_ops *q = NULL;
337 
338 	if (kind) {
339 		read_lock(&qdisc_mod_lock);
340 		for (q = qdisc_base; q; q = q->next) {
341 			if (nla_strcmp(kind, q->id) == 0) {
342 				if (!try_module_get(q->owner))
343 					q = NULL;
344 				break;
345 			}
346 		}
347 		read_unlock(&qdisc_mod_lock);
348 	}
349 	return q;
350 }
351 
352 /* Older versions of iproute2 did not transfer the linklayer setting,
353  * and the rate table lookup system has been dropped from the kernel.
354  * To stay backward compatible with older iproute2 tc utils, we
355  * detect the linklayer setting by checking whether the rate table
356  * was modified.
357  *
358  * For linklayer ATM, the rate table entries will be aligned to
359  * 48 bytes, thus some table entries will contain the same value.  The
360  * mpu (min packet unit) is also encoded into the old rate table, so
361  * starting from the mpu, we find the low and high table entries
362  * mapping this cell.  If these entries contain the same value, then
363  * the rate table has been modified for linklayer ATM.
364  *
365  * This is done by rounding the mpu up to the nearest 48-byte cell,
366  * then rounding up to the next cell, taking the table entry one
367  * below, and comparing the two values.
368  */
369 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
370 {
371 	int low       = roundup(r->mpu, 48);
372 	int high      = roundup(low+1, 48);
373 	int cell_low  = low >> r->cell_log;
374 	int cell_high = (high >> r->cell_log) - 1;
375 
376 	/* rtab is too inaccurate at rates > 100Mbit/s */
377 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
378 		pr_debug("TC linklayer: Giving up ATM detection\n");
379 		return TC_LINKLAYER_ETHERNET;
380 	}
381 
382 	if ((cell_high > cell_low) && (cell_high < 256)
383 	    && (rtab[cell_low] == rtab[cell_high])) {
384 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
385 			 cell_low, cell_high, rtab[cell_high]);
386 		return TC_LINKLAYER_ATM;
387 	}
388 	return TC_LINKLAYER_ETHERNET;
389 }
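/* Worked example for the detection above (illustrative numbers): with
 * mpu = 0 and cell_log = 3, low = roundup(0, 48) = 0 and
 * high = roundup(1, 48) = 48, so cell_low = 0 and cell_high = 5.  On a
 * 48-byte-aligned (ATM) table, packet sizes 0-47 all cost one cell, so
 * rtab[0] == rtab[5] and TC_LINKLAYER_ATM is returned; on an unmodified
 * (Ethernet) table those two entries normally differ.
 */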
390 
391 static struct qdisc_rate_table *qdisc_rtab_list;
392 
393 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
394 					struct nlattr *tab)
395 {
396 	struct qdisc_rate_table *rtab;
397 
398 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
399 	    nla_len(tab) != TC_RTAB_SIZE)
400 		return NULL;
401 
402 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
403 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
404 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
405 			rtab->refcnt++;
406 			return rtab;
407 		}
408 	}
409 
410 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
411 	if (rtab) {
412 		rtab->rate = *r;
413 		rtab->refcnt = 1;
414 		memcpy(rtab->data, nla_data(tab), 1024);
415 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
416 			r->linklayer = __detect_linklayer(r, rtab->data);
417 		rtab->next = qdisc_rtab_list;
418 		qdisc_rtab_list = rtab;
419 	}
420 	return rtab;
421 }
422 EXPORT_SYMBOL(qdisc_get_rtab);
423 
424 void qdisc_put_rtab(struct qdisc_rate_table *tab)
425 {
426 	struct qdisc_rate_table *rtab, **rtabp;
427 
428 	if (!tab || --tab->refcnt)
429 		return;
430 
431 	for (rtabp = &qdisc_rtab_list;
432 	     (rtab = *rtabp) != NULL;
433 	     rtabp = &rtab->next) {
434 		if (rtab == tab) {
435 			*rtabp = rtab->next;
436 			kfree(rtab);
437 			return;
438 		}
439 	}
440 }
441 EXPORT_SYMBOL(qdisc_put_rtab);
442 
443 static LIST_HEAD(qdisc_stab_list);
444 
445 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
446 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
447 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
448 };
449 
450 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
451 {
452 	struct nlattr *tb[TCA_STAB_MAX + 1];
453 	struct qdisc_size_table *stab;
454 	struct tc_sizespec *s;
455 	unsigned int tsize = 0;
456 	u16 *tab = NULL;
457 	int err;
458 
459 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
460 	if (err < 0)
461 		return ERR_PTR(err);
462 	if (!tb[TCA_STAB_BASE])
463 		return ERR_PTR(-EINVAL);
464 
465 	s = nla_data(tb[TCA_STAB_BASE]);
466 
467 	if (s->tsize > 0) {
468 		if (!tb[TCA_STAB_DATA])
469 			return ERR_PTR(-EINVAL);
470 		tab = nla_data(tb[TCA_STAB_DATA]);
471 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
472 	}
473 
474 	if (tsize != s->tsize || (!tab && tsize > 0))
475 		return ERR_PTR(-EINVAL);
476 
477 	list_for_each_entry(stab, &qdisc_stab_list, list) {
478 		if (memcmp(&stab->szopts, s, sizeof(*s)))
479 			continue;
480 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
481 			continue;
482 		stab->refcnt++;
483 		return stab;
484 	}
485 
486 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
487 	if (!stab)
488 		return ERR_PTR(-ENOMEM);
489 
490 	stab->refcnt = 1;
491 	stab->szopts = *s;
492 	if (tsize > 0)
493 		memcpy(stab->data, tab, tsize * sizeof(u16));
494 
495 	list_add_tail(&stab->list, &qdisc_stab_list);
496 
497 	return stab;
498 }
499 
500 static void stab_kfree_rcu(struct rcu_head *head)
501 {
502 	kfree(container_of(head, struct qdisc_size_table, rcu));
503 }
504 
505 void qdisc_put_stab(struct qdisc_size_table *tab)
506 {
507 	if (!tab)
508 		return;
509 
510 	if (--tab->refcnt == 0) {
511 		list_del(&tab->list);
512 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
513 	}
514 }
515 EXPORT_SYMBOL(qdisc_put_stab);
516 
517 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
518 {
519 	struct nlattr *nest;
520 
521 	nest = nla_nest_start(skb, TCA_STAB);
522 	if (nest == NULL)
523 		goto nla_put_failure;
524 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
525 		goto nla_put_failure;
526 	nla_nest_end(skb, nest);
527 
528 	return skb->len;
529 
530 nla_put_failure:
531 	return -1;
532 }
533 
534 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
535 			       const struct qdisc_size_table *stab)
536 {
537 	int pkt_len, slot;
538 
539 	pkt_len = skb->len + stab->szopts.overhead;
540 	if (unlikely(!stab->szopts.tsize))
541 		goto out;
542 
543 	slot = pkt_len + stab->szopts.cell_align;
544 	if (unlikely(slot < 0))
545 		slot = 0;
546 
547 	slot >>= stab->szopts.cell_log;
548 	if (likely(slot < stab->szopts.tsize))
549 		pkt_len = stab->data[slot];
550 	else
551 		pkt_len = stab->data[stab->szopts.tsize - 1] *
552 				(slot / stab->szopts.tsize) +
553 				stab->data[slot % stab->szopts.tsize];
554 
555 	pkt_len <<= stab->szopts.size_log;
556 out:
557 	if (unlikely(pkt_len < 1))
558 		pkt_len = 1;
559 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
560 }
561 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
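/* Worked example for the size-table lookup above (illustrative
 * numbers): with szopts = { .overhead = 24, .cell_align = -1,
 * .cell_log = 6, .size_log = 0, .tsize = 512 }, a 100-byte skb gives
 * pkt_len = 124 and slot = (124 - 1) >> 6 = 1, so the reported length
 * becomes stab->data[1].  Slots beyond tsize are extrapolated from the
 * last table entry, as the code above shows.
 */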
562 
563 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
564 {
565 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
566 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
567 			txt, qdisc->ops->id, qdisc->handle >> 16);
568 		qdisc->flags |= TCQ_F_WARN_NONWC;
569 	}
570 }
571 EXPORT_SYMBOL(qdisc_warn_nonwc);
572 
573 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
574 {
575 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
576 						 timer);
577 
578 	rcu_read_lock();
579 	__netif_schedule(qdisc_root(wd->qdisc));
580 	rcu_read_unlock();
581 
582 	return HRTIMER_NORESTART;
583 }
584 
585 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
586 {
587 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
588 	wd->timer.function = qdisc_watchdog;
589 	wd->qdisc = qdisc;
590 }
591 EXPORT_SYMBOL(qdisc_watchdog_init);
592 
593 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
594 {
595 	if (test_bit(__QDISC_STATE_DEACTIVATED,
596 		     &qdisc_root_sleeping(wd->qdisc)->state))
597 		return;
598 
599 	if (wd->last_expires == expires)
600 		return;
601 
602 	wd->last_expires = expires;
603 	hrtimer_start(&wd->timer,
604 		      ns_to_ktime(expires),
605 		      HRTIMER_MODE_ABS_PINNED);
606 }
607 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
608 
609 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
610 {
611 	hrtimer_cancel(&wd->timer);
612 }
613 EXPORT_SYMBOL(qdisc_watchdog_cancel);
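/* Hedged usage sketch for the watchdog helpers above, loosely following
 * what shaping qdiscs such as sch_tbf do ("q->watchdog" and
 * "next_send_time" are hypothetical names):
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		in ->init()
 *	qdisc_watchdog_schedule_ns(&q->watchdog,
 *				   next_send_time);	in ->dequeue(), when
 *							no packet is eligible yet
 *	qdisc_watchdog_cancel(&q->watchdog);		in ->reset()/->destroy()
 *
 * When the hrtimer fires, qdisc_watchdog() above just reschedules the
 * root qdisc so that ->dequeue() gets retried.
 */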
614 
615 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
616 {
617 	struct hlist_head *h;
618 	unsigned int i;
619 
620 	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
621 
622 	if (h != NULL) {
623 		for (i = 0; i < n; i++)
624 			INIT_HLIST_HEAD(&h[i]);
625 	}
626 	return h;
627 }
628 
629 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
630 {
631 	struct Qdisc_class_common *cl;
632 	struct hlist_node *next;
633 	struct hlist_head *nhash, *ohash;
634 	unsigned int nsize, nmask, osize;
635 	unsigned int i, h;
636 
637 	/* Rehash when load factor exceeds 0.75 */
638 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
639 		return;
640 	nsize = clhash->hashsize * 2;
641 	nmask = nsize - 1;
642 	nhash = qdisc_class_hash_alloc(nsize);
643 	if (nhash == NULL)
644 		return;
645 
646 	ohash = clhash->hash;
647 	osize = clhash->hashsize;
648 
649 	sch_tree_lock(sch);
650 	for (i = 0; i < osize; i++) {
651 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
652 			h = qdisc_class_hash(cl->classid, nmask);
653 			hlist_add_head(&cl->hnode, &nhash[h]);
654 		}
655 	}
656 	clhash->hash     = nhash;
657 	clhash->hashsize = nsize;
658 	clhash->hashmask = nmask;
659 	sch_tree_unlock(sch);
660 
661 	kvfree(ohash);
662 }
663 EXPORT_SYMBOL(qdisc_class_hash_grow);
664 
665 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
666 {
667 	unsigned int size = 4;
668 
669 	clhash->hash = qdisc_class_hash_alloc(size);
670 	if (clhash->hash == NULL)
671 		return -ENOMEM;
672 	clhash->hashsize  = size;
673 	clhash->hashmask  = size - 1;
674 	clhash->hashelems = 0;
675 	return 0;
676 }
677 EXPORT_SYMBOL(qdisc_class_hash_init);
678 
679 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
680 {
681 	kvfree(clhash->hash);
682 }
683 EXPORT_SYMBOL(qdisc_class_hash_destroy);
684 
685 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
686 			     struct Qdisc_class_common *cl)
687 {
688 	unsigned int h;
689 
690 	INIT_HLIST_NODE(&cl->hnode);
691 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
692 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
693 	clhash->hashelems++;
694 }
695 EXPORT_SYMBOL(qdisc_class_hash_insert);
696 
697 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
698 			     struct Qdisc_class_common *cl)
699 {
700 	hlist_del(&cl->hnode);
701 	clhash->hashelems--;
702 }
703 EXPORT_SYMBOL(qdisc_class_hash_remove);
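/* Hedged usage sketch for the class hash helpers above, modelled on how
 * classful qdiscs such as sch_htb use them ("q->clhash" and "cl" are
 * hypothetical here):
 *
 *	qdisc_class_hash_init(&q->clhash);			in ->init()
 *	cl->common.classid = classid;
 *	sch_tree_lock(sch);
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	sch_tree_unlock(sch);
 *	qdisc_class_hash_grow(sch, &q->clhash);			after each insert
 *	...
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	qdisc_class_hash_destroy(&q->clhash);			in ->destroy()
 */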
704 
705 /* Allocate a unique handle from the space managed by the kernel.
706  * The possible range is [8000-FFFF]:0000 (0x8000 values)
707  */
708 static u32 qdisc_alloc_handle(struct net_device *dev)
709 {
710 	int i = 0x8000;
711 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
712 
713 	do {
714 		autohandle += TC_H_MAKE(0x10000U, 0);
715 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
716 			autohandle = TC_H_MAKE(0x80000000U, 0);
717 		if (!qdisc_lookup(dev, autohandle))
718 			return autohandle;
719 		cond_resched();
720 	} while	(--i > 0);
721 
722 	return 0;
723 }
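/* Handle layout refresher (illustrative): a handle is a 32-bit value
 * whose upper 16 bits are the major and lower 16 bits the minor, so
 * TC_H_MAKE(0x80010000U, 0) corresponds to "8001:" in tc syntax.
 * Kernel-allocated handles therefore walk the majors 8000-ffff with a
 * zero minor, as implemented above.
 */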
724 
725 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
726 			       unsigned int len)
727 {
728 	const struct Qdisc_class_ops *cops;
729 	unsigned long cl;
730 	u32 parentid;
731 	bool notify;
732 	int drops;
733 
734 	if (n == 0 && len == 0)
735 		return;
736 	drops = max_t(int, n, 0);
737 	rcu_read_lock();
738 	while ((parentid = sch->parent)) {
739 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
740 			break;
741 
742 		if (sch->flags & TCQ_F_NOPARENT)
743 			break;
744 		/* Notify the parent qdisc only if the child qdisc becomes empty.
745 		 *
746 		 * If the child was empty even before the update, then the backlog
747 		 * counter is screwed and we skip the notification, because the
748 		 * parent class is already passive.
749 		 */
750 		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
751 		/* TODO: perform the search on a per txq basis */
752 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
753 		if (sch == NULL) {
754 			WARN_ON_ONCE(parentid != TC_H_ROOT);
755 			break;
756 		}
757 		cops = sch->ops->cl_ops;
758 		if (notify && cops->qlen_notify) {
759 			cl = cops->find(sch, parentid);
760 			cops->qlen_notify(sch, cl);
761 		}
762 		sch->q.qlen -= n;
763 		sch->qstats.backlog -= len;
764 		__qdisc_qstats_drop(sch, drops);
765 	}
766 	rcu_read_unlock();
767 }
768 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
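/* Example (illustrative): if a child qdisc drops 3 queued packets
 * totalling 4500 bytes from inside its ->change() handler, it calls
 * qdisc_tree_reduce_backlog(sch, 3, 4500) so that the qlen/backlog
 * counters of every ancestor stay consistent, and so that a newly
 * empty class can be deactivated in its parent via qlen_notify.
 */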
769 
770 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
771 			 u32 portid, u32 seq, u16 flags, int event)
772 {
773 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
774 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
775 	struct tcmsg *tcm;
776 	struct nlmsghdr  *nlh;
777 	unsigned char *b = skb_tail_pointer(skb);
778 	struct gnet_dump d;
779 	struct qdisc_size_table *stab;
780 	__u32 qlen;
781 
782 	cond_resched();
783 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
784 	if (!nlh)
785 		goto out_nlmsg_trim;
786 	tcm = nlmsg_data(nlh);
787 	tcm->tcm_family = AF_UNSPEC;
788 	tcm->tcm__pad1 = 0;
789 	tcm->tcm__pad2 = 0;
790 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
791 	tcm->tcm_parent = clid;
792 	tcm->tcm_handle = q->handle;
793 	tcm->tcm_info = refcount_read(&q->refcnt);
794 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
795 		goto nla_put_failure;
796 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
797 		goto nla_put_failure;
798 	qlen = q->q.qlen;
799 
800 	stab = rtnl_dereference(q->stab);
801 	if (stab && qdisc_dump_stab(skb, stab) < 0)
802 		goto nla_put_failure;
803 
804 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
805 					 NULL, &d, TCA_PAD) < 0)
806 		goto nla_put_failure;
807 
808 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
809 		goto nla_put_failure;
810 
811 	if (qdisc_is_percpu_stats(q)) {
812 		cpu_bstats = q->cpu_bstats;
813 		cpu_qstats = q->cpu_qstats;
814 	}
815 
816 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
817 				  &d, cpu_bstats, &q->bstats) < 0 ||
818 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
819 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
820 		goto nla_put_failure;
821 
822 	if (gnet_stats_finish_copy(&d) < 0)
823 		goto nla_put_failure;
824 
825 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
826 	return skb->len;
827 
828 out_nlmsg_trim:
829 nla_put_failure:
830 	nlmsg_trim(skb, b);
831 	return -1;
832 }
833 
834 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
835 {
836 	if (q->flags & TCQ_F_BUILTIN)
837 		return true;
838 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
839 		return true;
840 
841 	return false;
842 }
843 
844 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
845 			struct nlmsghdr *n, u32 clid,
846 			struct Qdisc *old, struct Qdisc *new)
847 {
848 	struct sk_buff *skb;
849 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
850 
851 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
852 	if (!skb)
853 		return -ENOBUFS;
854 
855 	if (old && !tc_qdisc_dump_ignore(old, false)) {
856 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
857 				  0, RTM_DELQDISC) < 0)
858 			goto err_out;
859 	}
860 	if (new && !tc_qdisc_dump_ignore(new, false)) {
861 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
862 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
863 			goto err_out;
864 	}
865 
866 	if (skb->len)
867 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
868 				      n->nlmsg_flags & NLM_F_ECHO);
869 
870 err_out:
871 	kfree_skb(skb);
872 	return -EINVAL;
873 }
874 
875 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
876 			       struct nlmsghdr *n, u32 clid,
877 			       struct Qdisc *old, struct Qdisc *new)
878 {
879 	if (new || old)
880 		qdisc_notify(net, skb, n, clid, old, new);
881 
882 	if (old)
883 		qdisc_destroy(old);
884 }
885 
886 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
887  * to device "dev".
888  *
889  * When appropriate, send a netlink notification using "skb"
890  * and "n".
891  *
892  * On success, destroy the old qdisc.
893  */
894 
895 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
896 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
897 		       struct Qdisc *new, struct Qdisc *old)
898 {
899 	struct Qdisc *q = old;
900 	struct net *net = dev_net(dev);
901 	int err = 0;
902 
903 	if (parent == NULL) {
904 		unsigned int i, num_q, ingress;
905 
906 		ingress = 0;
907 		num_q = dev->num_tx_queues;
908 		if ((q && q->flags & TCQ_F_INGRESS) ||
909 		    (new && new->flags & TCQ_F_INGRESS)) {
910 			num_q = 1;
911 			ingress = 1;
912 			if (!dev_ingress_queue(dev))
913 				return -ENOENT;
914 		}
915 
916 		if (dev->flags & IFF_UP)
917 			dev_deactivate(dev);
918 
919 		if (new && new->ops->attach)
920 			goto skip;
921 
922 		for (i = 0; i < num_q; i++) {
923 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
924 
925 			if (!ingress)
926 				dev_queue = netdev_get_tx_queue(dev, i);
927 
928 			old = dev_graft_qdisc(dev_queue, new);
929 			if (new && i > 0)
930 				qdisc_refcount_inc(new);
931 
932 			if (!ingress)
933 				qdisc_destroy(old);
934 		}
935 
936 skip:
937 		if (!ingress) {
938 			notify_and_destroy(net, skb, n, classid,
939 					   dev->qdisc, new);
940 			if (new && !new->ops->attach)
941 				qdisc_refcount_inc(new);
942 			dev->qdisc = new ? : &noop_qdisc;
943 
944 			if (new && new->ops->attach)
945 				new->ops->attach(new);
946 		} else {
947 			notify_and_destroy(net, skb, n, classid, old, new);
948 		}
949 
950 		if (dev->flags & IFF_UP)
951 			dev_activate(dev);
952 	} else {
953 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
954 
955 		err = -EOPNOTSUPP;
956 		if (cops && cops->graft) {
957 			unsigned long cl = cops->find(parent, classid);
958 
959 			if (cl)
960 				err = cops->graft(parent, cl, new, &old);
961 			else
962 				err = -ENOENT;
963 		}
964 		if (!err)
965 			notify_and_destroy(net, skb, n, classid, old, new);
966 	}
967 	return err;
968 }
969 
970 /* lockdep annotation is needed for ingress; egress gets it only for name */
971 static struct lock_class_key qdisc_tx_lock;
972 static struct lock_class_key qdisc_rx_lock;
973 
974 /*
975    Allocate and initialize a new qdisc.
976 
977    Parameters are passed via opt.
978  */
979 
980 static struct Qdisc *qdisc_create(struct net_device *dev,
981 				  struct netdev_queue *dev_queue,
982 				  struct Qdisc *p, u32 parent, u32 handle,
983 				  struct nlattr **tca, int *errp)
984 {
985 	int err;
986 	struct nlattr *kind = tca[TCA_KIND];
987 	struct Qdisc *sch;
988 	struct Qdisc_ops *ops;
989 	struct qdisc_size_table *stab;
990 
991 	ops = qdisc_lookup_ops(kind);
992 #ifdef CONFIG_MODULES
993 	if (ops == NULL && kind != NULL) {
994 		char name[IFNAMSIZ];
995 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
996 			/* We dropped the RTNL semaphore in order to
997 			 * perform the module load.  So, even if we
998 			 * succeeded in loading the module, we have to
999 			 * tell the caller to replay the request.  We
1000 			 * indicate this using -EAGAIN.
1001 			 * We replay the request because the device may
1002 			 * go away in the meantime.
1003 			 */
1004 			rtnl_unlock();
1005 			request_module("sch_%s", name);
1006 			rtnl_lock();
1007 			ops = qdisc_lookup_ops(kind);
1008 			if (ops != NULL) {
1009 			/* We will call qdisc_lookup_ops() again,
1010 			 * so don't keep a reference.
1011 			 */
1012 				module_put(ops->owner);
1013 				err = -EAGAIN;
1014 				goto err_out;
1015 			}
1016 		}
1017 	}
1018 #endif
1019 
1020 	err = -ENOENT;
1021 	if (ops == NULL)
1022 		goto err_out;
1023 
1024 	sch = qdisc_alloc(dev_queue, ops);
1025 	if (IS_ERR(sch)) {
1026 		err = PTR_ERR(sch);
1027 		goto err_out2;
1028 	}
1029 
1030 	sch->parent = parent;
1031 
1032 	if (handle == TC_H_INGRESS) {
1033 		sch->flags |= TCQ_F_INGRESS;
1034 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
1035 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
1036 	} else {
1037 		if (handle == 0) {
1038 			handle = qdisc_alloc_handle(dev);
1039 			err = -ENOMEM;
1040 			if (handle == 0)
1041 				goto err_out3;
1042 		}
1043 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1044 		if (!netif_is_multiqueue(dev))
1045 			sch->flags |= TCQ_F_ONETXQUEUE;
1046 	}
1047 
1048 	sch->handle = handle;
1049 
1050 	/* This exists to keep backward compatibility with a userspace
1051 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
1052 	 * facility on older kernels by setting tx_queue_len=0 (prior
1053 	 * to qdisc init) and then forgetting to reinit tx_queue_len
1054 	 * before attaching a qdisc again.
1055 	 */
1056 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1057 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1058 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1059 	}
1060 
1061 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
1062 		if (qdisc_is_percpu_stats(sch)) {
1063 			sch->cpu_bstats =
1064 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1065 			if (!sch->cpu_bstats)
1066 				goto err_out4;
1067 
1068 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1069 			if (!sch->cpu_qstats)
1070 				goto err_out4;
1071 		}
1072 
1073 		if (tca[TCA_STAB]) {
1074 			stab = qdisc_get_stab(tca[TCA_STAB]);
1075 			if (IS_ERR(stab)) {
1076 				err = PTR_ERR(stab);
1077 				goto err_out4;
1078 			}
1079 			rcu_assign_pointer(sch->stab, stab);
1080 		}
1081 		if (tca[TCA_RATE]) {
1082 			seqcount_t *running;
1083 
1084 			err = -EOPNOTSUPP;
1085 			if (sch->flags & TCQ_F_MQROOT)
1086 				goto err_out4;
1087 
1088 			if ((sch->parent != TC_H_ROOT) &&
1089 			    !(sch->flags & TCQ_F_INGRESS) &&
1090 			    (!p || !(p->flags & TCQ_F_MQROOT)))
1091 				running = qdisc_root_sleeping_running(sch);
1092 			else
1093 				running = &sch->running;
1094 
1095 			err = gen_new_estimator(&sch->bstats,
1096 						sch->cpu_bstats,
1097 						&sch->rate_est,
1098 						NULL,
1099 						running,
1100 						tca[TCA_RATE]);
1101 			if (err)
1102 				goto err_out4;
1103 		}
1104 
1105 		qdisc_hash_add(sch, false);
1106 
1107 		return sch;
1108 	}
1109 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1110 	if (ops->destroy)
1111 		ops->destroy(sch);
1112 err_out3:
1113 	dev_put(dev);
1114 	kfree((char *) sch - sch->padded);
1115 err_out2:
1116 	module_put(ops->owner);
1117 err_out:
1118 	*errp = err;
1119 	return NULL;
1120 
1121 err_out4:
1122 	free_percpu(sch->cpu_bstats);
1123 	free_percpu(sch->cpu_qstats);
1124 	/*
1125 	 * Any broken qdiscs that would require an ops->reset() here?
1126 	 * The qdisc was never in action so it shouldn't be necessary.
1127 	 */
1128 	qdisc_put_stab(rtnl_dereference(sch->stab));
1129 	if (ops->destroy)
1130 		ops->destroy(sch);
1131 	goto err_out3;
1132 }
1133 
1134 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1135 {
1136 	struct qdisc_size_table *ostab, *stab = NULL;
1137 	int err = 0;
1138 
1139 	if (tca[TCA_OPTIONS]) {
1140 		if (sch->ops->change == NULL)
1141 			return -EINVAL;
1142 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1143 		if (err)
1144 			return err;
1145 	}
1146 
1147 	if (tca[TCA_STAB]) {
1148 		stab = qdisc_get_stab(tca[TCA_STAB]);
1149 		if (IS_ERR(stab))
1150 			return PTR_ERR(stab);
1151 	}
1152 
1153 	ostab = rtnl_dereference(sch->stab);
1154 	rcu_assign_pointer(sch->stab, stab);
1155 	qdisc_put_stab(ostab);
1156 
1157 	if (tca[TCA_RATE]) {
1158 		/* NB: ignores errors from replace_estimator
1159 		   because the change cannot be undone. */
1160 		if (sch->flags & TCQ_F_MQROOT)
1161 			goto out;
1162 		gen_replace_estimator(&sch->bstats,
1163 				      sch->cpu_bstats,
1164 				      &sch->rate_est,
1165 				      NULL,
1166 				      qdisc_root_sleeping_running(sch),
1167 				      tca[TCA_RATE]);
1168 	}
1169 out:
1170 	return 0;
1171 }
1172 
1173 struct check_loop_arg {
1174 	struct qdisc_walker	w;
1175 	struct Qdisc		*p;
1176 	int			depth;
1177 };
1178 
1179 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1180 			 struct qdisc_walker *w);
1181 
1182 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1183 {
1184 	struct check_loop_arg	arg;
1185 
1186 	if (q->ops->cl_ops == NULL)
1187 		return 0;
1188 
1189 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1190 	arg.w.fn = check_loop_fn;
1191 	arg.depth = depth;
1192 	arg.p = p;
1193 	q->ops->cl_ops->walk(q, &arg.w);
1194 	return arg.w.stop ? -ELOOP : 0;
1195 }
1196 
1197 static int
1198 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1199 {
1200 	struct Qdisc *leaf;
1201 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1202 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1203 
1204 	leaf = cops->leaf(q, cl);
1205 	if (leaf) {
1206 		if (leaf == arg->p || arg->depth > 7)
1207 			return -ELOOP;
1208 		return check_loop(leaf, arg->p, arg->depth + 1);
1209 	}
1210 	return 0;
1211 }
1212 
1213 /*
1214  * Delete/get qdisc.
1215  */
1216 
1217 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1218 			struct netlink_ext_ack *extack)
1219 {
1220 	struct net *net = sock_net(skb->sk);
1221 	struct tcmsg *tcm = nlmsg_data(n);
1222 	struct nlattr *tca[TCA_MAX + 1];
1223 	struct net_device *dev;
1224 	u32 clid;
1225 	struct Qdisc *q = NULL;
1226 	struct Qdisc *p = NULL;
1227 	int err;
1228 
1229 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1230 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1231 		return -EPERM;
1232 
1233 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1234 	if (err < 0)
1235 		return err;
1236 
1237 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1238 	if (!dev)
1239 		return -ENODEV;
1240 
1241 	clid = tcm->tcm_parent;
1242 	if (clid) {
1243 		if (clid != TC_H_ROOT) {
1244 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1245 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1246 				if (!p)
1247 					return -ENOENT;
1248 				q = qdisc_leaf(p, clid);
1249 			} else if (dev_ingress_queue(dev)) {
1250 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1251 			}
1252 		} else {
1253 			q = dev->qdisc;
1254 		}
1255 		if (!q)
1256 			return -ENOENT;
1257 
1258 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1259 			return -EINVAL;
1260 	} else {
1261 		q = qdisc_lookup(dev, tcm->tcm_handle);
1262 		if (!q)
1263 			return -ENOENT;
1264 	}
1265 
1266 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1267 		return -EINVAL;
1268 
1269 	if (n->nlmsg_type == RTM_DELQDISC) {
1270 		if (!clid)
1271 			return -EINVAL;
1272 		if (q->handle == 0)
1273 			return -ENOENT;
1274 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1275 		if (err != 0)
1276 			return err;
1277 	} else {
1278 		qdisc_notify(net, skb, n, clid, NULL, q);
1279 	}
1280 	return 0;
1281 }
1282 
1283 /*
1284  * Create/change qdisc.
1285  */
1286 
1287 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1288 			   struct netlink_ext_ack *extack)
1289 {
1290 	struct net *net = sock_net(skb->sk);
1291 	struct tcmsg *tcm;
1292 	struct nlattr *tca[TCA_MAX + 1];
1293 	struct net_device *dev;
1294 	u32 clid;
1295 	struct Qdisc *q, *p;
1296 	int err;
1297 
1298 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1299 		return -EPERM;
1300 
1301 replay:
1302 	/* Reinit, just in case something touches this. */
1303 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1304 	if (err < 0)
1305 		return err;
1306 
1307 	tcm = nlmsg_data(n);
1308 	clid = tcm->tcm_parent;
1309 	q = p = NULL;
1310 
1311 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1312 	if (!dev)
1313 		return -ENODEV;
1314 
1315 
1316 	if (clid) {
1317 		if (clid != TC_H_ROOT) {
1318 			if (clid != TC_H_INGRESS) {
1319 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1320 				if (!p)
1321 					return -ENOENT;
1322 				q = qdisc_leaf(p, clid);
1323 			} else if (dev_ingress_queue_create(dev)) {
1324 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1325 			}
1326 		} else {
1327 			q = dev->qdisc;
1328 		}
1329 
1330 		/* It may be the default qdisc; ignore it */
1331 		if (q && q->handle == 0)
1332 			q = NULL;
1333 
1334 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1335 			if (tcm->tcm_handle) {
1336 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1337 					return -EEXIST;
1338 				if (TC_H_MIN(tcm->tcm_handle))
1339 					return -EINVAL;
1340 				q = qdisc_lookup(dev, tcm->tcm_handle);
1341 				if (!q)
1342 					goto create_n_graft;
1343 				if (n->nlmsg_flags & NLM_F_EXCL)
1344 					return -EEXIST;
1345 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1346 					return -EINVAL;
1347 				if (q == p ||
1348 				    (p && check_loop(q, p, 0)))
1349 					return -ELOOP;
1350 				qdisc_refcount_inc(q);
1351 				goto graft;
1352 			} else {
1353 				if (!q)
1354 					goto create_n_graft;
1355 
1356 				/* This magic test requires explanation.
1357 				 *
1358 				 *   We know that some child q is already
1359 				 *   attached to this parent and have a choice:
1360 				 *   either to change it or to create/graft a new one.
1361 				 *
1362 				 *   1. We are allowed to create/graft only
1363 				 *   if both the CREATE and REPLACE flags are set.
1364 				 *
1365 				 *   2. If EXCL is set, the requestor meant that
1366 				 *   the qdisc tcm_handle is not expected
1367 				 *   to exist, so we choose create/graft too.
1368 				 *
1369 				 *   3. The last case is when no flags are set.
1370 				 *   Alas, it is a sort of hole in the API; we
1371 				 *   cannot decide what to do unambiguously.
1372 				 *   For now we select create/graft if the
1373 				 *   user gave a KIND that does not match the existing one.
1374 				 */
1375 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1376 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1377 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1378 				     (tca[TCA_KIND] &&
1379 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1380 					goto create_n_graft;
1381 			}
1382 		}
1383 	} else {
1384 		if (!tcm->tcm_handle)
1385 			return -EINVAL;
1386 		q = qdisc_lookup(dev, tcm->tcm_handle);
1387 	}
1388 
1389 	/* Change qdisc parameters */
1390 	if (q == NULL)
1391 		return -ENOENT;
1392 	if (n->nlmsg_flags & NLM_F_EXCL)
1393 		return -EEXIST;
1394 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1395 		return -EINVAL;
1396 	err = qdisc_change(q, tca);
1397 	if (err == 0)
1398 		qdisc_notify(net, skb, n, clid, NULL, q);
1399 	return err;
1400 
1401 create_n_graft:
1402 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1403 		return -ENOENT;
1404 	if (clid == TC_H_INGRESS) {
1405 		if (dev_ingress_queue(dev))
1406 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1407 					 tcm->tcm_parent, tcm->tcm_parent,
1408 					 tca, &err);
1409 		else
1410 			err = -ENOENT;
1411 	} else {
1412 		struct netdev_queue *dev_queue;
1413 
1414 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1415 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1416 		else if (p)
1417 			dev_queue = p->dev_queue;
1418 		else
1419 			dev_queue = netdev_get_tx_queue(dev, 0);
1420 
1421 		q = qdisc_create(dev, dev_queue, p,
1422 				 tcm->tcm_parent, tcm->tcm_handle,
1423 				 tca, &err);
1424 	}
1425 	if (q == NULL) {
1426 		if (err == -EAGAIN)
1427 			goto replay;
1428 		return err;
1429 	}
1430 
1431 graft:
1432 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1433 	if (err) {
1434 		if (q)
1435 			qdisc_destroy(q);
1436 		return err;
1437 	}
1438 
1439 	return 0;
1440 }
1441 
1442 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1443 			      struct netlink_callback *cb,
1444 			      int *q_idx_p, int s_q_idx, bool recur,
1445 			      bool dump_invisible)
1446 {
1447 	int ret = 0, q_idx = *q_idx_p;
1448 	struct Qdisc *q;
1449 	int b;
1450 
1451 	if (!root)
1452 		return 0;
1453 
1454 	q = root;
1455 	if (q_idx < s_q_idx) {
1456 		q_idx++;
1457 	} else {
1458 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1459 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1460 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1461 				  RTM_NEWQDISC) <= 0)
1462 			goto done;
1463 		q_idx++;
1464 	}
1465 
1466 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1467 	 * itself has already been dumped.
1468 	 *
1469 	 * If we've already dumped the top-level (ingress) qdisc above and the global
1470 	 * qdisc hashtable, we don't want to hit it again.
1471 	 */
1472 	if (!qdisc_dev(root) || !recur)
1473 		goto out;
1474 
1475 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1476 		if (q_idx < s_q_idx) {
1477 			q_idx++;
1478 			continue;
1479 		}
1480 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1481 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1482 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1483 				  RTM_NEWQDISC) <= 0)
1484 			goto done;
1485 		q_idx++;
1486 	}
1487 
1488 out:
1489 	*q_idx_p = q_idx;
1490 	return ret;
1491 done:
1492 	ret = -1;
1493 	goto out;
1494 }
1495 
1496 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1497 {
1498 	struct net *net = sock_net(skb->sk);
1499 	int idx, q_idx;
1500 	int s_idx, s_q_idx;
1501 	struct net_device *dev;
1502 	const struct nlmsghdr *nlh = cb->nlh;
1503 	struct nlattr *tca[TCA_MAX + 1];
1504 	int err;
1505 
1506 	s_idx = cb->args[0];
1507 	s_q_idx = q_idx = cb->args[1];
1508 
1509 	idx = 0;
1510 	ASSERT_RTNL();
1511 
1512 	err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
1513 	if (err < 0)
1514 		return err;
1515 
1516 	for_each_netdev(net, dev) {
1517 		struct netdev_queue *dev_queue;
1518 
1519 		if (idx < s_idx)
1520 			goto cont;
1521 		if (idx > s_idx)
1522 			s_q_idx = 0;
1523 		q_idx = 0;
1524 
1525 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1526 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1527 			goto done;
1528 
1529 		dev_queue = dev_ingress_queue(dev);
1530 		if (dev_queue &&
1531 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1532 				       &q_idx, s_q_idx, false,
1533 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1534 			goto done;
1535 
1536 cont:
1537 		idx++;
1538 	}
1539 
1540 done:
1541 	cb->args[0] = idx;
1542 	cb->args[1] = q_idx;
1543 
1544 	return skb->len;
1545 }
1546 
1547 
1548 
1549 /************************************************
1550  *	Traffic classes manipulation.		*
1551  ************************************************/
1552 
1553 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1554 			  unsigned long cl,
1555 			  u32 portid, u32 seq, u16 flags, int event)
1556 {
1557 	struct tcmsg *tcm;
1558 	struct nlmsghdr  *nlh;
1559 	unsigned char *b = skb_tail_pointer(skb);
1560 	struct gnet_dump d;
1561 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1562 
1563 	cond_resched();
1564 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1565 	if (!nlh)
1566 		goto out_nlmsg_trim;
1567 	tcm = nlmsg_data(nlh);
1568 	tcm->tcm_family = AF_UNSPEC;
1569 	tcm->tcm__pad1 = 0;
1570 	tcm->tcm__pad2 = 0;
1571 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1572 	tcm->tcm_parent = q->handle;
1573 	tcm->tcm_handle = q->handle;
1574 	tcm->tcm_info = 0;
1575 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1576 		goto nla_put_failure;
1577 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1578 		goto nla_put_failure;
1579 
1580 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1581 					 NULL, &d, TCA_PAD) < 0)
1582 		goto nla_put_failure;
1583 
1584 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1585 		goto nla_put_failure;
1586 
1587 	if (gnet_stats_finish_copy(&d) < 0)
1588 		goto nla_put_failure;
1589 
1590 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1591 	return skb->len;
1592 
1593 out_nlmsg_trim:
1594 nla_put_failure:
1595 	nlmsg_trim(skb, b);
1596 	return -1;
1597 }
1598 
1599 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1600 			 struct nlmsghdr *n, struct Qdisc *q,
1601 			 unsigned long cl, int event)
1602 {
1603 	struct sk_buff *skb;
1604 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1605 
1606 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1607 	if (!skb)
1608 		return -ENOBUFS;
1609 
1610 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1611 		kfree_skb(skb);
1612 		return -EINVAL;
1613 	}
1614 
1615 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1616 			      n->nlmsg_flags & NLM_F_ECHO);
1617 }
1618 
1619 static int tclass_del_notify(struct net *net,
1620 			     const struct Qdisc_class_ops *cops,
1621 			     struct sk_buff *oskb, struct nlmsghdr *n,
1622 			     struct Qdisc *q, unsigned long cl)
1623 {
1624 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1625 	struct sk_buff *skb;
1626 	int err = 0;
1627 
1628 	if (!cops->delete)
1629 		return -EOPNOTSUPP;
1630 
1631 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1632 	if (!skb)
1633 		return -ENOBUFS;
1634 
1635 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1636 			   RTM_DELTCLASS) < 0) {
1637 		kfree_skb(skb);
1638 		return -EINVAL;
1639 	}
1640 
1641 	err = cops->delete(q, cl);
1642 	if (err) {
1643 		kfree_skb(skb);
1644 		return err;
1645 	}
1646 
1647 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1648 			      n->nlmsg_flags & NLM_F_ECHO);
1649 }
1650 
1651 #ifdef CONFIG_NET_CLS
1652 
1653 struct tcf_bind_args {
1654 	struct tcf_walker w;
1655 	u32 classid;
1656 	unsigned long cl;
1657 };
1658 
1659 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1660 {
1661 	struct tcf_bind_args *a = (void *)arg;
1662 
1663 	if (tp->ops->bind_class) {
1664 		struct Qdisc *q = tcf_block_q(tp->chain->block);
1665 
1666 		sch_tree_lock(q);
1667 		tp->ops->bind_class(n, a->classid, a->cl);
1668 		sch_tree_unlock(q);
1669 	}
1670 	return 0;
1671 }
1672 
1673 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1674 			   unsigned long new_cl)
1675 {
1676 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1677 	struct tcf_block *block;
1678 	struct tcf_chain *chain;
1679 	unsigned long cl;
1680 
1681 	cl = cops->find(q, portid);
1682 	if (!cl)
1683 		return;
1684 	block = cops->tcf_block(q, cl);
1685 	if (!block)
1686 		return;
1687 	list_for_each_entry(chain, &block->chain_list, list) {
1688 		struct tcf_proto *tp;
1689 
1690 		for (tp = rtnl_dereference(chain->filter_chain);
1691 		     tp; tp = rtnl_dereference(tp->next)) {
1692 			struct tcf_bind_args arg = {};
1693 
1694 			arg.w.fn = tcf_node_bind;
1695 			arg.classid = clid;
1696 			arg.cl = new_cl;
1697 			tp->ops->walk(tp, &arg.w);
1698 		}
1699 	}
1700 }
1701 
1702 #else
1703 
1704 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1705 			   unsigned long new_cl)
1706 {
1707 }
1708 
1709 #endif
1710 
1711 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1712 			 struct netlink_ext_ack *extack)
1713 {
1714 	struct net *net = sock_net(skb->sk);
1715 	struct tcmsg *tcm = nlmsg_data(n);
1716 	struct nlattr *tca[TCA_MAX + 1];
1717 	struct net_device *dev;
1718 	struct Qdisc *q = NULL;
1719 	const struct Qdisc_class_ops *cops;
1720 	unsigned long cl = 0;
1721 	unsigned long new_cl;
1722 	u32 portid;
1723 	u32 clid;
1724 	u32 qid;
1725 	int err;
1726 
1727 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1728 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1729 		return -EPERM;
1730 
1731 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1732 	if (err < 0)
1733 		return err;
1734 
1735 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1736 	if (!dev)
1737 		return -ENODEV;
1738 
1739 	/*
1740 	   parent == TC_H_UNSPEC - unspecified parent.
1741 	   parent == TC_H_ROOT   - class is root, which has no parent.
1742 	   parent == X:0	 - parent is root class.
1743 	   parent == X:Y	 - parent is a node in hierarchy.
1744 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1745 
1746 	   handle == 0:0	 - generate handle from kernel pool.
1747 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1748 	   handle == X:Y	 - self-explanatory.
1749 	   handle == X:0	 - root class.
1750 	 */
1751 
1752 	/* Step 1. Determine qdisc handle X:0 */
1753 
1754 	portid = tcm->tcm_parent;
1755 	clid = tcm->tcm_handle;
1756 	qid = TC_H_MAJ(clid);
1757 
1758 	if (portid != TC_H_ROOT) {
1759 		u32 qid1 = TC_H_MAJ(portid);
1760 
1761 		if (qid && qid1) {
1762 			/* If both majors are known, they must be identical. */
1763 			if (qid != qid1)
1764 				return -EINVAL;
1765 		} else if (qid1) {
1766 			qid = qid1;
1767 		} else if (qid == 0)
1768 			qid = dev->qdisc->handle;
1769 
1770 		/* Now qid is a genuine qdisc handle, consistent
1771 		 * with both parent and child.
1772 		 *
1773 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1774 		 */
1775 		if (portid)
1776 			portid = TC_H_MAKE(qid, portid);
1777 	} else {
1778 		if (qid == 0)
1779 			qid = dev->qdisc->handle;
1780 	}
1781 
1782 	/* OK. Locate qdisc */
1783 	q = qdisc_lookup(dev, qid);
1784 	if (!q)
1785 		return -ENOENT;
1786 
1787 	/* And check that it supports classes */
1788 	cops = q->ops->cl_ops;
1789 	if (cops == NULL)
1790 		return -EINVAL;
1791 
1792 	/* Now try to get class */
1793 	if (clid == 0) {
1794 		if (portid == TC_H_ROOT)
1795 			clid = qid;
1796 	} else
1797 		clid = TC_H_MAKE(qid, clid);
1798 
1799 	if (clid)
1800 		cl = cops->find(q, clid);
1801 
1802 	if (cl == 0) {
1803 		err = -ENOENT;
1804 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1805 		    !(n->nlmsg_flags & NLM_F_CREATE))
1806 			goto out;
1807 	} else {
1808 		switch (n->nlmsg_type) {
1809 		case RTM_NEWTCLASS:
1810 			err = -EEXIST;
1811 			if (n->nlmsg_flags & NLM_F_EXCL)
1812 				goto out;
1813 			break;
1814 		case RTM_DELTCLASS:
1815 			err = tclass_del_notify(net, cops, skb, n, q, cl);
1816 			/* Unbind the class with flilters with 0 */
1817 			/* Unbind the class from its filters by rebinding them to 0 */
1818 			goto out;
1819 		case RTM_GETTCLASS:
1820 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1821 			goto out;
1822 		default:
1823 			err = -EINVAL;
1824 			goto out;
1825 		}
1826 	}
1827 
1828 	new_cl = cl;
1829 	err = -EOPNOTSUPP;
1830 	if (cops->change)
1831 		err = cops->change(q, clid, portid, tca, &new_cl);
1832 	if (err == 0) {
1833 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1834 		/* We just create a new class, need to do reverse binding. */
1835 		/* We just created a new class; we need to do the reverse binding. */
1836 			tc_bind_tclass(q, portid, clid, new_cl);
1837 	}
1838 out:
1839 	return err;
1840 }
1841 
1842 struct qdisc_dump_args {
1843 	struct qdisc_walker	w;
1844 	struct sk_buff		*skb;
1845 	struct netlink_callback	*cb;
1846 };
1847 
1848 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1849 			    struct qdisc_walker *arg)
1850 {
1851 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1852 
1853 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1854 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1855 			      RTM_NEWTCLASS);
1856 }
1857 
1858 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1859 				struct tcmsg *tcm, struct netlink_callback *cb,
1860 				int *t_p, int s_t)
1861 {
1862 	struct qdisc_dump_args arg;
1863 
1864 	if (tc_qdisc_dump_ignore(q, false) ||
1865 	    *t_p < s_t || !q->ops->cl_ops ||
1866 	    (tcm->tcm_parent &&
1867 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1868 		(*t_p)++;
1869 		return 0;
1870 	}
1871 	if (*t_p > s_t)
1872 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1873 	arg.w.fn = qdisc_class_dump;
1874 	arg.skb = skb;
1875 	arg.cb = cb;
1876 	arg.w.stop  = 0;
1877 	arg.w.skip = cb->args[1];
1878 	arg.w.count = 0;
1879 	q->ops->cl_ops->walk(q, &arg.w);
1880 	cb->args[1] = arg.w.count;
1881 	if (arg.w.stop)
1882 		return -1;
1883 	(*t_p)++;
1884 	return 0;
1885 }
1886 
1887 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1888 			       struct tcmsg *tcm, struct netlink_callback *cb,
1889 			       int *t_p, int s_t)
1890 {
1891 	struct Qdisc *q;
1892 	int b;
1893 
1894 	if (!root)
1895 		return 0;
1896 
1897 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1898 		return -1;
1899 
1900 	if (!qdisc_dev(root))
1901 		return 0;
1902 
1903 	if (tcm->tcm_parent) {
1904 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
1905 		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1906 			return -1;
1907 		return 0;
1908 	}
1909 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1910 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1911 			return -1;
1912 	}
1913 
1914 	return 0;
1915 }
1916 
1917 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1918 {
1919 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1920 	struct net *net = sock_net(skb->sk);
1921 	struct netdev_queue *dev_queue;
1922 	struct net_device *dev;
1923 	int t, s_t;
1924 
1925 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1926 		return 0;
1927 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1928 	if (!dev)
1929 		return 0;
1930 
1931 	s_t = cb->args[0];
1932 	t = 0;
1933 
1934 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1935 		goto done;
1936 
1937 	dev_queue = dev_ingress_queue(dev);
1938 	if (dev_queue &&
1939 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1940 				&t, s_t) < 0)
1941 		goto done;
1942 
1943 done:
1944 	cb->args[0] = t;
1945 
1946 	dev_put(dev);
1947 	return skb->len;
1948 }
1949 
1950 #ifdef CONFIG_PROC_FS
1951 static int psched_show(struct seq_file *seq, void *v)
1952 {
1953 	seq_printf(seq, "%08x %08x %08x %08x\n",
1954 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1955 		   1000000,
1956 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1957 
1958 	return 0;
1959 }
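/* Example output (illustrative; the last field depends on whether
 * high-resolution timers are available):
 *
 *	$ cat /proc/net/psched
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per pseudo-"us", 64 ns per psched tick, a nominal 1 MHz
 * timer frequency, and a 1 GHz hrtimer resolution.
 */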
1960 
1961 static int psched_open(struct inode *inode, struct file *file)
1962 {
1963 	return single_open(file, psched_show, NULL);
1964 }
1965 
1966 static const struct file_operations psched_fops = {
1967 	.owner = THIS_MODULE,
1968 	.open = psched_open,
1969 	.read  = seq_read,
1970 	.llseek = seq_lseek,
1971 	.release = single_release,
1972 };
1973 
1974 static int __net_init psched_net_init(struct net *net)
1975 {
1976 	struct proc_dir_entry *e;
1977 
1978 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1979 	if (e == NULL)
1980 		return -ENOMEM;
1981 
1982 	return 0;
1983 }
1984 
1985 static void __net_exit psched_net_exit(struct net *net)
1986 {
1987 	remove_proc_entry("psched", net->proc_net);
1988 }
1989 #else
1990 static int __net_init psched_net_init(struct net *net)
1991 {
1992 	return 0;
1993 }
1994 
1995 static void __net_exit psched_net_exit(struct net *net)
1996 {
1997 }
1998 #endif
1999 
2000 static struct pernet_operations psched_net_ops = {
2001 	.init = psched_net_init,
2002 	.exit = psched_net_exit,
2003 };
2004 
2005 static int __init pktsched_init(void)
2006 {
2007 	int err;
2008 
2009 	err = register_pernet_subsys(&psched_net_ops);
2010 	if (err) {
2011 		pr_err("pktsched_init: "
2012 		       "cannot initialize per netns operations\n");
2013 		return err;
2014 	}
2015 
2016 	register_qdisc(&pfifo_fast_ops);
2017 	register_qdisc(&pfifo_qdisc_ops);
2018 	register_qdisc(&bfifo_qdisc_ops);
2019 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2020 	register_qdisc(&mq_qdisc_ops);
2021 	register_qdisc(&noqueue_qdisc_ops);
2022 
2023 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2024 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2025 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2026 		      0);
2027 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2028 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2029 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2030 		      0);
2031 
2032 	return 0;
2033 }
2034 
2035 subsys_initcall(pktsched_init);
2036