1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39 
40 /*
41 
42    Short review.
43    -------------
44 
45    This file consists of two interrelated parts:
46 
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49 
50    Generally, a queueing discipline ("qdisc") is a black box
51    which is able to enqueue packets and to dequeue them (when
52    the device is ready to send something) in an order and at times
53    determined by the algorithm hidden inside it.
54 
55    qdiscs are divided into two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets into "traffic classes",
58      using "packet classifiers" (see cls_api.c).
59 
60    In turn, classes may have child qdiscs (as a rule, queues)
61    attached to them, and so on recursively.
62 
63    The goal of the routines in this file is to translate
64    the information supplied by the user in the form of handles
65    into a form more intelligible to the kernel, to perform some
66    sanity checks and the parts of the work common to all qdiscs,
67    and to provide rtnetlink notifications.
68 
69    All the really intelligent work is done inside the qdisc modules.
70 
71 
72 
73    Every discipline has two major routines: enqueue and dequeue.
74 
75    ---dequeue
76 
77    dequeue usually returns an skb to send. It is allowed to return NULL,
78    but that does not mean the queue is empty; it just means that the
79    discipline does not want to send anything this time.
80    The queue is really empty only if q->q.qlen == 0.
81    For complicated disciplines with multiple queues, q->q is not the
82    real packet queue, but q->q.qlen must nevertheless be kept valid.
83 
84    ---enqueue
85 
86    enqueue returns 0 if the packet was enqueued successfully.
87    If a packet (this one or another one) was dropped, it returns
88    a non-zero error code:
89    NET_XMIT_DROP 	- this packet was dropped.
90      Expected action: do not back off, but wait until the queue clears.
91    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
92      Expected action: back off or ignore.
93 
94    Auxiliary routines:
95 
96    ---peek
97 
98    like dequeue but without removing a packet from the queue
99 
100    ---reset
101 
102    returns the qdisc to its initial state: purges all buffers, clears
103    all timers and counters (except for statistics), etc.
104 
105    ---init
106 
107    initializes a newly created qdisc.
108 
109    ---destroy
110 
111    destroys the resources allocated by init and during the lifetime of the qdisc.
112 
113    ---change
114 
115    changes qdisc parameters.
116  */
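
/* A hedged, illustrative sketch (not part of this file's API): the
 * routines described above wired into a minimal "queue"-category
 * qdisc.  All names are hypothetical; the helpers are the generic
 * ones from sch_generic, and qdisc_drop() returns NET_XMIT_DROP.
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < qdisc_dev(sch)->tx_queue_len))
 *			return qdisc_enqueue_tail(skb, sch);
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id	 = "example",
 *		.enqueue = example_enqueue,
 *		.dequeue = qdisc_dequeue_head,
 *		.peek	 = qdisc_peek_head,
 *		.owner	 = THIS_MODULE,
 *	};
 */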
117 
118 /* Protects the list of registered TC modules. It is a pure SMP lock. */
119 static DEFINE_RWLOCK(qdisc_mod_lock);
120 
121 
122 /************************************************
123  *	Queueing disciplines manipulation.	*
124  ************************************************/
125 
126 
127 /* The list of all installed queueing disciplines. */
128 
129 static struct Qdisc_ops *qdisc_base;
130 
131 /* Register/unregister queueing discipline */
132 
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135 	struct Qdisc_ops *q, **qp;
136 	int rc = -EEXIST;
137 
138 	write_lock(&qdisc_mod_lock);
139 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140 		if (!strcmp(qops->id, q->id))
141 			goto out;
142 
143 	if (qops->enqueue == NULL)
144 		qops->enqueue = noop_qdisc_ops.enqueue;
145 	if (qops->peek == NULL) {
146 		if (qops->dequeue == NULL)
147 			qops->peek = noop_qdisc_ops.peek;
148 		else
149 			goto out_einval;
150 	}
151 	if (qops->dequeue == NULL)
152 		qops->dequeue = noop_qdisc_ops.dequeue;
153 
154 	if (qops->cl_ops) {
155 		const struct Qdisc_class_ops *cops = qops->cl_ops;
156 
157 		if (!(cops->find && cops->walk && cops->leaf))
158 			goto out_einval;
159 
160 		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161 			goto out_einval;
162 	}
163 
164 	qops->next = NULL;
165 	*qp = qops;
166 	rc = 0;
167 out:
168 	write_unlock(&qdisc_mod_lock);
169 	return rc;
170 
171 out_einval:
172 	rc = -EINVAL;
173 	goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
176 
177 int unregister_qdisc(struct Qdisc_ops *qops)
178 {
179 	struct Qdisc_ops *q, **qp;
180 	int err = -ENOENT;
181 
182 	write_lock(&qdisc_mod_lock);
183 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184 		if (q == qops)
185 			break;
186 	if (q) {
187 		*qp = q->next;
188 		q->next = NULL;
189 		err = 0;
190 	}
191 	write_unlock(&qdisc_mod_lock);
192 	return err;
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
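
/* Hedged usage sketch: a qdisc module typically pairs register_qdisc()
 * and unregister_qdisc() in its module init/exit hooks; the
 * example_qdisc_ops name is hypothetical.
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}
 *	module_init(example_module_init);
 *	module_exit(example_module_exit);
 */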
195 
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199 	read_lock(&qdisc_mod_lock);
200 	strlcpy(name, default_qdisc_ops->id, len);
201 	read_unlock(&qdisc_mod_lock);
202 }
203 
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206 	struct Qdisc_ops *q = NULL;
207 
208 	for (q = qdisc_base; q; q = q->next) {
209 		if (!strcmp(name, q->id)) {
210 			if (!try_module_get(q->owner))
211 				q = NULL;
212 			break;
213 		}
214 	}
215 
216 	return q;
217 }
218 
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222 	const struct Qdisc_ops *ops;
223 
224 	if (!capable(CAP_NET_ADMIN))
225 		return -EPERM;
226 
227 	write_lock(&qdisc_mod_lock);
228 	ops = qdisc_lookup_default(name);
229 	if (!ops) {
230 		/* Not found, drop lock and try to load module */
231 		write_unlock(&qdisc_mod_lock);
232 		request_module("sch_%s", name);
233 		write_lock(&qdisc_mod_lock);
234 
235 		ops = qdisc_lookup_default(name);
236 	}
237 
238 	if (ops) {
239 		/* Set new default */
240 		module_put(default_qdisc_ops->owner);
241 		default_qdisc_ops = ops;
242 	}
243 	write_unlock(&qdisc_mod_lock);
244 
245 	return ops ? 0 : -ENOENT;
246 }
247 
248 #ifdef CONFIG_NET_SCH_DEFAULT
249 /* Set default value from kernel config */
250 static int __init sch_default_qdisc(void)
251 {
252 	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
253 }
254 late_initcall(sch_default_qdisc);
255 #endif
256 
257 /* We know the handle. Find the qdisc among all qdiscs attached to the
258  * device (the root qdisc, all its children, children of children, etc.).
259  * Note: the caller holds either the RTNL mutex or rcu_read_lock().
260  */
261 
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264 	struct Qdisc *q;
265 
266 	if (!qdisc_dev(root))
267 		return (root->handle == handle ? root : NULL);
268 
269 	if (!(root->flags & TCQ_F_BUILTIN) &&
270 	    root->handle == handle)
271 		return root;
272 
273 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
274 		if (q->handle == handle)
275 			return q;
276 	}
277 	return NULL;
278 }
279 
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283 		ASSERT_RTNL();
284 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285 		if (invisible)
286 			q->flags |= TCQ_F_INVISIBLE;
287 	}
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290 
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 		ASSERT_RTNL();
295 		hash_del_rcu(&q->hash);
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299 
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302 	struct Qdisc *q;
303 
304 	q = qdisc_match_from_root(dev->qdisc, handle);
305 	if (q)
306 		goto out;
307 
308 	if (dev_ingress_queue(dev))
309 		q = qdisc_match_from_root(
310 			dev_ingress_queue(dev)->qdisc_sleeping,
311 			handle);
312 out:
313 	return q;
314 }
315 
316 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
317 {
318 	unsigned long cl;
319 	struct Qdisc *leaf;
320 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
321 
322 	if (cops == NULL)
323 		return NULL;
324 	cl = cops->find(p, classid);
325 
326 	if (cl == 0)
327 		return NULL;
328 	leaf = cops->leaf(p, cl);
329 	return leaf;
330 }
331 
332 /* Find queueing discipline by name */
333 
334 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
335 {
336 	struct Qdisc_ops *q = NULL;
337 
338 	if (kind) {
339 		read_lock(&qdisc_mod_lock);
340 		for (q = qdisc_base; q; q = q->next) {
341 			if (nla_strcmp(kind, q->id) == 0) {
342 				if (!try_module_get(q->owner))
343 					q = NULL;
344 				break;
345 			}
346 		}
347 		read_unlock(&qdisc_mod_lock);
348 	}
349 	return q;
350 }
351 
352 /* The linklayer setting was not transferred from iproute2 in older
353  * versions, and the rate table lookup system has been dropped from
354  * the kernel. To stay backward compatible with older iproute2 tc
355  * utils, we detect the linklayer setting by checking whether the
356  * rate table was modified.
357  *
358  * For linklayer ATM, the rate table entries are aligned to 48-byte
359  * cells, so some table entries will contain the same value.  The
360  * mpu (min packet unit) is also encoded into the old rate table, so
361  * starting from the mpu we find the low and high table entries that
362  * map to one cell.  If these entries contain the same value, then
363  * the rate table has been modified for linklayer ATM.
364  *
365  * This is done by rounding the mpu up to the nearest 48-byte cell,
366  * then rounding up to the next cell, taking the table entry one
367  * below it, and comparing the two entries.
368  */
369 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
370 {
371 	int low       = roundup(r->mpu, 48);
372 	int high      = roundup(low+1, 48);
373 	int cell_low  = low >> r->cell_log;
374 	int cell_high = (high >> r->cell_log) - 1;
375 
376 	/* rtab is too inaccurate at rates > 100Mbit/s */
377 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
378 		pr_debug("TC linklayer: Giving up ATM detection\n");
379 		return TC_LINKLAYER_ETHERNET;
380 	}
381 
382 	if ((cell_high > cell_low) && (cell_high < 256)
383 	    && (rtab[cell_low] == rtab[cell_high])) {
384 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
385 			 cell_low, cell_high, rtab[cell_high]);
386 		return TC_LINKLAYER_ATM;
387 	}
388 	return TC_LINKLAYER_ETHERNET;
389 }
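
/* Worked example with assumed numbers (illustration only): for mpu = 0
 * and cell_log = 3, low = roundup(0, 48) = 0 and high = roundup(1, 48)
 * = 48, so cell_low = 0 and cell_high = (48 >> 3) - 1 = 5.  In an
 * ATM-aligned table every size from 1 to 48 bytes costs one whole
 * 48-byte cell, so rtab[0] == rtab[5] and TC_LINKLAYER_ATM is
 * returned; an Ethernet table grows with every byte, the two entries
 * differ, and TC_LINKLAYER_ETHERNET is returned instead.
 */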
390 
391 static struct qdisc_rate_table *qdisc_rtab_list;
392 
393 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
394 					struct nlattr *tab)
395 {
396 	struct qdisc_rate_table *rtab;
397 
398 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
399 	    nla_len(tab) != TC_RTAB_SIZE)
400 		return NULL;
401 
402 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
403 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
404 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
405 			rtab->refcnt++;
406 			return rtab;
407 		}
408 	}
409 
410 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
411 	if (rtab) {
412 		rtab->rate = *r;
413 		rtab->refcnt = 1;
414 		memcpy(rtab->data, nla_data(tab), 1024);
415 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
416 			r->linklayer = __detect_linklayer(r, rtab->data);
417 		rtab->next = qdisc_rtab_list;
418 		qdisc_rtab_list = rtab;
419 	}
420 	return rtab;
421 }
422 EXPORT_SYMBOL(qdisc_get_rtab);
423 
424 void qdisc_put_rtab(struct qdisc_rate_table *tab)
425 {
426 	struct qdisc_rate_table *rtab, **rtabp;
427 
428 	if (!tab || --tab->refcnt)
429 		return;
430 
431 	for (rtabp = &qdisc_rtab_list;
432 	     (rtab = *rtabp) != NULL;
433 	     rtabp = &rtab->next) {
434 		if (rtab == tab) {
435 			*rtabp = rtab->next;
436 			kfree(rtab);
437 			return;
438 		}
439 	}
440 }
441 EXPORT_SYMBOL(qdisc_put_rtab);
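
/* Hedged usage sketch: a caller pairs qdisc_get_rtab() in its
 * ->init()/->change() path with qdisc_put_rtab() on destroy or on
 * error unwind.  The attribute name TCA_EXAMPLE_RTAB is hypothetical.
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_EXAMPLE_RTAB]);
 *	if (!rtab)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(rtab);
 */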
442 
443 static LIST_HEAD(qdisc_stab_list);
444 
445 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
446 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
447 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
448 };
449 
450 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
451 {
452 	struct nlattr *tb[TCA_STAB_MAX + 1];
453 	struct qdisc_size_table *stab;
454 	struct tc_sizespec *s;
455 	unsigned int tsize = 0;
456 	u16 *tab = NULL;
457 	int err;
458 
459 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
460 	if (err < 0)
461 		return ERR_PTR(err);
462 	if (!tb[TCA_STAB_BASE])
463 		return ERR_PTR(-EINVAL);
464 
465 	s = nla_data(tb[TCA_STAB_BASE]);
466 
467 	if (s->tsize > 0) {
468 		if (!tb[TCA_STAB_DATA])
469 			return ERR_PTR(-EINVAL);
470 		tab = nla_data(tb[TCA_STAB_DATA]);
471 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
472 	}
473 
474 	if (tsize != s->tsize || (!tab && tsize > 0))
475 		return ERR_PTR(-EINVAL);
476 
477 	list_for_each_entry(stab, &qdisc_stab_list, list) {
478 		if (memcmp(&stab->szopts, s, sizeof(*s)))
479 			continue;
480 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
481 			continue;
482 		stab->refcnt++;
483 		return stab;
484 	}
485 
486 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
487 	if (!stab)
488 		return ERR_PTR(-ENOMEM);
489 
490 	stab->refcnt = 1;
491 	stab->szopts = *s;
492 	if (tsize > 0)
493 		memcpy(stab->data, tab, tsize * sizeof(u16));
494 
495 	list_add_tail(&stab->list, &qdisc_stab_list);
496 
497 	return stab;
498 }
499 
500 static void stab_kfree_rcu(struct rcu_head *head)
501 {
502 	kfree(container_of(head, struct qdisc_size_table, rcu));
503 }
504 
505 void qdisc_put_stab(struct qdisc_size_table *tab)
506 {
507 	if (!tab)
508 		return;
509 
510 	if (--tab->refcnt == 0) {
511 		list_del(&tab->list);
512 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
513 	}
514 }
515 EXPORT_SYMBOL(qdisc_put_stab);
516 
517 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
518 {
519 	struct nlattr *nest;
520 
521 	nest = nla_nest_start(skb, TCA_STAB);
522 	if (nest == NULL)
523 		goto nla_put_failure;
524 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
525 		goto nla_put_failure;
526 	nla_nest_end(skb, nest);
527 
528 	return skb->len;
529 
530 nla_put_failure:
531 	return -1;
532 }
533 
534 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
535 			       const struct qdisc_size_table *stab)
536 {
537 	int pkt_len, slot;
538 
539 	pkt_len = skb->len + stab->szopts.overhead;
540 	if (unlikely(!stab->szopts.tsize))
541 		goto out;
542 
543 	slot = pkt_len + stab->szopts.cell_align;
544 	if (unlikely(slot < 0))
545 		slot = 0;
546 
547 	slot >>= stab->szopts.cell_log;
548 	if (likely(slot < stab->szopts.tsize))
549 		pkt_len = stab->data[slot];
550 	else
551 		pkt_len = stab->data[stab->szopts.tsize - 1] *
552 				(slot / stab->szopts.tsize) +
553 				stab->data[slot % stab->szopts.tsize];
554 
555 	pkt_len <<= stab->szopts.size_log;
556 out:
557 	if (unlikely(pkt_len < 1))
558 		pkt_len = 1;
559 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
560 }
561 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
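
/* Worked example with assumed stab parameters (illustration only):
 * overhead = 24, cell_align = -1, cell_log = 6, size_log = 0 and a
 * 16-entry table.  A 1500 byte packet gives pkt_len = 1524 and
 * slot = (1524 - 1) >> 6 = 23.  Since 23 >= tsize, the overflow arm
 * extrapolates beyond the table:
 *	pkt_len = data[15] * (23 / 16) + data[23 % 16]
 *		= data[15] + data[7]
 * and the result is shifted left by size_log and clamped to at least 1.
 */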
562 
563 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
564 {
565 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
566 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
567 			txt, qdisc->ops->id, qdisc->handle >> 16);
568 		qdisc->flags |= TCQ_F_WARN_NONWC;
569 	}
570 }
571 EXPORT_SYMBOL(qdisc_warn_nonwc);
572 
573 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
574 {
575 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
576 						 timer);
577 
578 	rcu_read_lock();
579 	__netif_schedule(qdisc_root(wd->qdisc));
580 	rcu_read_unlock();
581 
582 	return HRTIMER_NORESTART;
583 }
584 
585 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
586 {
587 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
588 	wd->timer.function = qdisc_watchdog;
589 	wd->qdisc = qdisc;
590 }
591 EXPORT_SYMBOL(qdisc_watchdog_init);
592 
593 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
594 {
595 	if (test_bit(__QDISC_STATE_DEACTIVATED,
596 		     &qdisc_root_sleeping(wd->qdisc)->state))
597 		return;
598 
599 	if (wd->last_expires == expires)
600 		return;
601 
602 	wd->last_expires = expires;
603 	hrtimer_start(&wd->timer,
604 		      ns_to_ktime(expires),
605 		      HRTIMER_MODE_ABS_PINNED);
606 }
607 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
608 
609 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
610 {
611 	hrtimer_cancel(&wd->timer);
612 }
613 EXPORT_SYMBOL(qdisc_watchdog_cancel);
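
/* Hedged usage sketch: a non-work-conserving qdisc (a shaper such as
 * tbf or netem) typically drives the watchdog as below, with q being
 * its (hypothetical) private data:
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		   in ->init()
 *	qdisc_watchdog_schedule_ns(&q->watchdog, t_next);  in ->dequeue()
 *	qdisc_watchdog_cancel(&q->watchdog);		   in ->reset()/->destroy()
 *
 * The timer merely reschedules the root qdisc; the next dequeue pass
 * decides whether a packet has become eligible to send.
 */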
614 
615 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
616 {
617 	struct hlist_head *h;
618 	unsigned int i;
619 
620 	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
621 
622 	if (h != NULL) {
623 		for (i = 0; i < n; i++)
624 			INIT_HLIST_HEAD(&h[i]);
625 	}
626 	return h;
627 }
628 
629 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
630 {
631 	struct Qdisc_class_common *cl;
632 	struct hlist_node *next;
633 	struct hlist_head *nhash, *ohash;
634 	unsigned int nsize, nmask, osize;
635 	unsigned int i, h;
636 
637 	/* Rehash when load factor exceeds 0.75 */
638 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
639 		return;
640 	nsize = clhash->hashsize * 2;
641 	nmask = nsize - 1;
642 	nhash = qdisc_class_hash_alloc(nsize);
643 	if (nhash == NULL)
644 		return;
645 
646 	ohash = clhash->hash;
647 	osize = clhash->hashsize;
648 
649 	sch_tree_lock(sch);
650 	for (i = 0; i < osize; i++) {
651 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
652 			h = qdisc_class_hash(cl->classid, nmask);
653 			hlist_add_head(&cl->hnode, &nhash[h]);
654 		}
655 	}
656 	clhash->hash     = nhash;
657 	clhash->hashsize = nsize;
658 	clhash->hashmask = nmask;
659 	sch_tree_unlock(sch);
660 
661 	kvfree(ohash);
662 }
663 EXPORT_SYMBOL(qdisc_class_hash_grow);
664 
665 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
666 {
667 	unsigned int size = 4;
668 
669 	clhash->hash = qdisc_class_hash_alloc(size);
670 	if (clhash->hash == NULL)
671 		return -ENOMEM;
672 	clhash->hashsize  = size;
673 	clhash->hashmask  = size - 1;
674 	clhash->hashelems = 0;
675 	return 0;
676 }
677 EXPORT_SYMBOL(qdisc_class_hash_init);
678 
679 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
680 {
681 	kvfree(clhash->hash);
682 }
683 EXPORT_SYMBOL(qdisc_class_hash_destroy);
684 
685 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
686 			     struct Qdisc_class_common *cl)
687 {
688 	unsigned int h;
689 
690 	INIT_HLIST_NODE(&cl->hnode);
691 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
692 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
693 	clhash->hashelems++;
694 }
695 EXPORT_SYMBOL(qdisc_class_hash_insert);
696 
697 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
698 			     struct Qdisc_class_common *cl)
699 {
700 	hlist_del(&cl->hnode);
701 	clhash->hashelems--;
702 }
703 EXPORT_SYMBOL(qdisc_class_hash_remove);
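
/* Hedged usage sketch of the class hash helpers as used by classful
 * qdiscs; the q and cl names are hypothetical:
 *
 *	err = qdisc_class_hash_init(&q->clhash);	   in ->init()
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);  on class creation
 *	qdisc_class_hash_grow(sch, &q->clhash);		   after inserting
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);  on class deletion
 *	qdisc_class_hash_destroy(&q->clhash);		   in ->destroy()
 */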
704 
705 /* Allocate a unique handle from the space managed by the kernel.
706  * Possible range is [8000-FFFF]:0000 (0x8000 values).
707  */
708 static u32 qdisc_alloc_handle(struct net_device *dev)
709 {
710 	int i = 0x8000;
711 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
712 
713 	do {
714 		autohandle += TC_H_MAKE(0x10000U, 0);
715 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
716 			autohandle = TC_H_MAKE(0x80000000U, 0);
717 		if (!qdisc_lookup(dev, autohandle))
718 			return autohandle;
719 		cond_resched();
720 	} while	(--i > 0);
721 
722 	return 0;
723 }
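
/* Handle layout, for illustration: a handle is a 32-bit value whose
 * upper 16 bits are the major number and lower 16 bits the minor, so
 * TC_H_MAKE(0x80010000U, 0) is the handle that tc prints as "8001:".
 * Handles produced by qdisc_alloc_handle() always have a zero minor
 * and a major in the 0x8000-0xffff range.
 */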
724 
725 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
726 			       unsigned int len)
727 {
728 	const struct Qdisc_class_ops *cops;
729 	unsigned long cl;
730 	u32 parentid;
731 	bool notify;
732 	int drops;
733 
734 	if (n == 0 && len == 0)
735 		return;
736 	drops = max_t(int, n, 0);
737 	rcu_read_lock();
738 	while ((parentid = sch->parent)) {
739 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
740 			break;
741 
742 		if (sch->flags & TCQ_F_NOPARENT)
743 			break;
744 		/* Notify the parent qdisc only if the child qdisc becomes empty.
745 		 *
746 		 * If the child was empty even before the update then the
747 		 * backlog counter is inconsistent and we skip the
748 		 * notification, because the parent class is already passive.
749 		 */
750 		notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
751 		/* TODO: perform the search on a per txq basis */
752 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
753 		if (sch == NULL) {
754 			WARN_ON_ONCE(parentid != TC_H_ROOT);
755 			break;
756 		}
757 		cops = sch->ops->cl_ops;
758 		if (notify && cops->qlen_notify) {
759 			cl = cops->find(sch, parentid);
760 			cops->qlen_notify(sch, cl);
761 		}
762 		sch->q.qlen -= n;
763 		sch->qstats.backlog -= len;
764 		__qdisc_qstats_drop(sch, drops);
765 	}
766 	rcu_read_unlock();
767 }
768 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
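
/* Hedged usage sketch: a qdisc that drops packets outside the normal
 * enqueue path (e.g. from a timer or after a limit change) reports it
 * upwards so every ancestor's qlen/backlog stays consistent and an
 * emptied class can be deactivated via ->qlen_notify():
 *
 *	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
 */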
769 
770 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
771 			 u32 portid, u32 seq, u16 flags, int event)
772 {
773 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
774 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
775 	struct tcmsg *tcm;
776 	struct nlmsghdr  *nlh;
777 	unsigned char *b = skb_tail_pointer(skb);
778 	struct gnet_dump d;
779 	struct qdisc_size_table *stab;
780 	__u32 qlen;
781 
782 	cond_resched();
783 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
784 	if (!nlh)
785 		goto out_nlmsg_trim;
786 	tcm = nlmsg_data(nlh);
787 	tcm->tcm_family = AF_UNSPEC;
788 	tcm->tcm__pad1 = 0;
789 	tcm->tcm__pad2 = 0;
790 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
791 	tcm->tcm_parent = clid;
792 	tcm->tcm_handle = q->handle;
793 	tcm->tcm_info = refcount_read(&q->refcnt);
794 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
795 		goto nla_put_failure;
796 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
797 		goto nla_put_failure;
798 	qlen = q->q.qlen;
799 
800 	stab = rtnl_dereference(q->stab);
801 	if (stab && qdisc_dump_stab(skb, stab) < 0)
802 		goto nla_put_failure;
803 
804 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
805 					 NULL, &d, TCA_PAD) < 0)
806 		goto nla_put_failure;
807 
808 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
809 		goto nla_put_failure;
810 
811 	if (qdisc_is_percpu_stats(q)) {
812 		cpu_bstats = q->cpu_bstats;
813 		cpu_qstats = q->cpu_qstats;
814 	}
815 
816 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
817 				  &d, cpu_bstats, &q->bstats) < 0 ||
818 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
819 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
820 		goto nla_put_failure;
821 
822 	if (gnet_stats_finish_copy(&d) < 0)
823 		goto nla_put_failure;
824 
825 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
826 	return skb->len;
827 
828 out_nlmsg_trim:
829 nla_put_failure:
830 	nlmsg_trim(skb, b);
831 	return -1;
832 }
833 
834 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
835 {
836 	if (q->flags & TCQ_F_BUILTIN)
837 		return true;
838 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
839 		return true;
840 
841 	return false;
842 }
843 
844 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
845 			struct nlmsghdr *n, u32 clid,
846 			struct Qdisc *old, struct Qdisc *new)
847 {
848 	struct sk_buff *skb;
849 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
850 
851 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
852 	if (!skb)
853 		return -ENOBUFS;
854 
855 	if (old && !tc_qdisc_dump_ignore(old, false)) {
856 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
857 				  0, RTM_DELQDISC) < 0)
858 			goto err_out;
859 	}
860 	if (new && !tc_qdisc_dump_ignore(new, false)) {
861 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
862 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
863 			goto err_out;
864 	}
865 
866 	if (skb->len)
867 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
868 				      n->nlmsg_flags & NLM_F_ECHO);
869 
870 err_out:
871 	kfree_skb(skb);
872 	return -EINVAL;
873 }
874 
875 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
876 			       struct nlmsghdr *n, u32 clid,
877 			       struct Qdisc *old, struct Qdisc *new)
878 {
879 	if (new || old)
880 		qdisc_notify(net, skb, n, clid, old, new);
881 
882 	if (old)
883 		qdisc_destroy(old);
884 }
885 
886 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
887  * to device "dev".
888  *
889  * When appropriate, send a netlink notification using "skb"
890  * and "n".
891  *
892  * On success, destroy the old qdisc.
893  */
894 
895 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
896 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
897 		       struct Qdisc *new, struct Qdisc *old)
898 {
899 	struct Qdisc *q = old;
900 	struct net *net = dev_net(dev);
901 	int err = 0;
902 
903 	if (parent == NULL) {
904 		unsigned int i, num_q, ingress;
905 
906 		ingress = 0;
907 		num_q = dev->num_tx_queues;
908 		if ((q && q->flags & TCQ_F_INGRESS) ||
909 		    (new && new->flags & TCQ_F_INGRESS)) {
910 			num_q = 1;
911 			ingress = 1;
912 			if (!dev_ingress_queue(dev))
913 				return -ENOENT;
914 		}
915 
916 		if (dev->flags & IFF_UP)
917 			dev_deactivate(dev);
918 
919 		if (new && new->ops->attach)
920 			goto skip;
921 
922 		for (i = 0; i < num_q; i++) {
923 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
924 
925 			if (!ingress)
926 				dev_queue = netdev_get_tx_queue(dev, i);
927 
928 			old = dev_graft_qdisc(dev_queue, new);
929 			if (new && i > 0)
930 				qdisc_refcount_inc(new);
931 
932 			if (!ingress)
933 				qdisc_destroy(old);
934 		}
935 
936 skip:
937 		if (!ingress) {
938 			notify_and_destroy(net, skb, n, classid,
939 					   dev->qdisc, new);
940 			if (new && !new->ops->attach)
941 				qdisc_refcount_inc(new);
942 			dev->qdisc = new ? : &noop_qdisc;
943 
944 			if (new && new->ops->attach)
945 				new->ops->attach(new);
946 		} else {
947 			notify_and_destroy(net, skb, n, classid, old, new);
948 		}
949 
950 		if (dev->flags & IFF_UP)
951 			dev_activate(dev);
952 	} else {
953 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
954 
955 		err = -EOPNOTSUPP;
956 		if (cops && cops->graft) {
957 			unsigned long cl = cops->find(parent, classid);
958 
959 			if (cl)
960 				err = cops->graft(parent, cl, new, &old);
961 			else
962 				err = -ENOENT;
963 		}
964 		if (!err)
965 			notify_and_destroy(net, skb, n, classid, old, new);
966 	}
967 	return err;
968 }
969 
970 /* lockdep annotation is needed for ingress; egress gets it only for name */
971 static struct lock_class_key qdisc_tx_lock;
972 static struct lock_class_key qdisc_rx_lock;
973 
974 /*
975    Allocate and initialize a new qdisc.
976 
977    Parameters are passed via the netlink attributes in tca.
978  */
979 
980 static struct Qdisc *qdisc_create(struct net_device *dev,
981 				  struct netdev_queue *dev_queue,
982 				  struct Qdisc *p, u32 parent, u32 handle,
983 				  struct nlattr **tca, int *errp)
984 {
985 	int err;
986 	struct nlattr *kind = tca[TCA_KIND];
987 	struct Qdisc *sch;
988 	struct Qdisc_ops *ops;
989 	struct qdisc_size_table *stab;
990 
991 	ops = qdisc_lookup_ops(kind);
992 #ifdef CONFIG_MODULES
993 	if (ops == NULL && kind != NULL) {
994 		char name[IFNAMSIZ];
995 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
996 			/* We dropped the RTNL semaphore in order to
997 			 * perform the module load.  So, even if we
998 			 * succeeded in loading the module we have to
999 			 * tell the caller to replay the request.  We
1000 			 * indicate this using -EAGAIN.
1001 			 * We replay the request because the device may
1002 			 * go away in the meantime.
1003 			 */
1004 			rtnl_unlock();
1005 			request_module("sch_%s", name);
1006 			rtnl_lock();
1007 			ops = qdisc_lookup_ops(kind);
1008 			if (ops != NULL) {
1009 			/* We will call qdisc_lookup_ops() again when
1010 			 * the request is replayed, so don't keep a reference.
1011 			 */
1012 				module_put(ops->owner);
1013 				err = -EAGAIN;
1014 				goto err_out;
1015 			}
1016 		}
1017 	}
1018 #endif
1019 
1020 	err = -ENOENT;
1021 	if (ops == NULL)
1022 		goto err_out;
1023 
1024 	sch = qdisc_alloc(dev_queue, ops);
1025 	if (IS_ERR(sch)) {
1026 		err = PTR_ERR(sch);
1027 		goto err_out2;
1028 	}
1029 
1030 	sch->parent = parent;
1031 
1032 	if (handle == TC_H_INGRESS) {
1033 		sch->flags |= TCQ_F_INGRESS;
1034 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
1035 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
1036 	} else {
1037 		if (handle == 0) {
1038 			handle = qdisc_alloc_handle(dev);
1039 			err = -ENOMEM;
1040 			if (handle == 0)
1041 				goto err_out3;
1042 		}
1043 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1044 		if (!netif_is_multiqueue(dev))
1045 			sch->flags |= TCQ_F_ONETXQUEUE;
1046 	}
1047 
1048 	sch->handle = handle;
1049 
1050 	/* This exists for backward compatibility with a userspace
1051 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
1052 	 * facility on older kernels by setting tx_queue_len=0 (prior
1053 	 * to qdisc init), and then forgetting to reinit tx_queue_len
1054 	 * before attaching a qdisc again.
1055 	 */
1056 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1057 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1058 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1059 	}
1060 
1061 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
1062 		if (qdisc_is_percpu_stats(sch)) {
1063 			sch->cpu_bstats =
1064 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1065 			if (!sch->cpu_bstats)
1066 				goto err_out4;
1067 
1068 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1069 			if (!sch->cpu_qstats)
1070 				goto err_out4;
1071 		}
1072 
1073 		if (tca[TCA_STAB]) {
1074 			stab = qdisc_get_stab(tca[TCA_STAB]);
1075 			if (IS_ERR(stab)) {
1076 				err = PTR_ERR(stab);
1077 				goto err_out4;
1078 			}
1079 			rcu_assign_pointer(sch->stab, stab);
1080 		}
1081 		if (tca[TCA_RATE]) {
1082 			seqcount_t *running;
1083 
1084 			err = -EOPNOTSUPP;
1085 			if (sch->flags & TCQ_F_MQROOT)
1086 				goto err_out4;
1087 
1088 			if ((sch->parent != TC_H_ROOT) &&
1089 			    !(sch->flags & TCQ_F_INGRESS) &&
1090 			    (!p || !(p->flags & TCQ_F_MQROOT)))
1091 				running = qdisc_root_sleeping_running(sch);
1092 			else
1093 				running = &sch->running;
1094 
1095 			err = gen_new_estimator(&sch->bstats,
1096 						sch->cpu_bstats,
1097 						&sch->rate_est,
1098 						NULL,
1099 						running,
1100 						tca[TCA_RATE]);
1101 			if (err)
1102 				goto err_out4;
1103 		}
1104 
1105 		qdisc_hash_add(sch, false);
1106 
1107 		return sch;
1108 	}
1109 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1110 	if (ops->destroy)
1111 		ops->destroy(sch);
1112 err_out3:
1113 	dev_put(dev);
1114 	kfree((char *) sch - sch->padded);
1115 err_out2:
1116 	module_put(ops->owner);
1117 err_out:
1118 	*errp = err;
1119 	return NULL;
1120 
1121 err_out4:
1122 	free_percpu(sch->cpu_bstats);
1123 	free_percpu(sch->cpu_qstats);
1124 	/*
1125 	 * Any broken qdiscs that would require an ops->reset() here?
1126 	 * The qdisc was never in action, so it shouldn't be necessary.
1127 	 */
1128 	qdisc_put_stab(rtnl_dereference(sch->stab));
1129 	if (ops->destroy)
1130 		ops->destroy(sch);
1131 	goto err_out3;
1132 }
1133 
1134 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1135 {
1136 	struct qdisc_size_table *ostab, *stab = NULL;
1137 	int err = 0;
1138 
1139 	if (tca[TCA_OPTIONS]) {
1140 		if (sch->ops->change == NULL)
1141 			return -EINVAL;
1142 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1143 		if (err)
1144 			return err;
1145 	}
1146 
1147 	if (tca[TCA_STAB]) {
1148 		stab = qdisc_get_stab(tca[TCA_STAB]);
1149 		if (IS_ERR(stab))
1150 			return PTR_ERR(stab);
1151 	}
1152 
1153 	ostab = rtnl_dereference(sch->stab);
1154 	rcu_assign_pointer(sch->stab, stab);
1155 	qdisc_put_stab(ostab);
1156 
1157 	if (tca[TCA_RATE]) {
1158 		/* NB: ignores errors from replace_estimator
1159 		   because change can't be undone. */
1160 		if (sch->flags & TCQ_F_MQROOT)
1161 			goto out;
1162 		gen_replace_estimator(&sch->bstats,
1163 				      sch->cpu_bstats,
1164 				      &sch->rate_est,
1165 				      NULL,
1166 				      qdisc_root_sleeping_running(sch),
1167 				      tca[TCA_RATE]);
1168 	}
1169 out:
1170 	return 0;
1171 }
1172 
1173 struct check_loop_arg {
1174 	struct qdisc_walker	w;
1175 	struct Qdisc		*p;
1176 	int			depth;
1177 };
1178 
1179 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1180 			 struct qdisc_walker *w);
1181 
1182 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1183 {
1184 	struct check_loop_arg	arg;
1185 
1186 	if (q->ops->cl_ops == NULL)
1187 		return 0;
1188 
1189 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1190 	arg.w.fn = check_loop_fn;
1191 	arg.depth = depth;
1192 	arg.p = p;
1193 	q->ops->cl_ops->walk(q, &arg.w);
1194 	return arg.w.stop ? -ELOOP : 0;
1195 }
1196 
1197 static int
1198 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1199 {
1200 	struct Qdisc *leaf;
1201 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1202 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1203 
1204 	leaf = cops->leaf(q, cl);
1205 	if (leaf) {
1206 		if (leaf == arg->p || arg->depth > 7)
1207 			return -ELOOP;
1208 		return check_loop(leaf, arg->p, arg->depth + 1);
1209 	}
1210 	return 0;
1211 }
1212 
1213 /*
1214  * Delete/get qdisc.
1215  */
1216 
1217 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1218 			struct netlink_ext_ack *extack)
1219 {
1220 	struct net *net = sock_net(skb->sk);
1221 	struct tcmsg *tcm = nlmsg_data(n);
1222 	struct nlattr *tca[TCA_MAX + 1];
1223 	struct net_device *dev;
1224 	u32 clid;
1225 	struct Qdisc *q = NULL;
1226 	struct Qdisc *p = NULL;
1227 	int err;
1228 
1229 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1230 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1231 		return -EPERM;
1232 
1233 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1234 	if (err < 0)
1235 		return err;
1236 
1237 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1238 	if (!dev)
1239 		return -ENODEV;
1240 
1241 	clid = tcm->tcm_parent;
1242 	if (clid) {
1243 		if (clid != TC_H_ROOT) {
1244 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1245 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1246 				if (!p)
1247 					return -ENOENT;
1248 				q = qdisc_leaf(p, clid);
1249 			} else if (dev_ingress_queue(dev)) {
1250 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1251 			}
1252 		} else {
1253 			q = dev->qdisc;
1254 		}
1255 		if (!q)
1256 			return -ENOENT;
1257 
1258 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1259 			return -EINVAL;
1260 	} else {
1261 		q = qdisc_lookup(dev, tcm->tcm_handle);
1262 		if (!q)
1263 			return -ENOENT;
1264 	}
1265 
1266 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1267 		return -EINVAL;
1268 
1269 	if (n->nlmsg_type == RTM_DELQDISC) {
1270 		if (!clid)
1271 			return -EINVAL;
1272 		if (q->handle == 0)
1273 			return -ENOENT;
1274 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1275 		if (err != 0)
1276 			return err;
1277 	} else {
1278 		qdisc_notify(net, skb, n, clid, NULL, q);
1279 	}
1280 	return 0;
1281 }
1282 
1283 /*
1284  * Create/change qdisc.
1285  */
1286 
1287 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1288 			   struct netlink_ext_ack *extack)
1289 {
1290 	struct net *net = sock_net(skb->sk);
1291 	struct tcmsg *tcm;
1292 	struct nlattr *tca[TCA_MAX + 1];
1293 	struct net_device *dev;
1294 	u32 clid;
1295 	struct Qdisc *q, *p;
1296 	int err;
1297 
1298 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1299 		return -EPERM;
1300 
1301 replay:
1302 	/* Reinit, just in case something touches this. */
1303 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1304 	if (err < 0)
1305 		return err;
1306 
1307 	tcm = nlmsg_data(n);
1308 	clid = tcm->tcm_parent;
1309 	q = p = NULL;
1310 
1311 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1312 	if (!dev)
1313 		return -ENODEV;
1314 
1315 
1316 	if (clid) {
1317 		if (clid != TC_H_ROOT) {
1318 			if (clid != TC_H_INGRESS) {
1319 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1320 				if (!p)
1321 					return -ENOENT;
1322 				q = qdisc_leaf(p, clid);
1323 			} else if (dev_ingress_queue_create(dev)) {
1324 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1325 			}
1326 		} else {
1327 			q = dev->qdisc;
1328 		}
1329 
1330 		/* It may be the default qdisc; ignore it */
1331 		if (q && q->handle == 0)
1332 			q = NULL;
1333 
1334 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1335 			if (tcm->tcm_handle) {
1336 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1337 					return -EEXIST;
1338 				if (TC_H_MIN(tcm->tcm_handle))
1339 					return -EINVAL;
1340 				q = qdisc_lookup(dev, tcm->tcm_handle);
1341 				if (!q)
1342 					goto create_n_graft;
1343 				if (n->nlmsg_flags & NLM_F_EXCL)
1344 					return -EEXIST;
1345 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1346 					return -EINVAL;
1347 				if (q == p ||
1348 				    (p && check_loop(q, p, 0)))
1349 					return -ELOOP;
1350 				qdisc_refcount_inc(q);
1351 				goto graft;
1352 			} else {
1353 				if (!q)
1354 					goto create_n_graft;
1355 
1356 				/* This magic test requires explanation.
1357 				 *
1358 				 *   We know that some child q is already
1359 				 *   attached to this parent and we have a choice:
1360 				 *   either to change it or to create/graft a new one.
1361 				 *
1362 				 *   1. We are allowed to create/graft only
1363 				 *   if both CREATE and REPLACE flags are set.
1364 				 *
1365 				 *   2. If EXCL is set, the requester meant that
1366 				 *   the qdisc tcm_handle is not expected
1367 				 *   to exist, so we choose create/graft too.
1368 				 *
1369 				 *   3. The last case is when no flags are set.
1370 				 *   Alas, it is sort of a hole in the API: we
1371 				 *   cannot decide what to do unambiguously.
1372 				 *   For now we select create/graft if the
1373 				 *   user gave a KIND that does not match the existing one.
1374 				 */
1375 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1376 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1377 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1378 				     (tca[TCA_KIND] &&
1379 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1380 					goto create_n_graft;
1381 			}
1382 		}
1383 	} else {
1384 		if (!tcm->tcm_handle)
1385 			return -EINVAL;
1386 		q = qdisc_lookup(dev, tcm->tcm_handle);
1387 	}
1388 
1389 	/* Change qdisc parameters */
1390 	if (q == NULL)
1391 		return -ENOENT;
1392 	if (n->nlmsg_flags & NLM_F_EXCL)
1393 		return -EEXIST;
1394 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1395 		return -EINVAL;
1396 	err = qdisc_change(q, tca);
1397 	if (err == 0)
1398 		qdisc_notify(net, skb, n, clid, NULL, q);
1399 	return err;
1400 
1401 create_n_graft:
1402 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1403 		return -ENOENT;
1404 	if (clid == TC_H_INGRESS) {
1405 		if (dev_ingress_queue(dev))
1406 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1407 					 tcm->tcm_parent, tcm->tcm_parent,
1408 					 tca, &err);
1409 		else
1410 			err = -ENOENT;
1411 	} else {
1412 		struct netdev_queue *dev_queue;
1413 
1414 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1415 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1416 		else if (p)
1417 			dev_queue = p->dev_queue;
1418 		else
1419 			dev_queue = netdev_get_tx_queue(dev, 0);
1420 
1421 		q = qdisc_create(dev, dev_queue, p,
1422 				 tcm->tcm_parent, tcm->tcm_handle,
1423 				 tca, &err);
1424 	}
1425 	if (q == NULL) {
1426 		if (err == -EAGAIN)
1427 			goto replay;
1428 		return err;
1429 	}
1430 
1431 graft:
1432 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1433 	if (err) {
1434 		if (q)
1435 			qdisc_destroy(q);
1436 		return err;
1437 	}
1438 
1439 	return 0;
1440 }
1441 
1442 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1443 			      struct netlink_callback *cb,
1444 			      int *q_idx_p, int s_q_idx, bool recur,
1445 			      bool dump_invisible)
1446 {
1447 	int ret = 0, q_idx = *q_idx_p;
1448 	struct Qdisc *q;
1449 	int b;
1450 
1451 	if (!root)
1452 		return 0;
1453 
1454 	q = root;
1455 	if (q_idx < s_q_idx) {
1456 		q_idx++;
1457 	} else {
1458 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1459 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1460 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1461 				  RTM_NEWQDISC) <= 0)
1462 			goto done;
1463 		q_idx++;
1464 	}
1465 
1466 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1467 	 * itself has already been dumped.
1468 	 *
1469 	 * If we've already dumped the top-level (ingress) qdisc above and
1470 	 * walked the per-device qdisc hashtable, we don't want to hit it again.
1471 	 */
1472 	if (!qdisc_dev(root) || !recur)
1473 		goto out;
1474 
1475 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1476 		if (q_idx < s_q_idx) {
1477 			q_idx++;
1478 			continue;
1479 		}
1480 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1481 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1482 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1483 				  RTM_NEWQDISC) <= 0)
1484 			goto done;
1485 		q_idx++;
1486 	}
1487 
1488 out:
1489 	*q_idx_p = q_idx;
1490 	return ret;
1491 done:
1492 	ret = -1;
1493 	goto out;
1494 }
1495 
1496 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1497 {
1498 	struct net *net = sock_net(skb->sk);
1499 	int idx, q_idx;
1500 	int s_idx, s_q_idx;
1501 	struct net_device *dev;
1502 	const struct nlmsghdr *nlh = cb->nlh;
1503 	struct tcmsg *tcm = nlmsg_data(nlh);
1504 	struct nlattr *tca[TCA_MAX + 1];
1505 	int err;
1506 
1507 	s_idx = cb->args[0];
1508 	s_q_idx = q_idx = cb->args[1];
1509 
1510 	idx = 0;
1511 	ASSERT_RTNL();
1512 
1513 	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
1514 	if (err < 0)
1515 		return err;
1516 
1517 	for_each_netdev(net, dev) {
1518 		struct netdev_queue *dev_queue;
1519 
1520 		if (idx < s_idx)
1521 			goto cont;
1522 		if (idx > s_idx)
1523 			s_q_idx = 0;
1524 		q_idx = 0;
1525 
1526 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1527 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1528 			goto done;
1529 
1530 		dev_queue = dev_ingress_queue(dev);
1531 		if (dev_queue &&
1532 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1533 				       &q_idx, s_q_idx, false,
1534 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1535 			goto done;
1536 
1537 cont:
1538 		idx++;
1539 	}
1540 
1541 done:
1542 	cb->args[0] = idx;
1543 	cb->args[1] = q_idx;
1544 
1545 	return skb->len;
1546 }
1547 
1548 
1549 
1550 /************************************************
1551  *	Traffic classes manipulation.		*
1552  ************************************************/
1553 
1554 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1555 			  unsigned long cl,
1556 			  u32 portid, u32 seq, u16 flags, int event)
1557 {
1558 	struct tcmsg *tcm;
1559 	struct nlmsghdr  *nlh;
1560 	unsigned char *b = skb_tail_pointer(skb);
1561 	struct gnet_dump d;
1562 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1563 
1564 	cond_resched();
1565 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1566 	if (!nlh)
1567 		goto out_nlmsg_trim;
1568 	tcm = nlmsg_data(nlh);
1569 	tcm->tcm_family = AF_UNSPEC;
1570 	tcm->tcm__pad1 = 0;
1571 	tcm->tcm__pad2 = 0;
1572 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1573 	tcm->tcm_parent = q->handle;
1574 	tcm->tcm_handle = q->handle;
1575 	tcm->tcm_info = 0;
1576 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1577 		goto nla_put_failure;
1578 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1579 		goto nla_put_failure;
1580 
1581 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1582 					 NULL, &d, TCA_PAD) < 0)
1583 		goto nla_put_failure;
1584 
1585 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1586 		goto nla_put_failure;
1587 
1588 	if (gnet_stats_finish_copy(&d) < 0)
1589 		goto nla_put_failure;
1590 
1591 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1592 	return skb->len;
1593 
1594 out_nlmsg_trim:
1595 nla_put_failure:
1596 	nlmsg_trim(skb, b);
1597 	return -1;
1598 }
1599 
1600 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1601 			 struct nlmsghdr *n, struct Qdisc *q,
1602 			 unsigned long cl, int event)
1603 {
1604 	struct sk_buff *skb;
1605 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1606 
1607 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1608 	if (!skb)
1609 		return -ENOBUFS;
1610 
1611 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1612 		kfree_skb(skb);
1613 		return -EINVAL;
1614 	}
1615 
1616 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1617 			      n->nlmsg_flags & NLM_F_ECHO);
1618 }
1619 
1620 static int tclass_del_notify(struct net *net,
1621 			     const struct Qdisc_class_ops *cops,
1622 			     struct sk_buff *oskb, struct nlmsghdr *n,
1623 			     struct Qdisc *q, unsigned long cl)
1624 {
1625 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1626 	struct sk_buff *skb;
1627 	int err = 0;
1628 
1629 	if (!cops->delete)
1630 		return -EOPNOTSUPP;
1631 
1632 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1633 	if (!skb)
1634 		return -ENOBUFS;
1635 
1636 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1637 			   RTM_DELTCLASS) < 0) {
1638 		kfree_skb(skb);
1639 		return -EINVAL;
1640 	}
1641 
1642 	err = cops->delete(q, cl);
1643 	if (err) {
1644 		kfree_skb(skb);
1645 		return err;
1646 	}
1647 
1648 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1649 			      n->nlmsg_flags & NLM_F_ECHO);
1650 }
1651 
1652 #ifdef CONFIG_NET_CLS
1653 
1654 struct tcf_bind_args {
1655 	struct tcf_walker w;
1656 	u32 classid;
1657 	unsigned long cl;
1658 };
1659 
1660 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1661 {
1662 	struct tcf_bind_args *a = (void *)arg;
1663 
1664 	if (tp->ops->bind_class) {
1665 		tcf_tree_lock(tp);
1666 		tp->ops->bind_class(n, a->classid, a->cl);
1667 		tcf_tree_unlock(tp);
1668 	}
1669 	return 0;
1670 }
1671 
1672 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1673 			   unsigned long new_cl)
1674 {
1675 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1676 	struct tcf_block *block;
1677 	struct tcf_chain *chain;
1678 	unsigned long cl;
1679 
1680 	cl = cops->find(q, portid);
1681 	if (!cl)
1682 		return;
1683 	block = cops->tcf_block(q, cl);
1684 	if (!block)
1685 		return;
1686 	list_for_each_entry(chain, &block->chain_list, list) {
1687 		struct tcf_proto *tp;
1688 
1689 		for (tp = rtnl_dereference(chain->filter_chain);
1690 		     tp; tp = rtnl_dereference(tp->next)) {
1691 			struct tcf_bind_args arg = {};
1692 
1693 			arg.w.fn = tcf_node_bind;
1694 			arg.classid = clid;
1695 			arg.cl = new_cl;
1696 			tp->ops->walk(tp, &arg.w);
1697 		}
1698 	}
1699 }
1700 
1701 #else
1702 
1703 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1704 			   unsigned long new_cl)
1705 {
1706 }
1707 
1708 #endif
1709 
1710 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1711 			 struct netlink_ext_ack *extack)
1712 {
1713 	struct net *net = sock_net(skb->sk);
1714 	struct tcmsg *tcm = nlmsg_data(n);
1715 	struct nlattr *tca[TCA_MAX + 1];
1716 	struct net_device *dev;
1717 	struct Qdisc *q = NULL;
1718 	const struct Qdisc_class_ops *cops;
1719 	unsigned long cl = 0;
1720 	unsigned long new_cl;
1721 	u32 portid;
1722 	u32 clid;
1723 	u32 qid;
1724 	int err;
1725 
1726 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1727 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1728 		return -EPERM;
1729 
1730 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1731 	if (err < 0)
1732 		return err;
1733 
1734 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1735 	if (!dev)
1736 		return -ENODEV;
1737 
1738 	/*
1739 	   parent == TC_H_UNSPEC - unspecified parent.
1740 	   parent == TC_H_ROOT   - class is root, which has no parent.
1741 	   parent == X:0	 - parent is root class.
1742 	   parent == X:Y	 - parent is a node in hierarchy.
1743 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1744 
1745 	   handle == 0:0	 - generate handle from kernel pool.
1746 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1747 	   handle == X:Y	 - fully specified class X:Y.
1748 	   handle == X:0	 - root class.
1749 	 */
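
	/* Worked example (illustrative): for "parent 1:0 handle 0:2",
	 * TC_H_MAJ(clid) is zero, so qid is taken from the parent below
	 * and the class id becomes TC_H_MAKE(0x10000, 2), i.e. "1:2".
	 */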
1750 
1751 	/* Step 1. Determine qdisc handle X:0 */
1752 
1753 	portid = tcm->tcm_parent;
1754 	clid = tcm->tcm_handle;
1755 	qid = TC_H_MAJ(clid);
1756 
1757 	if (portid != TC_H_ROOT) {
1758 		u32 qid1 = TC_H_MAJ(portid);
1759 
1760 		if (qid && qid1) {
1761 			/* If both majors are known, they must be identical. */
1762 			if (qid != qid1)
1763 				return -EINVAL;
1764 		} else if (qid1) {
1765 			qid = qid1;
1766 		} else if (qid == 0)
1767 			qid = dev->qdisc->handle;
1768 
1769 		/* Now qid is a genuine qdisc handle, consistent
1770 		 * with both parent and child.
1771 		 *
1772 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
1773 		 */
1774 		if (portid)
1775 			portid = TC_H_MAKE(qid, portid);
1776 	} else {
1777 		if (qid == 0)
1778 			qid = dev->qdisc->handle;
1779 	}
1780 
1781 	/* OK. Locate qdisc */
1782 	q = qdisc_lookup(dev, qid);
1783 	if (!q)
1784 		return -ENOENT;
1785 
1786 	/* And check that it supports classes */
1787 	cops = q->ops->cl_ops;
1788 	if (cops == NULL)
1789 		return -EINVAL;
1790 
1791 	/* Now try to get class */
1792 	if (clid == 0) {
1793 		if (portid == TC_H_ROOT)
1794 			clid = qid;
1795 	} else
1796 		clid = TC_H_MAKE(qid, clid);
1797 
1798 	if (clid)
1799 		cl = cops->find(q, clid);
1800 
1801 	if (cl == 0) {
1802 		err = -ENOENT;
1803 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1804 		    !(n->nlmsg_flags & NLM_F_CREATE))
1805 			goto out;
1806 	} else {
1807 		switch (n->nlmsg_type) {
1808 		case RTM_NEWTCLASS:
1809 			err = -EEXIST;
1810 			if (n->nlmsg_flags & NLM_F_EXCL)
1811 				goto out;
1812 			break;
1813 		case RTM_DELTCLASS:
1814 			err = tclass_del_notify(net, cops, skb, n, q, cl);
1815 			/* Unbind the class from its filters by binding them to 0 */
1816 			tc_bind_tclass(q, portid, clid, 0);
1817 			goto out;
1818 		case RTM_GETTCLASS:
1819 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1820 			goto out;
1821 		default:
1822 			err = -EINVAL;
1823 			goto out;
1824 		}
1825 	}
1826 
1827 	new_cl = cl;
1828 	err = -EOPNOTSUPP;
1829 	if (cops->change)
1830 		err = cops->change(q, clid, portid, tca, &new_cl);
1831 	if (err == 0) {
1832 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1833 		/* We just created a new class; we need to do the reverse binding. */
1834 		if (cl != new_cl)
1835 			tc_bind_tclass(q, portid, clid, new_cl);
1836 	}
1837 out:
1838 	return err;
1839 }
1840 
1841 struct qdisc_dump_args {
1842 	struct qdisc_walker	w;
1843 	struct sk_buff		*skb;
1844 	struct netlink_callback	*cb;
1845 };
1846 
1847 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1848 			    struct qdisc_walker *arg)
1849 {
1850 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1851 
1852 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1853 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1854 			      RTM_NEWTCLASS);
1855 }
1856 
1857 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1858 				struct tcmsg *tcm, struct netlink_callback *cb,
1859 				int *t_p, int s_t)
1860 {
1861 	struct qdisc_dump_args arg;
1862 
1863 	if (tc_qdisc_dump_ignore(q, false) ||
1864 	    *t_p < s_t || !q->ops->cl_ops ||
1865 	    (tcm->tcm_parent &&
1866 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1867 		(*t_p)++;
1868 		return 0;
1869 	}
1870 	if (*t_p > s_t)
1871 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1872 	arg.w.fn = qdisc_class_dump;
1873 	arg.skb = skb;
1874 	arg.cb = cb;
1875 	arg.w.stop  = 0;
1876 	arg.w.skip = cb->args[1];
1877 	arg.w.count = 0;
1878 	q->ops->cl_ops->walk(q, &arg.w);
1879 	cb->args[1] = arg.w.count;
1880 	if (arg.w.stop)
1881 		return -1;
1882 	(*t_p)++;
1883 	return 0;
1884 }
1885 
1886 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1887 			       struct tcmsg *tcm, struct netlink_callback *cb,
1888 			       int *t_p, int s_t)
1889 {
1890 	struct Qdisc *q;
1891 	int b;
1892 
1893 	if (!root)
1894 		return 0;
1895 
1896 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1897 		return -1;
1898 
1899 	if (!qdisc_dev(root))
1900 		return 0;
1901 
1902 	if (tcm->tcm_parent) {
1903 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
1904 		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1905 			return -1;
1906 		return 0;
1907 	}
1908 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1909 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1910 			return -1;
1911 	}
1912 
1913 	return 0;
1914 }
1915 
1916 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1917 {
1918 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1919 	struct net *net = sock_net(skb->sk);
1920 	struct netdev_queue *dev_queue;
1921 	struct net_device *dev;
1922 	int t, s_t;
1923 
1924 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1925 		return 0;
1926 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1927 	if (!dev)
1928 		return 0;
1929 
1930 	s_t = cb->args[0];
1931 	t = 0;
1932 
1933 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1934 		goto done;
1935 
1936 	dev_queue = dev_ingress_queue(dev);
1937 	if (dev_queue &&
1938 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1939 				&t, s_t) < 0)
1940 		goto done;
1941 
1942 done:
1943 	cb->args[0] = t;
1944 
1945 	dev_put(dev);
1946 	return skb->len;
1947 }
1948 
1949 #ifdef CONFIG_PROC_FS
1950 static int psched_show(struct seq_file *seq, void *v)
1951 {
1952 	seq_printf(seq, "%08x %08x %08x %08x\n",
1953 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1954 		   1000000,
1955 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1956 
1957 	return 0;
1958 }
1959 
1960 static int psched_open(struct inode *inode, struct file *file)
1961 {
1962 	return single_open(file, psched_show, NULL);
1963 }
1964 
1965 static const struct file_operations psched_fops = {
1966 	.owner = THIS_MODULE,
1967 	.open = psched_open,
1968 	.read  = seq_read,
1969 	.llseek = seq_lseek,
1970 	.release = single_release,
1971 };
1972 
1973 static int __net_init psched_net_init(struct net *net)
1974 {
1975 	struct proc_dir_entry *e;
1976 
1977 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1978 	if (e == NULL)
1979 		return -ENOMEM;
1980 
1981 	return 0;
1982 }
1983 
1984 static void __net_exit psched_net_exit(struct net *net)
1985 {
1986 	remove_proc_entry("psched", net->proc_net);
1987 }
1988 #else
1989 static int __net_init psched_net_init(struct net *net)
1990 {
1991 	return 0;
1992 }
1993 
1994 static void __net_exit psched_net_exit(struct net *net)
1995 {
1996 }
1997 #endif
1998 
1999 static struct pernet_operations psched_net_ops = {
2000 	.init = psched_net_init,
2001 	.exit = psched_net_exit,
2002 };
2003 
2004 static int __init pktsched_init(void)
2005 {
2006 	int err;
2007 
2008 	err = register_pernet_subsys(&psched_net_ops);
2009 	if (err) {
2010 		pr_err("pktsched_init: "
2011 		       "cannot initialize per netns operations\n");
2012 		return err;
2013 	}
2014 
2015 	register_qdisc(&pfifo_fast_ops);
2016 	register_qdisc(&pfifo_qdisc_ops);
2017 	register_qdisc(&bfifo_qdisc_ops);
2018 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2019 	register_qdisc(&mq_qdisc_ops);
2020 	register_qdisc(&noqueue_qdisc_ops);
2021 
2022 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2023 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2024 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2025 		      0);
2026 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2027 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2028 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2029 		      0);
2030 
2031 	return 0;
2032 }
2033 
2034 subsys_initcall(pktsched_init);
2035