/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event);

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box,
   which is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but this does not mean that the queue is empty, only that the
   discipline does not want to send anything at this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must nevertheless be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:
   NET_XMIT_DROP 	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   Like dequeue, but without removing the packet from the queue.

   ---reset

   Returns the qdisc to its initial state: purges all buffers, clears all
   timers and counters (except for statistics), etc.

   ---init

   Initializes a newly created qdisc.

   ---destroy

   Destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   Changes qdisc parameters.
 */
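
/* To make the enqueue/dequeue contract above concrete, here is a
 * minimal sketch of a FIFO-style discipline (illustrative only, not
 * part of this file; the example_* names are hypothetical, the helpers
 * are the ones provided by sch_generic.h in this kernel generation):
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < qdisc_dev(sch)->tx_queue_len))
 *			return qdisc_enqueue_tail(skb, sch);
 *
 *		// queue full: this very packet is dropped
 *		return qdisc_drop(skb, sch, to_free);
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		// may return NULL without the queue being empty;
 *		// emptiness is signalled only by sch->q.qlen == 0
 *		return qdisc_dequeue_head(sch);
 *	}
 */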

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);

int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
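
/* A qdisc module typically registers its ops from its module init hook.
 * A minimal sketch (the "example" qdisc and its enqueue/dequeue are
 * hypothetical, not part of this file):
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_head,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *	module_init(example_module_init);
 *
 * Note that register_qdisc() above demands a peek op whenever a custom
 * dequeue is supplied, and rejects partially filled class ops.
 */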

/* Get default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!try_module_get(q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
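
/* The fall-back default can be changed at run time; the
 * net.core.default_qdisc sysctl handler ends up here, e.g. conceptually:
 *
 *	err = qdisc_set_default("fq_codel");
 *	if (err)
 *		pr_warn("cannot switch default qdisc: %d\n", err);
 */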

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		struct Qdisc *root = qdisc_dev(q)->qdisc;

		WARN_ON_ONCE(root == &noop_qdisc);
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}

/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* Older iproute2 versions did not transfer the linklayer setting, and
 * the rate table lookup system has been dropped from the kernel. To
 * stay backward compatible with older iproute2 tc utilities, we detect
 * the linklayer setting by checking whether the rate table has been
 * modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below it, and comparing the two.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
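
/* Worked example of the detection above (illustrative numbers): with
 * mpu = 0 and cell_log = 3, low = roundup(0, 48) = 0 and
 * high = roundup(1, 48) = 48, giving cell_low = 0 >> 3 = 0 and
 * cell_high = (48 >> 3) - 1 = 5.  Packet sizes 1..48 all occupy a
 * single 48-byte ATM cell, so an ATM-adjusted table stores the same
 * transmit time in entries 0 and 5 and is classified as
 * TC_LINKLAYER_ATM, while an unmodified Ethernet table grows between
 * those two entries and falls through to TC_LINKLAYER_ETHERNET.
 */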

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);
static DEFINE_SPINLOCK(qdisc_stab_lock);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}

static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
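
/* Worked example for the size table above (illustrative numbers): with
 * szopts.overhead = 24, cell_align = -1, cell_log = 6, size_log = 0 and
 * tsize = 512, a 1000-byte skb yields pkt_len = 1024 and
 * slot = (1024 - 1) >> 6 = 15, so the qdisc is charged stab->data[15]
 * bytes for this packet instead of its raw length; slots beyond the
 * table are extrapolated from the last entry, as coded above.
 */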

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	if (wd->last_expires == expires)
		return;

	wd->last_expires = expires;
	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
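
/* Typical watchdog usage in a shaping qdisc (sketch only; following the
 * pattern of sch_tbf, with the watchdog assumed to be embedded in the
 * qdisc's private data as q->watchdog):
 *
 *	// in ->init():
 *	qdisc_watchdog_init(&q->watchdog, sch);
 *
 *	// in ->dequeue(), when the head packet may not be sent yet:
 *	qdisc_watchdog_schedule_ns(&q->watchdog, next_send_time_ns);
 *	return NULL;
 *
 *	// in ->reset() / ->destroy():
 *	qdisc_watchdog_cancel(&q->watchdog);
 *
 * The hrtimer merely reschedules the root qdisc, so the delayed
 * transmission is retried from the normal softirq path.
 */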

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head), i;
	struct hlist_head *h;

	if (size <= PAGE_SIZE)
		h = kmalloc(size, GFP_KERNEL);
	else
		h = (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (clhash->hash == NULL)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
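
/* Sketch of how a classful qdisc uses this hash (compare sch_htb; the
 * example_class type is hypothetical):
 *
 *	struct example_class {
 *		struct Qdisc_class_common common;	// classid + hnode
 *		...
 *	};
 *
 *	// in ->init():
 *	err = qdisc_class_hash_init(&q->clhash);
 *
 *	// when a class is created:
 *	cl->common.classid = classid;
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *	qdisc_class_hash_grow(sch, &q->clhash);
 *
 * Lookup then goes through qdisc_class_find(&q->clhash, classid), and
 * teardown through qdisc_class_hash_remove()/qdisc_class_hash_destroy().
 */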

/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
			       unsigned int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	int drops;

	if (n == 0 && len == 0)
		return;
	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* TODO: perform the search on a per txq basis */
		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
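
/* Callers use this whenever packets leave a qdisc other than through its
 * parent's dequeue, so that qlen and backlog stay consistent all the way
 * up the tree; e.g. after purging "dropped" packets totalling "bytes"
 * bytes from a child qdisc during a configuration change:
 *
 *	qdisc_tree_reduce_backlog(child, dropped, bytes);
 */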

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using "skb"
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		if (new && new->ops->attach)
			goto skip;

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

skip:
		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;

			if (new && new->ops->attach)
				new->ops->attach(new);
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}

/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load.  So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request.  We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* The replay will run qdisc_lookup_ops
				 * again, so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (qdisc_is_percpu_stats(sch)) {
			sch->cpu_bstats =
				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
			if (!sch->cpu_bstats)
				goto err_out4;

			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
			if (!sch->cpu_qstats)
				goto err_out4;
		}

		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			seqcount_t *running;

			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				running = qdisc_root_sleeping_running(sch);
			else
				running = &sch->running;

			err = gen_new_estimator(&sch->bstats,
						sch->cpu_bstats,
						&sch->rate_est,
						NULL,
						running,
						tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_hash_add(sch);

		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	/*
	 * Any broken qdiscs that would require an ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}

/*
 * Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a choice:
				 *   either to change it or to create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both the CREATE and REPLACE flags are set.
				 *
				 *   2. If EXCL is set, the requester asserted
				 *   that the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   Alas, this is a sort of hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft if the
				 *   user gave a KIND that does not match the
				 *   existing one.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
				  &d, cpu_bstats, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
	return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above, we don't
	 * want to hit the global qdisc hashtable again.
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();
	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}



/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/



static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is a genuine qdisc handle consistent with
		 * both parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}


static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static int tclass_notify(struct net *net, struct sk_buff *oskb,
			 struct nlmsghdr *n, struct Qdisc *q,
			 unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			      n->nlmsg_flags & NLM_F_ECHO);
}

struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	if (tc_qdisc_dump_ignore(q) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop  = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	if (!qdisc_dev(root))
		return 0;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}

/* Main classifier routine: scans the classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *old_tp = tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
			goto reset;
#endif
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(limit++ >= MAX_REC_LOOP)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = old_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tc_classify);
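
/* A classful qdisc typically invokes tc_classify() from its own classify
 * step; a sketch following the pattern of sch_prio (q->filter_list and
 * the band mapping are the caller's own):
 *
 *	struct tcf_result res;
 *	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
 *	int err = tc_classify(skb, fl, &res, false);
 *
 *	switch (err) {
 *	case TC_ACT_SHOT:
 *		return NULL;			// drop the packet
 *	case TC_ACT_UNSPEC:
 *		band = default_band;		// no filter matched
 *		break;
 *	default:
 *		band = map_classid(res.classid);	// filter verdict
 *	}
 */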

bool tcf_destroy(struct tcf_proto *tp, bool force)
{
	if (tp->ops->destroy(tp, force)) {
		module_put(tp->ops->owner);
		kfree_rcu(tp, rcu);
		return true;
	}

	return false;
}

void tcf_destroy_chain(struct tcf_proto __rcu **fl)
{
	struct tcf_proto *tp;

	while ((tp = rtnl_dereference(*fl)) != NULL) {
		RCU_INIT_POINTER(*fl, tp->next);
		tcf_destroy(tp, true);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);

#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static int __net_init psched_net_init(struct net *net)
{
	struct proc_dir_entry *e;

	e = proc_create("psched", 0, net->proc_net, &psched_fops);
	if (e == NULL)
		return -ENOMEM;

	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif

static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};

static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);

	return 0;
}

subsys_initcall(pktsched_init);