xref: /linux/net/sched/sch_api.c (revision 4f139972b489f8bc2c821aa25ac65018d92af3f7)
1 /*
2  * net/sched/sch_api.c	Packet scheduler API.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17 
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33 
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 
39 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
40 			struct nlmsghdr *n, u32 clid,
41 			struct Qdisc *old, struct Qdisc *new);
42 static int tclass_notify(struct net *net, struct sk_buff *oskb,
43 			 struct nlmsghdr *n, struct Qdisc *q,
44 			 unsigned long cl, int event);
45 
46 /*
47 
48    Short review.
49    -------------
50 
51    This file consists of two interrelated parts:
52 
53    1. the queueing discipline manager frontend.
54    2. the traffic class manager frontend.
55 
56    Generally, a queueing discipline ("qdisc") is a black box
57    that can enqueue packets and dequeue them (when the
58    device is ready to send something) in an order and at times
59    determined by the algorithm hidden inside it.
60 
61    qdiscs are divided into two categories:
62    - "queues", which have no internal structure visible from outside.
63    - "schedulers", which split all packets into "traffic classes",
64      using "packet classifiers" (see cls_api.c).
65 
66    In turn, classes may have child qdiscs (as a rule, queues)
67    attached to them, and so on.
68 
69    The goal of the routines in this file is to translate
70    the information supplied by the user in the form of handles
71    into a form more intelligible to the kernel, to perform
72    sanity checks and the parts of the work common to all qdiscs,
73    and to provide rtnetlink notifications.
74 
75    All the real intelligent work is done inside the qdisc modules.
76 
77 
78 
79    Every discipline has two major routines: enqueue and dequeue.
80 
81    ---dequeue
82 
83    dequeue usually returns an skb to send. It is allowed to return NULL,
84    but that does not mean the queue is empty; it just means the
85    discipline does not want to send anything right now.
86    The queue is really empty only if q->q.qlen == 0.
87    For complicated disciplines with multiple queues, q->q is not the
88    real packet queue, but q->q.qlen must nevertheless be valid.
89 
90    ---enqueue
91 
92    enqueue returns 0 if the packet was enqueued successfully.
93    If a packet (this one or another one) was dropped, it returns
94    a non-zero error code:
95    NET_XMIT_DROP 	- this packet was dropped.
96      Expected action: do not back off, but wait until the queue clears.
97    NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
98      Expected action: back off or ignore.
99 
100    Auxiliary routines:
101 
102    ---peek
103 
104    like dequeue, but without removing the packet from the queue.
105 
106    ---reset
107 
108    returns the qdisc to its initial state: purges all buffers, clears
109    all timers and counters (except for statistics), etc.
110 
111    ---init
112 
113    initializes a newly created qdisc.
114 
115    ---destroy
116 
117    destroys the resources allocated by init and during the qdisc's lifetime.
118 
119    ---change
120 
121    changes qdisc parameters.
122  */
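
/* To make the enqueue/dequeue contract above concrete, here is a minimal
 * sketch of a FIFO-style pair. The helpers and the sch->limit check are
 * real (cf. sch_fifo.c); the qdisc itself is hypothetical:
 *
 *	static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 *				   struct sk_buff **to_free)
 *	{
 *		if (likely(sch->q.qlen < sch->limit))
 *			return qdisc_enqueue_tail(skb, sch);	// 0 == success
 *
 *		return qdisc_drop(skb, sch, to_free);	// NET_XMIT_DROP
 *	}
 *
 *	static struct sk_buff *example_dequeue(struct Qdisc *sch)
 *	{
 *		return qdisc_dequeue_head(sch);	// NULL does not imply empty
 *	}
 */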
123 
124 /* Protects list of registered TC modules. It is pure SMP lock. */
125 static DEFINE_RWLOCK(qdisc_mod_lock);
126 
127 
128 /************************************************
129  *	Queueing disciplines manipulation.	*
130  ************************************************/
131 
132 
133 /* The list of all installed queueing disciplines. */
134 
135 static struct Qdisc_ops *qdisc_base;
136 
137 /* Register/unregister queueing discipline */
138 
139 int register_qdisc(struct Qdisc_ops *qops)
140 {
141 	struct Qdisc_ops *q, **qp;
142 	int rc = -EEXIST;
143 
144 	write_lock(&qdisc_mod_lock);
145 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
146 		if (!strcmp(qops->id, q->id))
147 			goto out;
148 
149 	if (qops->enqueue == NULL)
150 		qops->enqueue = noop_qdisc_ops.enqueue;
151 	if (qops->peek == NULL) {
152 		if (qops->dequeue == NULL)
153 			qops->peek = noop_qdisc_ops.peek;
154 		else
155 			goto out_einval;
156 	}
157 	if (qops->dequeue == NULL)
158 		qops->dequeue = noop_qdisc_ops.dequeue;
159 
160 	if (qops->cl_ops) {
161 		const struct Qdisc_class_ops *cops = qops->cl_ops;
162 
163 		if (!(cops->get && cops->put && cops->walk && cops->leaf))
164 			goto out_einval;
165 
166 		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
167 			goto out_einval;
168 	}
169 
170 	qops->next = NULL;
171 	*qp = qops;
172 	rc = 0;
173 out:
174 	write_unlock(&qdisc_mod_lock);
175 	return rc;
176 
177 out_einval:
178 	rc = -EINVAL;
179 	goto out;
180 }
181 EXPORT_SYMBOL(register_qdisc);
182 
183 int unregister_qdisc(struct Qdisc_ops *qops)
184 {
185 	struct Qdisc_ops *q, **qp;
186 	int err = -ENOENT;
187 
188 	write_lock(&qdisc_mod_lock);
189 	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
190 		if (q == qops)
191 			break;
192 	if (q) {
193 		*qp = q->next;
194 		q->next = NULL;
195 		err = 0;
196 	}
197 	write_unlock(&qdisc_mod_lock);
198 	return err;
199 }
200 EXPORT_SYMBOL(unregister_qdisc);
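
/* A scheduler module typically registers its Qdisc_ops from module init
 * and unregisters them on exit. A sketch ("foo" and its callbacks are
 * hypothetical; the fields shown are real Qdisc_ops members):
 *
 *	static struct Qdisc_ops foo_qdisc_ops __read_mostly = {
 *		.id		= "foo",
 *		.priv_size	= sizeof(struct foo_sched_data),
 *		.enqueue	= foo_enqueue,
 *		.dequeue	= foo_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= foo_init,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init foo_module_init(void)
 *	{
 *		return register_qdisc(&foo_qdisc_ops);
 *	}
 *
 *	static void __exit foo_module_exit(void)
 *	{
 *		unregister_qdisc(&foo_qdisc_ops);
 *	}
 *
 *	module_init(foo_module_init);
 *	module_exit(foo_module_exit);
 *
 * Note that register_qdisc() insists on a ->peek when a ->dequeue is
 * supplied, which is why the sketch wires in qdisc_peek_dequeued().
 */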
201 
202 /* Get default qdisc if not otherwise specified */
203 void qdisc_get_default(char *name, size_t len)
204 {
205 	read_lock(&qdisc_mod_lock);
206 	strlcpy(name, default_qdisc_ops->id, len);
207 	read_unlock(&qdisc_mod_lock);
208 }
209 
210 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
211 {
212 	struct Qdisc_ops *q = NULL;
213 
214 	for (q = qdisc_base; q; q = q->next) {
215 		if (!strcmp(name, q->id)) {
216 			if (!try_module_get(q->owner))
217 				q = NULL;
218 			break;
219 		}
220 	}
221 
222 	return q;
223 }
224 
225 /* Set new default qdisc to use */
226 int qdisc_set_default(const char *name)
227 {
228 	const struct Qdisc_ops *ops;
229 
230 	if (!capable(CAP_NET_ADMIN))
231 		return -EPERM;
232 
233 	write_lock(&qdisc_mod_lock);
234 	ops = qdisc_lookup_default(name);
235 	if (!ops) {
236 		/* Not found, drop lock and try to load module */
237 		write_unlock(&qdisc_mod_lock);
238 		request_module("sch_%s", name);
239 		write_lock(&qdisc_mod_lock);
240 
241 		ops = qdisc_lookup_default(name);
242 	}
243 
244 	if (ops) {
245 		/* Set new default */
246 		module_put(default_qdisc_ops->owner);
247 		default_qdisc_ops = ops;
248 	}
249 	write_unlock(&qdisc_mod_lock);
250 
251 	return ops ? 0 : -ENOENT;
252 }
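
/* qdisc_set_default() is reached via the net.core.default_qdisc sysctl;
 * from userspace, e.g.:
 *
 *	sysctl -w net.core.default_qdisc=fq
 *
 * after which newly attached default roots use the chosen ops instead of
 * pfifo_fast.
 */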
253 
254 /* We know the handle. Find the qdisc among all qdiscs attached to the
255  * device (root qdisc, all its children, children of children etc.)
256  * Note: caller either uses rtnl or rcu_read_lock()
257  */
258 
259 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
260 {
261 	struct Qdisc *q;
262 
263 	if (!qdisc_dev(root))
264 		return (root->handle == handle ? root : NULL);
265 
266 	if (!(root->flags & TCQ_F_BUILTIN) &&
267 	    root->handle == handle)
268 		return root;
269 
270 	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
271 		if (q->handle == handle)
272 			return q;
273 	}
274 	return NULL;
275 }
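
/* Handles compared above are 32-bit values split 16:16 into major:minor;
 * a qdisc proper always has minor 0. With the TC_H_* macros from
 * <uapi/linux/pkt_sched.h> (tc prints both halves in hex):
 *
 *	TC_H_MAKE(0x10000, 0) == 0x00010000	qdisc "1:" (aka 1:0)
 *	TC_H_MAJ(0x00010010)  == 0x00010000	major of class "1:10"
 *	TC_H_MIN(0x00010010)  == 0x00000010	minor of class "1:10"
 */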
276 
277 void qdisc_hash_add(struct Qdisc *q, bool invisible)
278 {
279 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
280 		struct Qdisc *root = qdisc_dev(q)->qdisc;
281 
282 		WARN_ON_ONCE(root == &noop_qdisc);
283 		ASSERT_RTNL();
284 		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285 		if (invisible)
286 			q->flags |= TCQ_F_INVISIBLE;
287 	}
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290 
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293 	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 		ASSERT_RTNL();
295 		hash_del_rcu(&q->hash);
296 	}
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299 
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302 	struct Qdisc *q;
303 
304 	q = qdisc_match_from_root(dev->qdisc, handle);
305 	if (q)
306 		goto out;
307 
308 	if (dev_ingress_queue(dev))
309 		q = qdisc_match_from_root(
310 			dev_ingress_queue(dev)->qdisc_sleeping,
311 			handle);
312 out:
313 	return q;
314 }
315 
316 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
317 {
318 	unsigned long cl;
319 	struct Qdisc *leaf;
320 	const struct Qdisc_class_ops *cops = p->ops->cl_ops;
321 
322 	if (cops == NULL)
323 		return NULL;
324 	cl = cops->get(p, classid);
325 
326 	if (cl == 0)
327 		return NULL;
328 	leaf = cops->leaf(p, cl);
329 	cops->put(p, cl);
330 	return leaf;
331 }
332 
333 /* Find queueing discipline by name */
334 
335 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
336 {
337 	struct Qdisc_ops *q = NULL;
338 
339 	if (kind) {
340 		read_lock(&qdisc_mod_lock);
341 		for (q = qdisc_base; q; q = q->next) {
342 			if (nla_strcmp(kind, q->id) == 0) {
343 				if (!try_module_get(q->owner))
344 					q = NULL;
345 				break;
346 			}
347 		}
348 		read_unlock(&qdisc_mod_lock);
349 	}
350 	return q;
351 }
352 
353 /* The linklayer setting was not transferred from iproute2 in older
354  * versions, and the rate table lookup system has been dropped from
355  * the kernel. To stay backward compatible with older iproute2 tc
356  * utils, we detect the linklayer setting by checking whether the rate
357  * table was modified.
358  *
359  * For linklayer ATM table entries, the rate table will be aligned to
360  * 48 bytes, thus some table entries will contain the same value.  The
361  * mpu (min packet unit) is also encoded into the old rate table, so
362  * starting from the mpu, we find the low and high table entries for
363  * mapping this cell.  If these entries contain the same value, then
364  * the rate table has been modified for linklayer ATM.
365  *
366  * This is done by rounding the mpu up to the nearest 48-byte cell/entry,
367  * then rounding up to the next cell, calculating the table entry one
368  * below, and comparing the two.
369  */
370 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
371 {
372 	int low       = roundup(r->mpu, 48);
373 	int high      = roundup(low+1, 48);
374 	int cell_low  = low >> r->cell_log;
375 	int cell_high = (high >> r->cell_log) - 1;
376 
377 	/* rtab is too inaccurate at rates > 100Mbit/s */
378 	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
379 		pr_debug("TC linklayer: Giving up ATM detection\n");
380 		return TC_LINKLAYER_ETHERNET;
381 	}
382 
383 	if ((cell_high > cell_low) && (cell_high < 256)
384 	    && (rtab[cell_low] == rtab[cell_high])) {
385 		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
386 			 cell_low, cell_high, rtab[cell_high]);
387 		return TC_LINKLAYER_ATM;
388 	}
389 	return TC_LINKLAYER_ETHERNET;
390 }
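
/* A quick worked example of the detection above. Assume mpu = 0 and
 * cell_log = 3 (one rate table entry per 8 bytes of packet size):
 *
 *	low       = roundup(0, 48) = 0,   so cell_low  = 0 >> 3        = 0
 *	high      = roundup(1, 48) = 48,  so cell_high = (48 >> 3) - 1 = 5
 *
 * Under ATM every size from 1 to 48 bytes costs exactly one 53-byte cell,
 * hence rtab[0] == rtab[5] and we report TC_LINKLAYER_ATM. On Ethernet
 * the per-size transmission times keep growing, rtab[0] != rtab[5], and
 * we fall through to TC_LINKLAYER_ETHERNET.
 */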
391 
392 static struct qdisc_rate_table *qdisc_rtab_list;
393 
394 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
395 					struct nlattr *tab)
396 {
397 	struct qdisc_rate_table *rtab;
398 
399 	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
400 	    nla_len(tab) != TC_RTAB_SIZE)
401 		return NULL;
402 
403 	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
404 		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
405 		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
406 			rtab->refcnt++;
407 			return rtab;
408 		}
409 	}
410 
411 	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
412 	if (rtab) {
413 		rtab->rate = *r;
414 		rtab->refcnt = 1;
415 		memcpy(rtab->data, nla_data(tab), 1024);
416 		if (r->linklayer == TC_LINKLAYER_UNAWARE)
417 			r->linklayer = __detect_linklayer(r, rtab->data);
418 		rtab->next = qdisc_rtab_list;
419 		qdisc_rtab_list = rtab;
420 	}
421 	return rtab;
422 }
423 EXPORT_SYMBOL(qdisc_get_rtab);
424 
425 void qdisc_put_rtab(struct qdisc_rate_table *tab)
426 {
427 	struct qdisc_rate_table *rtab, **rtabp;
428 
429 	if (!tab || --tab->refcnt)
430 		return;
431 
432 	for (rtabp = &qdisc_rtab_list;
433 	     (rtab = *rtabp) != NULL;
434 	     rtabp = &rtab->next) {
435 		if (rtab == tab) {
436 			*rtabp = rtab->next;
437 			kfree(rtab);
438 			return;
439 		}
440 	}
441 }
442 EXPORT_SYMBOL(qdisc_put_rtab);
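
/* Sketch of how a classful qdisc pairs these helpers (the attribute name
 * TCA_FOO_RTAB is hypothetical; the pattern matches e.g. sch_cbq.c):
 *
 *	struct qdisc_rate_table *rtab;
 *
 *	rtab = qdisc_get_rtab(&r->rate, tb[TCA_FOO_RTAB]);	// ->init/->change
 *	if (!rtab)
 *		return -EINVAL;
 *	...
 *	qdisc_put_rtab(rtab);	// ->destroy, or on an error path
 *
 * Identical tables are shared via qdisc_rtab_list and refcounted, so
 * repeated gets of the same {rate, data} pair return the same object.
 */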
443 
444 static LIST_HEAD(qdisc_stab_list);
445 
446 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
447 	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
448 	[TCA_STAB_DATA] = { .type = NLA_BINARY },
449 };
450 
451 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
452 {
453 	struct nlattr *tb[TCA_STAB_MAX + 1];
454 	struct qdisc_size_table *stab;
455 	struct tc_sizespec *s;
456 	unsigned int tsize = 0;
457 	u16 *tab = NULL;
458 	int err;
459 
460 	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
461 	if (err < 0)
462 		return ERR_PTR(err);
463 	if (!tb[TCA_STAB_BASE])
464 		return ERR_PTR(-EINVAL);
465 
466 	s = nla_data(tb[TCA_STAB_BASE]);
467 
468 	if (s->tsize > 0) {
469 		if (!tb[TCA_STAB_DATA])
470 			return ERR_PTR(-EINVAL);
471 		tab = nla_data(tb[TCA_STAB_DATA]);
472 		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
473 	}
474 
475 	if (tsize != s->tsize || (!tab && tsize > 0))
476 		return ERR_PTR(-EINVAL);
477 
478 	list_for_each_entry(stab, &qdisc_stab_list, list) {
479 		if (memcmp(&stab->szopts, s, sizeof(*s)))
480 			continue;
481 		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
482 			continue;
483 		stab->refcnt++;
484 		return stab;
485 	}
486 
487 	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
488 	if (!stab)
489 		return ERR_PTR(-ENOMEM);
490 
491 	stab->refcnt = 1;
492 	stab->szopts = *s;
493 	if (tsize > 0)
494 		memcpy(stab->data, tab, tsize * sizeof(u16));
495 
496 	list_add_tail(&stab->list, &qdisc_stab_list);
497 
498 	return stab;
499 }
500 
501 static void stab_kfree_rcu(struct rcu_head *head)
502 {
503 	kfree(container_of(head, struct qdisc_size_table, rcu));
504 }
505 
506 void qdisc_put_stab(struct qdisc_size_table *tab)
507 {
508 	if (!tab)
509 		return;
510 
511 	if (--tab->refcnt == 0) {
512 		list_del(&tab->list);
513 		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
514 	}
515 }
516 EXPORT_SYMBOL(qdisc_put_stab);
517 
518 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
519 {
520 	struct nlattr *nest;
521 
522 	nest = nla_nest_start(skb, TCA_STAB);
523 	if (nest == NULL)
524 		goto nla_put_failure;
525 	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
526 		goto nla_put_failure;
527 	nla_nest_end(skb, nest);
528 
529 	return skb->len;
530 
531 nla_put_failure:
532 	return -1;
533 }
534 
535 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
536 			       const struct qdisc_size_table *stab)
537 {
538 	int pkt_len, slot;
539 
540 	pkt_len = skb->len + stab->szopts.overhead;
541 	if (unlikely(!stab->szopts.tsize))
542 		goto out;
543 
544 	slot = pkt_len + stab->szopts.cell_align;
545 	if (unlikely(slot < 0))
546 		slot = 0;
547 
548 	slot >>= stab->szopts.cell_log;
549 	if (likely(slot < stab->szopts.tsize))
550 		pkt_len = stab->data[slot];
551 	else
552 		pkt_len = stab->data[stab->szopts.tsize - 1] *
553 				(slot / stab->szopts.tsize) +
554 				stab->data[slot % stab->szopts.tsize];
555 
556 	pkt_len <<= stab->szopts.size_log;
557 out:
558 	if (unlikely(pkt_len < 1))
559 		pkt_len = 1;
560 	qdisc_skb_cb(skb)->pkt_len = pkt_len;
561 }
562 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
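
/* Worked example for the lookup above, with hypothetical stab parameters
 * overhead = 8, cell_align = 0, cell_log = 6 (64-byte cells),
 * size_log = 0 and tsize = 512:
 *
 *	skb->len = 1000
 *	pkt_len  = 1000 + 8  = 1008
 *	slot     = 1008 >> 6 = 15
 *	pkt_len  = stab->data[15] << 0
 *
 * i.e. the size charged to the packet is whatever the user-supplied table
 * says for the 15th 64-byte cell. Slots beyond the table are extrapolated
 * from the last entry, and the result is clamped to at least 1.
 */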
563 
564 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
565 {
566 	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
567 		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
568 			txt, qdisc->ops->id, qdisc->handle >> 16);
569 		qdisc->flags |= TCQ_F_WARN_NONWC;
570 	}
571 }
572 EXPORT_SYMBOL(qdisc_warn_nonwc);
573 
574 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
575 {
576 	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
577 						 timer);
578 
579 	rcu_read_lock();
580 	__netif_schedule(qdisc_root(wd->qdisc));
581 	rcu_read_unlock();
582 
583 	return HRTIMER_NORESTART;
584 }
585 
586 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
587 {
588 	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
589 	wd->timer.function = qdisc_watchdog;
590 	wd->qdisc = qdisc;
591 }
592 EXPORT_SYMBOL(qdisc_watchdog_init);
593 
594 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
595 {
596 	if (test_bit(__QDISC_STATE_DEACTIVATED,
597 		     &qdisc_root_sleeping(wd->qdisc)->state))
598 		return;
599 
600 	if (wd->last_expires == expires)
601 		return;
602 
603 	wd->last_expires = expires;
604 	hrtimer_start(&wd->timer,
605 		      ns_to_ktime(expires),
606 		      HRTIMER_MODE_ABS_PINNED);
607 }
608 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
609 
610 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
611 {
612 	hrtimer_cancel(&wd->timer);
613 }
614 EXPORT_SYMBOL(qdisc_watchdog_cancel);
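
/* Typical watchdog usage by a rate-limiting qdisc (a sketch of the
 * pattern used by e.g. sch_tbf.c; q->watchdog and t_next_send are
 * illustrative names in the qdisc's private data):
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);		// in ->init
 *
 *	// in ->dequeue, when the head packet is not due yet:
 *	qdisc_watchdog_schedule_ns(&q->watchdog, t_next_send);
 *	return NULL;
 *
 *	qdisc_watchdog_cancel(&q->watchdog);		// in ->reset/->destroy
 *
 * The hrtimer fires qdisc_watchdog(), which reschedules the root qdisc so
 * that ->dequeue is retried at (approximately) the requested time.
 */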
615 
616 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
617 {
618 	unsigned int size = n * sizeof(struct hlist_head), i;
619 	struct hlist_head *h;
620 
621 	if (size <= PAGE_SIZE)
622 		h = kmalloc(size, GFP_KERNEL);
623 	else
624 		h = (struct hlist_head *)
625 			__get_free_pages(GFP_KERNEL, get_order(size));
626 
627 	if (h != NULL) {
628 		for (i = 0; i < n; i++)
629 			INIT_HLIST_HEAD(&h[i]);
630 	}
631 	return h;
632 }
633 
634 static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
635 {
636 	unsigned int size = n * sizeof(struct hlist_head);
637 
638 	if (size <= PAGE_SIZE)
639 		kfree(h);
640 	else
641 		free_pages((unsigned long)h, get_order(size));
642 }
643 
644 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
645 {
646 	struct Qdisc_class_common *cl;
647 	struct hlist_node *next;
648 	struct hlist_head *nhash, *ohash;
649 	unsigned int nsize, nmask, osize;
650 	unsigned int i, h;
651 
652 	/* Rehash when load factor exceeds 0.75 */
653 	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
654 		return;
655 	nsize = clhash->hashsize * 2;
656 	nmask = nsize - 1;
657 	nhash = qdisc_class_hash_alloc(nsize);
658 	if (nhash == NULL)
659 		return;
660 
661 	ohash = clhash->hash;
662 	osize = clhash->hashsize;
663 
664 	sch_tree_lock(sch);
665 	for (i = 0; i < osize; i++) {
666 		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
667 			h = qdisc_class_hash(cl->classid, nmask);
668 			hlist_add_head(&cl->hnode, &nhash[h]);
669 		}
670 	}
671 	clhash->hash     = nhash;
672 	clhash->hashsize = nsize;
673 	clhash->hashmask = nmask;
674 	sch_tree_unlock(sch);
675 
676 	qdisc_class_hash_free(ohash, osize);
677 }
678 EXPORT_SYMBOL(qdisc_class_hash_grow);
679 
680 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
681 {
682 	unsigned int size = 4;
683 
684 	clhash->hash = qdisc_class_hash_alloc(size);
685 	if (clhash->hash == NULL)
686 		return -ENOMEM;
687 	clhash->hashsize  = size;
688 	clhash->hashmask  = size - 1;
689 	clhash->hashelems = 0;
690 	return 0;
691 }
692 EXPORT_SYMBOL(qdisc_class_hash_init);
693 
694 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
695 {
696 	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
697 }
698 EXPORT_SYMBOL(qdisc_class_hash_destroy);
699 
700 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
701 			     struct Qdisc_class_common *cl)
702 {
703 	unsigned int h;
704 
705 	INIT_HLIST_NODE(&cl->hnode);
706 	h = qdisc_class_hash(cl->classid, clhash->hashmask);
707 	hlist_add_head(&cl->hnode, &clhash->hash[h]);
708 	clhash->hashelems++;
709 }
710 EXPORT_SYMBOL(qdisc_class_hash_insert);
711 
712 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
713 			     struct Qdisc_class_common *cl)
714 {
715 	hlist_del(&cl->hnode);
716 	clhash->hashelems--;
717 }
718 EXPORT_SYMBOL(qdisc_class_hash_remove);
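
/* Expected call pattern for the class hash helpers, as used by classful
 * qdiscs such as sch_htb.c:
 *
 *	err = qdisc_class_hash_init(&q->clhash);		// ->init
 *
 *	qdisc_class_hash_insert(&q->clhash, &cl->common);	// new class
 *	qdisc_class_hash_grow(sch, &q->clhash);
 *
 *	qdisc_class_hash_remove(&q->clhash, &cl->common);	// delete class
 *
 *	qdisc_class_hash_destroy(&q->clhash);			// ->destroy
 *
 * grow() is cheap when no resize is needed, so callers simply invoke it
 * after every insert and let the 3/4 load factor check decide.
 */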
719 
720 /* Allocate a unique handle from the space managed by the kernel.
721  * Possible range is [8000-FFFF]:0000 (0x8000 values)
722  */
723 static u32 qdisc_alloc_handle(struct net_device *dev)
724 {
725 	int i = 0x8000;
726 	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
727 
728 	do {
729 		autohandle += TC_H_MAKE(0x10000U, 0);
730 		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
731 			autohandle = TC_H_MAKE(0x80000000U, 0);
732 		if (!qdisc_lookup(dev, autohandle))
733 			return autohandle;
734 		cond_resched();
735 	} while	(--i > 0);
736 
737 	return 0;
738 }
739 
740 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
741 			       unsigned int len)
742 {
743 	const struct Qdisc_class_ops *cops;
744 	unsigned long cl;
745 	u32 parentid;
746 	int drops;
747 
748 	if (n == 0 && len == 0)
749 		return;
750 	drops = max_t(int, n, 0);
751 	rcu_read_lock();
752 	while ((parentid = sch->parent)) {
753 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
754 			break;
755 
756 		if (sch->flags & TCQ_F_NOPARENT)
757 			break;
758 		/* TODO: perform the search on a per txq basis */
759 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
760 		if (sch == NULL) {
761 			WARN_ON_ONCE(parentid != TC_H_ROOT);
762 			break;
763 		}
764 		cops = sch->ops->cl_ops;
765 		if (cops->qlen_notify) {
766 			cl = cops->get(sch, parentid);
767 			cops->qlen_notify(sch, cl);
768 			cops->put(sch, cl);
769 		}
770 		sch->q.qlen -= n;
771 		sch->qstats.backlog -= len;
772 		__qdisc_qstats_drop(sch, drops);
773 	}
774 	rcu_read_unlock();
775 }
776 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
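
/* Sketch: a qdisc that drops packets internally (e.g. while shrinking its
 * limit in ->change()) must tell its ancestors, or their qlen/backlog
 * counters go stale. Having dropped "dropped" packets totalling "bytes":
 *
 *	qdisc_tree_reduce_backlog(sch, dropped, bytes);
 *
 * This walks up via sch->parent, letting each classful ancestor update
 * the owning class (qlen_notify) and fixing qlen/backlog/drop counters.
 */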
777 
778 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
779 			       struct nlmsghdr *n, u32 clid,
780 			       struct Qdisc *old, struct Qdisc *new)
781 {
782 	if (new || old)
783 		qdisc_notify(net, skb, n, clid, old, new);
784 
785 	if (old)
786 		qdisc_destroy(old);
787 }
788 
789 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
790  * to device "dev".
791  *
792  * When appropriate, send a netlink notification using "skb"
793  * and "n".
794  *
795  * On success, destroy the old qdisc.
796  */
797 
798 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
799 		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
800 		       struct Qdisc *new, struct Qdisc *old)
801 {
802 	struct Qdisc *q = old;
803 	struct net *net = dev_net(dev);
804 	int err = 0;
805 
806 	if (parent == NULL) {
807 		unsigned int i, num_q, ingress;
808 
809 		ingress = 0;
810 		num_q = dev->num_tx_queues;
811 		if ((q && q->flags & TCQ_F_INGRESS) ||
812 		    (new && new->flags & TCQ_F_INGRESS)) {
813 			num_q = 1;
814 			ingress = 1;
815 			if (!dev_ingress_queue(dev))
816 				return -ENOENT;
817 		}
818 
819 		if (dev->flags & IFF_UP)
820 			dev_deactivate(dev);
821 
822 		if (new && new->ops->attach)
823 			goto skip;
824 
825 		for (i = 0; i < num_q; i++) {
826 			struct netdev_queue *dev_queue = dev_ingress_queue(dev);
827 
828 			if (!ingress)
829 				dev_queue = netdev_get_tx_queue(dev, i);
830 
831 			old = dev_graft_qdisc(dev_queue, new);
832 			if (new && i > 0)
833 				atomic_inc(&new->refcnt);
834 
835 			if (!ingress)
836 				qdisc_destroy(old);
837 		}
838 
839 skip:
840 		if (!ingress) {
841 			notify_and_destroy(net, skb, n, classid,
842 					   dev->qdisc, new);
843 			if (new && !new->ops->attach)
844 				atomic_inc(&new->refcnt);
845 			dev->qdisc = new ? : &noop_qdisc;
846 
847 			if (new && new->ops->attach)
848 				new->ops->attach(new);
849 		} else {
850 			notify_and_destroy(net, skb, n, classid, old, new);
851 		}
852 
853 		if (dev->flags & IFF_UP)
854 			dev_activate(dev);
855 	} else {
856 		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
857 
858 		err = -EOPNOTSUPP;
859 		if (cops && cops->graft) {
860 			unsigned long cl = cops->get(parent, classid);
861 			if (cl) {
862 				err = cops->graft(parent, cl, new, &old);
863 				cops->put(parent, cl);
864 			} else
865 				err = -ENOENT;
866 		}
867 		if (!err)
868 			notify_and_destroy(net, skb, n, classid, old, new);
869 	}
870 	return err;
871 }
872 
873 /* lockdep annotation is needed for ingress; egress gets it only for name */
874 static struct lock_class_key qdisc_tx_lock;
875 static struct lock_class_key qdisc_rx_lock;
876 
877 /*
878    Allocate and initialize a new qdisc.
879 
880    Parameters are passed via opt.
881  */
882 
883 static struct Qdisc *qdisc_create(struct net_device *dev,
884 				  struct netdev_queue *dev_queue,
885 				  struct Qdisc *p, u32 parent, u32 handle,
886 				  struct nlattr **tca, int *errp)
887 {
888 	int err;
889 	struct nlattr *kind = tca[TCA_KIND];
890 	struct Qdisc *sch;
891 	struct Qdisc_ops *ops;
892 	struct qdisc_size_table *stab;
893 
894 	ops = qdisc_lookup_ops(kind);
895 #ifdef CONFIG_MODULES
896 	if (ops == NULL && kind != NULL) {
897 		char name[IFNAMSIZ];
898 		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
899 			/* We dropped the RTNL semaphore in order to
900 			 * perform the module load.  So, even if we
901 			 * succeeded in loading the module we have to
902 			 * tell the caller to replay the request.  We
903 			 * indicate this using -EAGAIN.
904 			 * We replay the request because the device may
905 			 * go away in the meantime.
906 			 */
907 			rtnl_unlock();
908 			request_module("sch_%s", name);
909 			rtnl_lock();
910 			ops = qdisc_lookup_ops(kind);
911 			if (ops != NULL) {
912 				/* We will try qdisc_lookup_ops again,
913 				 * so don't keep a reference.
914 				 */
915 				module_put(ops->owner);
916 				err = -EAGAIN;
917 				goto err_out;
918 			}
919 		}
920 	}
921 #endif
922 
923 	err = -ENOENT;
924 	if (ops == NULL)
925 		goto err_out;
926 
927 	sch = qdisc_alloc(dev_queue, ops);
928 	if (IS_ERR(sch)) {
929 		err = PTR_ERR(sch);
930 		goto err_out2;
931 	}
932 
933 	sch->parent = parent;
934 
935 	if (handle == TC_H_INGRESS) {
936 		sch->flags |= TCQ_F_INGRESS;
937 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
938 		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
939 	} else {
940 		if (handle == 0) {
941 			handle = qdisc_alloc_handle(dev);
942 			err = -ENOMEM;
943 			if (handle == 0)
944 				goto err_out3;
945 		}
946 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
947 		if (!netif_is_multiqueue(dev))
948 			sch->flags |= TCQ_F_ONETXQUEUE;
949 	}
950 
951 	sch->handle = handle;
952 
953 	/* This exists to keep backward compatibility with a userspace
954 	 * loophole that allowed userspace to get the IFF_NO_QUEUE
955 	 * facility on older kernels by setting tx_queue_len=0 (prior
956 	 * to qdisc init) and then forgetting to reinit tx_queue_len
957 	 * before attaching a qdisc again.
958 	 */
959 	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
960 		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
961 		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
962 	}
963 
964 	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
965 		if (qdisc_is_percpu_stats(sch)) {
966 			sch->cpu_bstats =
967 				netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
968 			if (!sch->cpu_bstats)
969 				goto err_out4;
970 
971 			sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
972 			if (!sch->cpu_qstats)
973 				goto err_out4;
974 		}
975 
976 		if (tca[TCA_STAB]) {
977 			stab = qdisc_get_stab(tca[TCA_STAB]);
978 			if (IS_ERR(stab)) {
979 				err = PTR_ERR(stab);
980 				goto err_out4;
981 			}
982 			rcu_assign_pointer(sch->stab, stab);
983 		}
984 		if (tca[TCA_RATE]) {
985 			seqcount_t *running;
986 
987 			err = -EOPNOTSUPP;
988 			if (sch->flags & TCQ_F_MQROOT)
989 				goto err_out4;
990 
991 			if ((sch->parent != TC_H_ROOT) &&
992 			    !(sch->flags & TCQ_F_INGRESS) &&
993 			    (!p || !(p->flags & TCQ_F_MQROOT)))
994 				running = qdisc_root_sleeping_running(sch);
995 			else
996 				running = &sch->running;
997 
998 			err = gen_new_estimator(&sch->bstats,
999 						sch->cpu_bstats,
1000 						&sch->rate_est,
1001 						NULL,
1002 						running,
1003 						tca[TCA_RATE]);
1004 			if (err)
1005 				goto err_out4;
1006 		}
1007 
1008 		qdisc_hash_add(sch, false);
1009 
1010 		return sch;
1011 	}
1012 	/* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1013 	ops->destroy(sch);
1014 err_out3:
1015 	dev_put(dev);
1016 	kfree((char *) sch - sch->padded);
1017 err_out2:
1018 	module_put(ops->owner);
1019 err_out:
1020 	*errp = err;
1021 	return NULL;
1022 
1023 err_out4:
1024 	free_percpu(sch->cpu_bstats);
1025 	free_percpu(sch->cpu_qstats);
1026 	/*
1027 	 * Any broken qdiscs that would require a ops->reset() here?
1028 	 * The qdisc was never in action so it shouldn't be necessary.
1029 	 */
1030 	qdisc_put_stab(rtnl_dereference(sch->stab));
1031 	if (ops->destroy)
1032 		ops->destroy(sch);
1033 	goto err_out3;
1034 }
1035 
1036 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
1037 {
1038 	struct qdisc_size_table *ostab, *stab = NULL;
1039 	int err = 0;
1040 
1041 	if (tca[TCA_OPTIONS]) {
1042 		if (sch->ops->change == NULL)
1043 			return -EINVAL;
1044 		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
1045 		if (err)
1046 			return err;
1047 	}
1048 
1049 	if (tca[TCA_STAB]) {
1050 		stab = qdisc_get_stab(tca[TCA_STAB]);
1051 		if (IS_ERR(stab))
1052 			return PTR_ERR(stab);
1053 	}
1054 
1055 	ostab = rtnl_dereference(sch->stab);
1056 	rcu_assign_pointer(sch->stab, stab);
1057 	qdisc_put_stab(ostab);
1058 
1059 	if (tca[TCA_RATE]) {
1060 		/* NB: ignores errors from replace_estimator
1061 		 * because the change can't be undone. */
1062 		if (sch->flags & TCQ_F_MQROOT)
1063 			goto out;
1064 		gen_replace_estimator(&sch->bstats,
1065 				      sch->cpu_bstats,
1066 				      &sch->rate_est,
1067 				      NULL,
1068 				      qdisc_root_sleeping_running(sch),
1069 				      tca[TCA_RATE]);
1070 	}
1071 out:
1072 	return 0;
1073 }
1074 
1075 struct check_loop_arg {
1076 	struct qdisc_walker	w;
1077 	struct Qdisc		*p;
1078 	int			depth;
1079 };
1080 
1081 static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1082 			 struct qdisc_walker *w);
1083 
1084 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1085 {
1086 	struct check_loop_arg	arg;
1087 
1088 	if (q->ops->cl_ops == NULL)
1089 		return 0;
1090 
1091 	arg.w.stop = arg.w.skip = arg.w.count = 0;
1092 	arg.w.fn = check_loop_fn;
1093 	arg.depth = depth;
1094 	arg.p = p;
1095 	q->ops->cl_ops->walk(q, &arg.w);
1096 	return arg.w.stop ? -ELOOP : 0;
1097 }
1098 
1099 static int
1100 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1101 {
1102 	struct Qdisc *leaf;
1103 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1104 	struct check_loop_arg *arg = (struct check_loop_arg *)w;
1105 
1106 	leaf = cops->leaf(q, cl);
1107 	if (leaf) {
1108 		if (leaf == arg->p || arg->depth > 7)
1109 			return -ELOOP;
1110 		return check_loop(leaf, arg->p, arg->depth + 1);
1111 	}
1112 	return 0;
1113 }
1114 
1115 /*
1116  * Delete/get qdisc.
1117  */
1118 
1119 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1120 {
1121 	struct net *net = sock_net(skb->sk);
1122 	struct tcmsg *tcm = nlmsg_data(n);
1123 	struct nlattr *tca[TCA_MAX + 1];
1124 	struct net_device *dev;
1125 	u32 clid;
1126 	struct Qdisc *q = NULL;
1127 	struct Qdisc *p = NULL;
1128 	int err;
1129 
1130 	if ((n->nlmsg_type != RTM_GETQDISC) &&
1131 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1132 		return -EPERM;
1133 
1134 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1135 	if (err < 0)
1136 		return err;
1137 
1138 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1139 	if (!dev)
1140 		return -ENODEV;
1141 
1142 	clid = tcm->tcm_parent;
1143 	if (clid) {
1144 		if (clid != TC_H_ROOT) {
1145 			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1146 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1147 				if (!p)
1148 					return -ENOENT;
1149 				q = qdisc_leaf(p, clid);
1150 			} else if (dev_ingress_queue(dev)) {
1151 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1152 			}
1153 		} else {
1154 			q = dev->qdisc;
1155 		}
1156 		if (!q)
1157 			return -ENOENT;
1158 
1159 		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
1160 			return -EINVAL;
1161 	} else {
1162 		q = qdisc_lookup(dev, tcm->tcm_handle);
1163 		if (!q)
1164 			return -ENOENT;
1165 	}
1166 
1167 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1168 		return -EINVAL;
1169 
1170 	if (n->nlmsg_type == RTM_DELQDISC) {
1171 		if (!clid)
1172 			return -EINVAL;
1173 		if (q->handle == 0)
1174 			return -ENOENT;
1175 		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1176 		if (err != 0)
1177 			return err;
1178 	} else {
1179 		qdisc_notify(net, skb, n, clid, NULL, q);
1180 	}
1181 	return 0;
1182 }
1183 
1184 /*
1185  * Create/change qdisc.
1186  */
1187 
1188 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
1189 {
1190 	struct net *net = sock_net(skb->sk);
1191 	struct tcmsg *tcm;
1192 	struct nlattr *tca[TCA_MAX + 1];
1193 	struct net_device *dev;
1194 	u32 clid;
1195 	struct Qdisc *q, *p;
1196 	int err;
1197 
1198 	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1199 		return -EPERM;
1200 
1201 replay:
1202 	/* Reinit, just in case something touches this. */
1203 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1204 	if (err < 0)
1205 		return err;
1206 
1207 	tcm = nlmsg_data(n);
1208 	clid = tcm->tcm_parent;
1209 	q = p = NULL;
1210 
1211 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1212 	if (!dev)
1213 		return -ENODEV;
1214 
1215 
1216 	if (clid) {
1217 		if (clid != TC_H_ROOT) {
1218 			if (clid != TC_H_INGRESS) {
1219 				p = qdisc_lookup(dev, TC_H_MAJ(clid));
1220 				if (!p)
1221 					return -ENOENT;
1222 				q = qdisc_leaf(p, clid);
1223 			} else if (dev_ingress_queue_create(dev)) {
1224 				q = dev_ingress_queue(dev)->qdisc_sleeping;
1225 			}
1226 		} else {
1227 			q = dev->qdisc;
1228 		}
1229 
1230 		/* It may be the default qdisc; ignore it */
1231 		if (q && q->handle == 0)
1232 			q = NULL;
1233 
1234 		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1235 			if (tcm->tcm_handle) {
1236 				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1237 					return -EEXIST;
1238 				if (TC_H_MIN(tcm->tcm_handle))
1239 					return -EINVAL;
1240 				q = qdisc_lookup(dev, tcm->tcm_handle);
1241 				if (!q)
1242 					goto create_n_graft;
1243 				if (n->nlmsg_flags & NLM_F_EXCL)
1244 					return -EEXIST;
1245 				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1246 					return -EINVAL;
1247 				if (q == p ||
1248 				    (p && check_loop(q, p, 0)))
1249 					return -ELOOP;
1250 				atomic_inc(&q->refcnt);
1251 				goto graft;
1252 			} else {
1253 				if (!q)
1254 					goto create_n_graft;
1255 
1256 				/* This magic test requires explanation.
1257 				 *
1258 				 *   We know that some child q is already
1259 				 *   attached to this parent and we have a
1260 				 *   choice: change it or create/graft a new one.
1261 				 *
1262 				 *   1. We are allowed to create/graft only
1263 				 *   if the CREATE and REPLACE flags are set.
1264 				 *
1265 				 *   2. If EXCL is set, the requestor meant that
1266 				 *   a qdisc with handle tcm_handle is not expected
1267 				 *   to exist, so we choose create/graft too.
1268 				 *
1269 				 *   3. The last case is when no flags are set.
1270 				 *   Alas, it is a sort of hole in the API: we
1271 				 *   cannot decide what to do unambiguously.
1272 				 *   For now we select create/graft if the
1273 				 *   user gave a KIND which does not match the existing one.
1274 				 */
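				/* For reference (iproute2 behaviour, not
				 * something enforced here): "tc qdisc add"
				 * sends CREATE|EXCL, "tc qdisc replace"
				 * sends CREATE|REPLACE, and "tc qdisc
				 * change" sends no flags, which is how
				 * each maps onto the cases above.
				 */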
1275 				if ((n->nlmsg_flags & NLM_F_CREATE) &&
1276 				    (n->nlmsg_flags & NLM_F_REPLACE) &&
1277 				    ((n->nlmsg_flags & NLM_F_EXCL) ||
1278 				     (tca[TCA_KIND] &&
1279 				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
1280 					goto create_n_graft;
1281 			}
1282 		}
1283 	} else {
1284 		if (!tcm->tcm_handle)
1285 			return -EINVAL;
1286 		q = qdisc_lookup(dev, tcm->tcm_handle);
1287 	}
1288 
1289 	/* Change qdisc parameters */
1290 	if (q == NULL)
1291 		return -ENOENT;
1292 	if (n->nlmsg_flags & NLM_F_EXCL)
1293 		return -EEXIST;
1294 	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1295 		return -EINVAL;
1296 	err = qdisc_change(q, tca);
1297 	if (err == 0)
1298 		qdisc_notify(net, skb, n, clid, NULL, q);
1299 	return err;
1300 
1301 create_n_graft:
1302 	if (!(n->nlmsg_flags & NLM_F_CREATE))
1303 		return -ENOENT;
1304 	if (clid == TC_H_INGRESS) {
1305 		if (dev_ingress_queue(dev))
1306 			q = qdisc_create(dev, dev_ingress_queue(dev), p,
1307 					 tcm->tcm_parent, tcm->tcm_parent,
1308 					 tca, &err);
1309 		else
1310 			err = -ENOENT;
1311 	} else {
1312 		struct netdev_queue *dev_queue;
1313 
1314 		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1315 			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1316 		else if (p)
1317 			dev_queue = p->dev_queue;
1318 		else
1319 			dev_queue = netdev_get_tx_queue(dev, 0);
1320 
1321 		q = qdisc_create(dev, dev_queue, p,
1322 				 tcm->tcm_parent, tcm->tcm_handle,
1323 				 tca, &err);
1324 	}
1325 	if (q == NULL) {
1326 		if (err == -EAGAIN)
1327 			goto replay;
1328 		return err;
1329 	}
1330 
1331 graft:
1332 	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
1333 	if (err) {
1334 		if (q)
1335 			qdisc_destroy(q);
1336 		return err;
1337 	}
1338 
1339 	return 0;
1340 }
1341 
1342 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1343 			 u32 portid, u32 seq, u16 flags, int event)
1344 {
1345 	struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
1346 	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
1347 	struct tcmsg *tcm;
1348 	struct nlmsghdr  *nlh;
1349 	unsigned char *b = skb_tail_pointer(skb);
1350 	struct gnet_dump d;
1351 	struct qdisc_size_table *stab;
1352 	__u32 qlen;
1353 
1354 	cond_resched();
1355 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1356 	if (!nlh)
1357 		goto out_nlmsg_trim;
1358 	tcm = nlmsg_data(nlh);
1359 	tcm->tcm_family = AF_UNSPEC;
1360 	tcm->tcm__pad1 = 0;
1361 	tcm->tcm__pad2 = 0;
1362 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1363 	tcm->tcm_parent = clid;
1364 	tcm->tcm_handle = q->handle;
1365 	tcm->tcm_info = atomic_read(&q->refcnt);
1366 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1367 		goto nla_put_failure;
1368 	if (q->ops->dump && q->ops->dump(q, skb) < 0)
1369 		goto nla_put_failure;
1370 	qlen = q->q.qlen;
1371 
1372 	stab = rtnl_dereference(q->stab);
1373 	if (stab && qdisc_dump_stab(skb, stab) < 0)
1374 		goto nla_put_failure;
1375 
1376 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1377 					 NULL, &d, TCA_PAD) < 0)
1378 		goto nla_put_failure;
1379 
1380 	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
1381 		goto nla_put_failure;
1382 
1383 	if (qdisc_is_percpu_stats(q)) {
1384 		cpu_bstats = q->cpu_bstats;
1385 		cpu_qstats = q->cpu_qstats;
1386 	}
1387 
1388 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
1389 				  &d, cpu_bstats, &q->bstats) < 0 ||
1390 	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
1391 	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
1392 		goto nla_put_failure;
1393 
1394 	if (gnet_stats_finish_copy(&d) < 0)
1395 		goto nla_put_failure;
1396 
1397 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1398 	return skb->len;
1399 
1400 out_nlmsg_trim:
1401 nla_put_failure:
1402 	nlmsg_trim(skb, b);
1403 	return -1;
1404 }
1405 
1406 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
1407 {
1408 	if (q->flags & TCQ_F_BUILTIN)
1409 		return true;
1410 	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
1411 		return true;
1412 
1413 	return false;
1414 }
1415 
1416 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1417 			struct nlmsghdr *n, u32 clid,
1418 			struct Qdisc *old, struct Qdisc *new)
1419 {
1420 	struct sk_buff *skb;
1421 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1422 
1423 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1424 	if (!skb)
1425 		return -ENOBUFS;
1426 
1427 	if (old && !tc_qdisc_dump_ignore(old, false)) {
1428 		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1429 				  0, RTM_DELQDISC) < 0)
1430 			goto err_out;
1431 	}
1432 	if (new && !tc_qdisc_dump_ignore(new, false)) {
1433 		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1434 				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1435 			goto err_out;
1436 	}
1437 
1438 	if (skb->len)
1439 		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1440 				      n->nlmsg_flags & NLM_F_ECHO);
1441 
1442 err_out:
1443 	kfree_skb(skb);
1444 	return -EINVAL;
1445 }
1446 
1447 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1448 			      struct netlink_callback *cb,
1449 			      int *q_idx_p, int s_q_idx, bool recur,
1450 			      bool dump_invisible)
1451 {
1452 	int ret = 0, q_idx = *q_idx_p;
1453 	struct Qdisc *q;
1454 	int b;
1455 
1456 	if (!root)
1457 		return 0;
1458 
1459 	q = root;
1460 	if (q_idx < s_q_idx) {
1461 		q_idx++;
1462 	} else {
1463 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1464 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1465 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1466 				  RTM_NEWQDISC) <= 0)
1467 			goto done;
1468 		q_idx++;
1469 	}
1470 
1471 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1472 	 * itself has already been dumped.
1473 	 *
1474 	 * If we've already dumped the top-level (ingress) qdisc above and the
1475 	 * global qdisc hashtable, we don't want to hit it again.
1476 	 */
1477 	if (!qdisc_dev(root) || !recur)
1478 		goto out;
1479 
1480 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1481 		if (q_idx < s_q_idx) {
1482 			q_idx++;
1483 			continue;
1484 		}
1485 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1486 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1487 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1488 				  RTM_NEWQDISC) <= 0)
1489 			goto done;
1490 		q_idx++;
1491 	}
1492 
1493 out:
1494 	*q_idx_p = q_idx;
1495 	return ret;
1496 done:
1497 	ret = -1;
1498 	goto out;
1499 }
1500 
1501 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1502 {
1503 	struct net *net = sock_net(skb->sk);
1504 	int idx, q_idx;
1505 	int s_idx, s_q_idx;
1506 	struct net_device *dev;
1507 	const struct nlmsghdr *nlh = cb->nlh;
1508 	struct tcmsg *tcm = nlmsg_data(nlh);
1509 	struct nlattr *tca[TCA_MAX + 1];
1510 	int err;
1511 
1512 	s_idx = cb->args[0];
1513 	s_q_idx = q_idx = cb->args[1];
1514 
1515 	idx = 0;
1516 	ASSERT_RTNL();
1517 
1518 	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL);
1519 	if (err < 0)
1520 		return err;
1521 
1522 	for_each_netdev(net, dev) {
1523 		struct netdev_queue *dev_queue;
1524 
1525 		if (idx < s_idx)
1526 			goto cont;
1527 		if (idx > s_idx)
1528 			s_q_idx = 0;
1529 		q_idx = 0;
1530 
1531 		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1532 				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
1533 			goto done;
1534 
1535 		dev_queue = dev_ingress_queue(dev);
1536 		if (dev_queue &&
1537 		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1538 				       &q_idx, s_q_idx, false,
1539 				       tca[TCA_DUMP_INVISIBLE]) < 0)
1540 			goto done;
1541 
1542 cont:
1543 		idx++;
1544 	}
1545 
1546 done:
1547 	cb->args[0] = idx;
1548 	cb->args[1] = q_idx;
1549 
1550 	return skb->len;
1551 }
1552 
1553 
1554 
1555 /************************************************
1556  *	Traffic classes manipulation.		*
1557  ************************************************/
1558 
1559 
1560 
1561 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
1562 {
1563 	struct net *net = sock_net(skb->sk);
1564 	struct tcmsg *tcm = nlmsg_data(n);
1565 	struct nlattr *tca[TCA_MAX + 1];
1566 	struct net_device *dev;
1567 	struct Qdisc *q = NULL;
1568 	const struct Qdisc_class_ops *cops;
1569 	unsigned long cl = 0;
1570 	unsigned long new_cl;
1571 	u32 portid;
1572 	u32 clid;
1573 	u32 qid;
1574 	int err;
1575 
1576 	if ((n->nlmsg_type != RTM_GETTCLASS) &&
1577 	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1578 		return -EPERM;
1579 
1580 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
1581 	if (err < 0)
1582 		return err;
1583 
1584 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1585 	if (!dev)
1586 		return -ENODEV;
1587 
1588 	/*
1589 	   parent == TC_H_UNSPEC - unspecified parent.
1590 	   parent == TC_H_ROOT   - class is root, which has no parent.
1591 	   parent == X:0	 - parent is root class.
1592 	   parent == X:Y	 - parent is a node in hierarchy.
1593 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
1594 
1595 	   handle == 0:0	 - generate handle from kernel pool.
1596 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
1597 	   handle == X:Y	 - class is X:Y (fully specified).
1598 	   handle == X:0	 - root class.
1599 	 */
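
	/* Example: "tc class add dev eth0 parent 1:1 classid 1:10" arrives
	 * with tcm_parent = 0x00010001 and tcm_handle = 0x00010010 (tc
	 * minors are hex), so qid below resolves to 0x00010000.
	 */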
1600 
1601 	/* Step 1. Determine qdisc handle X:0 */
1602 
1603 	portid = tcm->tcm_parent;
1604 	clid = tcm->tcm_handle;
1605 	qid = TC_H_MAJ(clid);
1606 
1607 	if (portid != TC_H_ROOT) {
1608 		u32 qid1 = TC_H_MAJ(portid);
1609 
1610 		if (qid && qid1) {
1611 			/* If both majors are known, they must be identical. */
1612 			if (qid != qid1)
1613 				return -EINVAL;
1614 		} else if (qid1) {
1615 			qid = qid1;
1616 		} else if (qid == 0)
1617 			qid = dev->qdisc->handle;
1618 
1619 		/* Now qid is a genuine qdisc handle consistent
1620 		 * with both parent and child.
1621 		 *
1622 		 * TC_H_MAJ(portid) may still be unspecified; complete it now.
1623 		 */
1624 		if (portid)
1625 			portid = TC_H_MAKE(qid, portid);
1626 	} else {
1627 		if (qid == 0)
1628 			qid = dev->qdisc->handle;
1629 	}
1630 
1631 	/* OK. Locate qdisc */
1632 	q = qdisc_lookup(dev, qid);
1633 	if (!q)
1634 		return -ENOENT;
1635 
1636 	/* And check that it supports classes */
1637 	cops = q->ops->cl_ops;
1638 	if (cops == NULL)
1639 		return -EINVAL;
1640 
1641 	/* Now try to get class */
1642 	if (clid == 0) {
1643 		if (portid == TC_H_ROOT)
1644 			clid = qid;
1645 	} else
1646 		clid = TC_H_MAKE(qid, clid);
1647 
1648 	if (clid)
1649 		cl = cops->get(q, clid);
1650 
1651 	if (cl == 0) {
1652 		err = -ENOENT;
1653 		if (n->nlmsg_type != RTM_NEWTCLASS ||
1654 		    !(n->nlmsg_flags & NLM_F_CREATE))
1655 			goto out;
1656 	} else {
1657 		switch (n->nlmsg_type) {
1658 		case RTM_NEWTCLASS:
1659 			err = -EEXIST;
1660 			if (n->nlmsg_flags & NLM_F_EXCL)
1661 				goto out;
1662 			break;
1663 		case RTM_DELTCLASS:
1664 			err = -EOPNOTSUPP;
1665 			if (cops->delete)
1666 				err = cops->delete(q, cl);
1667 			if (err == 0)
1668 				tclass_notify(net, skb, n, q, cl,
1669 					      RTM_DELTCLASS);
1670 			goto out;
1671 		case RTM_GETTCLASS:
1672 			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1673 			goto out;
1674 		default:
1675 			err = -EINVAL;
1676 			goto out;
1677 		}
1678 	}
1679 
1680 	new_cl = cl;
1681 	err = -EOPNOTSUPP;
1682 	if (cops->change)
1683 		err = cops->change(q, clid, portid, tca, &new_cl);
1684 	if (err == 0)
1685 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1686 
1687 out:
1688 	if (cl)
1689 		cops->put(q, cl);
1690 
1691 	return err;
1692 }
1693 
1694 
1695 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1696 			  unsigned long cl,
1697 			  u32 portid, u32 seq, u16 flags, int event)
1698 {
1699 	struct tcmsg *tcm;
1700 	struct nlmsghdr  *nlh;
1701 	unsigned char *b = skb_tail_pointer(skb);
1702 	struct gnet_dump d;
1703 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1704 
1705 	cond_resched();
1706 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1707 	if (!nlh)
1708 		goto out_nlmsg_trim;
1709 	tcm = nlmsg_data(nlh);
1710 	tcm->tcm_family = AF_UNSPEC;
1711 	tcm->tcm__pad1 = 0;
1712 	tcm->tcm__pad2 = 0;
1713 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1714 	tcm->tcm_parent = q->handle;
1715 	tcm->tcm_handle = q->handle;
1716 	tcm->tcm_info = 0;
1717 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1718 		goto nla_put_failure;
1719 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1720 		goto nla_put_failure;
1721 
1722 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1723 					 NULL, &d, TCA_PAD) < 0)
1724 		goto nla_put_failure;
1725 
1726 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1727 		goto nla_put_failure;
1728 
1729 	if (gnet_stats_finish_copy(&d) < 0)
1730 		goto nla_put_failure;
1731 
1732 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1733 	return skb->len;
1734 
1735 out_nlmsg_trim:
1736 nla_put_failure:
1737 	nlmsg_trim(skb, b);
1738 	return -1;
1739 }
1740 
1741 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1742 			 struct nlmsghdr *n, struct Qdisc *q,
1743 			 unsigned long cl, int event)
1744 {
1745 	struct sk_buff *skb;
1746 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1747 
1748 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1749 	if (!skb)
1750 		return -ENOBUFS;
1751 
1752 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1753 		kfree_skb(skb);
1754 		return -EINVAL;
1755 	}
1756 
1757 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1758 			      n->nlmsg_flags & NLM_F_ECHO);
1759 }
1760 
1761 struct qdisc_dump_args {
1762 	struct qdisc_walker	w;
1763 	struct sk_buff		*skb;
1764 	struct netlink_callback	*cb;
1765 };
1766 
1767 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1768 			    struct qdisc_walker *arg)
1769 {
1770 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1771 
1772 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1773 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1774 			      RTM_NEWTCLASS);
1775 }
1776 
1777 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1778 				struct tcmsg *tcm, struct netlink_callback *cb,
1779 				int *t_p, int s_t)
1780 {
1781 	struct qdisc_dump_args arg;
1782 
1783 	if (tc_qdisc_dump_ignore(q, false) ||
1784 	    *t_p < s_t || !q->ops->cl_ops ||
1785 	    (tcm->tcm_parent &&
1786 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1787 		(*t_p)++;
1788 		return 0;
1789 	}
1790 	if (*t_p > s_t)
1791 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1792 	arg.w.fn = qdisc_class_dump;
1793 	arg.skb = skb;
1794 	arg.cb = cb;
1795 	arg.w.stop  = 0;
1796 	arg.w.skip = cb->args[1];
1797 	arg.w.count = 0;
1798 	q->ops->cl_ops->walk(q, &arg.w);
1799 	cb->args[1] = arg.w.count;
1800 	if (arg.w.stop)
1801 		return -1;
1802 	(*t_p)++;
1803 	return 0;
1804 }
1805 
1806 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1807 			       struct tcmsg *tcm, struct netlink_callback *cb,
1808 			       int *t_p, int s_t)
1809 {
1810 	struct Qdisc *q;
1811 	int b;
1812 
1813 	if (!root)
1814 		return 0;
1815 
1816 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1817 		return -1;
1818 
1819 	if (!qdisc_dev(root))
1820 		return 0;
1821 
1822 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1823 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1824 			return -1;
1825 	}
1826 
1827 	return 0;
1828 }
1829 
1830 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1831 {
1832 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
1833 	struct net *net = sock_net(skb->sk);
1834 	struct netdev_queue *dev_queue;
1835 	struct net_device *dev;
1836 	int t, s_t;
1837 
1838 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1839 		return 0;
1840 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
1841 	if (!dev)
1842 		return 0;
1843 
1844 	s_t = cb->args[0];
1845 	t = 0;
1846 
1847 	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1848 		goto done;
1849 
1850 	dev_queue = dev_ingress_queue(dev);
1851 	if (dev_queue &&
1852 	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1853 				&t, s_t) < 0)
1854 		goto done;
1855 
1856 done:
1857 	cb->args[0] = t;
1858 
1859 	dev_put(dev);
1860 	return skb->len;
1861 }
1862 
1863 /* Main classifier routine: scans the classifier chain attached
1864  * to this qdisc, (optionally) tests for the protocol, and asks
1865  * the specific classifiers.
1866  */
1867 int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1868 		struct tcf_result *res, bool compat_mode)
1869 {
1870 	__be16 protocol = tc_skb_protocol(skb);
1871 #ifdef CONFIG_NET_CLS_ACT
1872 	const int max_reclassify_loop = 4;
1873 	const struct tcf_proto *old_tp = tp;
1874 	int limit = 0;
1875 
1876 reclassify:
1877 #endif
1878 	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1879 		int err;
1880 
1881 		if (tp->protocol != protocol &&
1882 		    tp->protocol != htons(ETH_P_ALL))
1883 			continue;
1884 
1885 		err = tp->classify(skb, tp, res);
1886 #ifdef CONFIG_NET_CLS_ACT
1887 		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
1888 			goto reset;
1889 #endif
1890 		if (err >= 0)
1891 			return err;
1892 	}
1893 
1894 	return TC_ACT_UNSPEC; /* signal: continue lookup */
1895 #ifdef CONFIG_NET_CLS_ACT
1896 reset:
1897 	if (unlikely(limit++ >= max_reclassify_loop)) {
1898 		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
1899 				       tp->q->ops->id, tp->prio & 0xffff,
1900 				       ntohs(tp->protocol));
1901 		return TC_ACT_SHOT;
1902 	}
1903 
1904 	tp = old_tp;
1905 	protocol = tc_skb_protocol(skb);
1906 	goto reclassify;
1907 #endif
1908 }
1909 EXPORT_SYMBOL(tc_classify);
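
/* Sketch of a typical caller: a classful qdisc's classification step
 * (q, filter_list and struct foo_class are illustrative names):
 *
 *	struct tcf_result res;
 *	struct tcf_proto *fl = rcu_dereference_bh(q->filter_list);
 *	int result = tc_classify(skb, fl, &res, false);
 *
 *	if (result >= 0)
 *		cl = (struct foo_class *)res.class;	// or use res.classid
 *
 * A TC_ACT_UNSPEC result means "no filter matched; keep looking", so
 * callers usually fall back to a default class. Under NET_CLS_ACT the
 * result may instead be an action verdict such as TC_ACT_SHOT.
 */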
1910 
1911 #ifdef CONFIG_PROC_FS
1912 static int psched_show(struct seq_file *seq, void *v)
1913 {
1914 	seq_printf(seq, "%08x %08x %08x %08x\n",
1915 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
1916 		   1000000,
1917 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
1918 
1919 	return 0;
1920 }
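
/* On a typical system with high-resolution timers this renders
 * /proc/net/psched as:
 *
 *	000003e8 00000040 000f4240 3b9aca00
 *
 * i.e. 1000 ns per psched microsecond, 64 ns per psched tick
 * (PSCHED_SHIFT == 6), a constant 1000000 kept for ABI compatibility,
 * and a 1 GHz hrtimer resolution.
 */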
1921 
1922 static int psched_open(struct inode *inode, struct file *file)
1923 {
1924 	return single_open(file, psched_show, NULL);
1925 }
1926 
1927 static const struct file_operations psched_fops = {
1928 	.owner = THIS_MODULE,
1929 	.open = psched_open,
1930 	.read  = seq_read,
1931 	.llseek = seq_lseek,
1932 	.release = single_release,
1933 };
1934 
1935 static int __net_init psched_net_init(struct net *net)
1936 {
1937 	struct proc_dir_entry *e;
1938 
1939 	e = proc_create("psched", 0, net->proc_net, &psched_fops);
1940 	if (e == NULL)
1941 		return -ENOMEM;
1942 
1943 	return 0;
1944 }
1945 
1946 static void __net_exit psched_net_exit(struct net *net)
1947 {
1948 	remove_proc_entry("psched", net->proc_net);
1949 }
1950 #else
1951 static int __net_init psched_net_init(struct net *net)
1952 {
1953 	return 0;
1954 }
1955 
1956 static void __net_exit psched_net_exit(struct net *net)
1957 {
1958 }
1959 #endif
1960 
1961 static struct pernet_operations psched_net_ops = {
1962 	.init = psched_net_init,
1963 	.exit = psched_net_exit,
1964 };
1965 
1966 static int __init pktsched_init(void)
1967 {
1968 	int err;
1969 
1970 	err = register_pernet_subsys(&psched_net_ops);
1971 	if (err) {
1972 		pr_err("pktsched_init: "
1973 		       "cannot initialize per netns operations\n");
1974 		return err;
1975 	}
1976 
1977 	register_qdisc(&pfifo_fast_ops);
1978 	register_qdisc(&pfifo_qdisc_ops);
1979 	register_qdisc(&bfifo_qdisc_ops);
1980 	register_qdisc(&pfifo_head_drop_qdisc_ops);
1981 	register_qdisc(&mq_qdisc_ops);
1982 	register_qdisc(&noqueue_qdisc_ops);
1983 
1984 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1985 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1986 	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
1987 		      NULL);
1988 	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1989 	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
1990 	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
1991 		      NULL);
1992 
1993 	return 0;
1994 }
1995 
1996 subsys_initcall(pktsched_init);
1997