// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <linux/bpf.h>

#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box
   that is able to enqueue packets and to dequeue them (when
   the device is ready to send something) in an order and at times
   determined by the algorithm hidden inside it.

   Qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles
   into a form more intelligible to the kernel, to perform some
   sanity checks and the part of the work that is common to all
   qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.



   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return NULL,
   but that does not mean the queue is empty; it just means that the
   discipline does not want to send anything at this time.
   The queue is really empty only if q->q.qlen == 0.
   For complicated disciplines with multiple queues, q->q is not
   the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code.
   NET_XMIT_DROP 	- this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN	 	- this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue.

   ---reset

   returns the qdisc to its initial state: purges all buffers, clears all
   timers and counters (except statistics), etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */
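
/* A minimal sketch (illustrative only; "child" and the stats handling
 * are assumptions, not part of this file) of how a parent qdisc might
 * act on the enqueue return codes described above:
 *
 *	int err = qdisc_enqueue(skb, child, to_free);
 *
 *	if (err == NET_XMIT_SUCCESS) {
 *		sch->qstats.backlog += qdisc_pkt_len(skb);
 *		sch->q.qlen++;
 *		return NET_XMIT_SUCCESS;
 *	}
 *	if (net_xmit_drop_count(err))
 *		qdisc_qstats_drop(sch);
 *	return err;
 *
 * On failure, net_xmit_drop_count() is nonzero for both NET_XMIT_DROP
 * (this very packet was dropped) and NET_XMIT_CN (probably enqueued,
 * but another packet was dropped), which is why both count as drops.
 */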

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		if (!(cops->find && cops->walk && cops->leaf))
			goto out_einval;

		if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
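
/* Typical (hypothetical) registration from a qdisc module; "my_qdisc_ops"
 * and its callbacks are illustrative assumptions, not symbols defined in
 * the tree:
 *
 *	static struct Qdisc_ops my_qdisc_ops __read_mostly = {
 *		.id		= "myqdisc",
 *		.priv_size	= sizeof(struct my_qdisc_priv),
 *		.enqueue	= my_enqueue,
 *		.dequeue	= my_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= my_init,
 *		.reset		= my_reset,
 *		.destroy	= my_destroy,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init my_module_init(void)
 *	{
 *		return register_qdisc(&my_qdisc_ops);
 *	}
 *
 *	static void __exit my_module_exit(void)
 *	{
 *		unregister_qdisc(&my_qdisc_ops);
 *	}
 */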

void unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);

	WARN(err, "unregister qdisc(%s) failed\n", qops->id);
}
EXPORT_SYMBOL(unregister_qdisc);

/* Get the default qdisc if not otherwise specified */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strscpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}

static struct Qdisc_ops *qdisc_lookup_default(const char *name)
{
	struct Qdisc_ops *q = NULL;

	for (q = qdisc_base; q; q = q->next) {
		if (!strcmp(name, q->id)) {
			if (!bpf_try_module_get(q, q->owner))
				q = NULL;
			break;
		}
	}

	return q;
}

/* Set a new default qdisc to use */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found; drop the lock and try to load the module */
		write_unlock(&qdisc_mod_lock);
		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
		write_lock(&qdisc_mod_lock);

		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set the new default */
		bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}

#ifdef CONFIG_NET_SCH_DEFAULT
/* Set the default value from the kernel config */
static int __init sch_default_qdisc(void)
{
	return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif

/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (root qdisc, all its children, children of children, etc.).
 * Note: the caller either holds the RTNL lock or rcu_read_lock().
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
	struct Qdisc *q;

	if (!qdisc_dev(root))
		return (root->handle == handle ? root : NULL);

	if (!(root->flags & TCQ_F_BUILTIN) &&
	    root->handle == handle)
		return root;

	hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
				   lockdep_rtnl_is_held()) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}

void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
		if (invisible)
			q->flags |= TCQ_F_INVISIBLE;
	}
}
EXPORT_SYMBOL(qdisc_hash_add);

void qdisc_hash_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
		ASSERT_RTNL();
		hash_del_rcu(&q->hash);
	}
}
EXPORT_SYMBOL(qdisc_hash_del);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping),
			handle);
out:
	return q;
}

struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
{
	struct netdev_queue *nq;
	struct Qdisc *q;

	if (!handle)
		return NULL;
	q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
	if (q)
		goto out;

	nq = dev_ingress_queue_rcu(dev);
	if (nq)
		q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping),
					  handle);
out:
	return q;
}

static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->find(p, classid);

	if (cl == 0)
		return NULL;
	return cops->leaf(p, cl);
}

/* Find a queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!bpf_try_module_get(q, q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}

/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To keep backward compatibility with older iproute2 tc
 * utilities, we detect the linklayer setting by detecting whether the
 * rate table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, so some table entries will contain the same value.  The
 * mpu (min packet unit) is also encoded into the old rate table, so
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell.  If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding the mpu to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, calculating the table entry one
 * below, and comparing.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	int low       = roundup(r->mpu, 48);
	int high      = roundup(low+1, 48);
	int cell_low  = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
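
/* Worked example of the detection arithmetic above (numbers are
 * illustrative): with mpu = 96 and cell_log = 3,
 *
 *	low       = roundup(96, 48)	= 96
 *	high      = roundup(97, 48)	= 144
 *	cell_low  = 96 >> 3		= 12
 *	cell_high = (144 >> 3) - 1	= 17
 *
 * If rtab[12] == rtab[17], the table was quantized to 48-byte ATM
 * cells and TC_LINKLAYER_ATM is reported.
 */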

static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
					struct nlattr *tab,
					struct netlink_ext_ack *extack)
{
	struct qdisc_rate_table *rtab;

	if (tab == NULL || r->rate == 0 ||
	    r->cell_log == 0 || r->cell_log >= 32 ||
	    nla_len(tab) != TC_RTAB_SIZE) {
		NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
		return NULL;
	}

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	} else {
		NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);

void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list;
	     (rtab = *rtabp) != NULL;
	     rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);

static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
	[TCA_STAB_BASE]	= { .len = sizeof(struct tc_sizespec) },
	[TCA_STAB_DATA] = { .type = NLA_BINARY },
};

static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
					       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
					  extack);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE]) {
		NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
		return ERR_PTR(-EINVAL);
	}

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA]) {
			NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
			return ERR_PTR(-EINVAL);
		}
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	if (tsize != s->tsize || (!tab && tsize > 0)) {
		NL_SET_ERR_MSG(extack, "Invalid size of size table");
		return ERR_PTR(-EINVAL);
	}

	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 &&
		    memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
			continue;
		stab->refcnt++;
		return stab;
	}

	if (s->size_log > STAB_SIZE_LOG_MAX ||
	    s->cell_log > STAB_SIZE_LOG_MAX) {
		NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
		return ERR_PTR(-EINVAL);
	}

	stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, flex_array_size(stab, data, tsize));

	list_add_tail(&stab->list, &qdisc_stab_list);

	return stab;
}

void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		kfree_rcu(tab, rcu);
	}
}
EXPORT_SYMBOL(qdisc_put_stab);

static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}

void __qdisc_calculate_pkt_len(struct sk_buff *skb,
			       const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
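
/* Worked example of the size table lookup above (illustrative values,
 * assuming tsize is large enough): skb->len = 1000, overhead = 24,
 * cell_align = 0, cell_log = 6, size_log = 0:
 *
 *	pkt_len = 1000 + 24	= 1024
 *	slot    = 1024 >> 6	= 16
 *	pkt_len = stab->data[16] << 0
 *
 * i.e. the size actually charged to the packet is whatever the
 * user-supplied table says for cell index 16 (64-byte cells).
 */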

void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
	if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
		pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
			txt, qdisc->ops->id, qdisc->handle >> 16);
		qdisc->flags |= TCQ_F_WARN_NONWC;
	}
}
EXPORT_SYMBOL(qdisc_warn_nonwc);

static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	rcu_read_lock();
	__netif_schedule(qdisc_root(wd->qdisc));
	rcu_read_unlock();

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
				 clockid_t clockid)
{
	hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED);
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init_clockid);

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
}
EXPORT_SYMBOL(qdisc_watchdog_init);

void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
				      u64 delta_ns)
{
	bool deactivated;

	rcu_read_lock();
	deactivated = test_bit(__QDISC_STATE_DEACTIVATED,
			       &qdisc_root_sleeping(wd->qdisc)->state);
	rcu_read_unlock();
	if (deactivated)
		return;

	if (hrtimer_is_queued(&wd->timer)) {
		u64 softexpires;

		softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer));
		/* If the timer is already set in [expires, expires + delta_ns],
		 * do not reprogram it.
		 */
		if (softexpires - expires <= delta_ns)
			return;
	}

	hrtimer_start_range_ns(&wd->timer,
			       ns_to_ktime(expires),
			       delta_ns,
			       HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
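
/* Typical (hypothetical) watchdog usage from a rate-limiting qdisc;
 * "q" and "next_send_time" are illustrative assumptions:
 *
 *	In ->init(), set up the timer once:
 *		qdisc_watchdog_init(&q->watchdog, sch);
 *
 *	In ->dequeue(), when the head packet is not yet due, arm the
 *	timer so the device is rescheduled at that instant:
 *		qdisc_watchdog_schedule_ns(&q->watchdog, next_send_time);
 *		return NULL;
 *
 *	In ->reset() / ->destroy():
 *		qdisc_watchdog_cancel(&q->watchdog);
 */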

static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
{
	struct hlist_head *h;
	unsigned int i;

	h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);

	if (h != NULL) {
		for (i = 0; i < n; i++)
			INIT_HLIST_HEAD(&h[i]);
	}
	return h;
}

void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash     = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);

int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
{
	unsigned int size = 4;

	clhash->hash = qdisc_class_hash_alloc(size);
	if (!clhash->hash)
		return -ENOMEM;
	clhash->hashsize  = size;
	clhash->hashmask  = size - 1;
	clhash->hashelems = 0;
	return 0;
}
EXPORT_SYMBOL(qdisc_class_hash_init);

void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);

void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);

void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
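
/* Sketch of the intended lifecycle of a class hash in a (hypothetical)
 * classful qdisc; "q->clhash" and "cl" are illustrative assumptions:
 *
 *	->init():	qdisc_class_hash_init(&q->clhash);
 *	new class:	qdisc_class_hash_insert(&q->clhash, &cl->common);
 *			qdisc_class_hash_grow(sch, &q->clhash);
 *	delete class:	qdisc_class_hash_remove(&q->clhash, &cl->common);
 *	->destroy():	qdisc_class_hash_destroy(&q->clhash);
 *
 * Note that qdisc_class_hash_grow() takes the qdisc tree lock itself,
 * so it must be called outside of sch_tree_lock() sections.
 */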

/* Allocate a unique handle from the space managed by the kernel.
 * The possible range is [8000-FFFF]:0000 (0x8000 values).
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while (--i > 0);

	return 0;
}

void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;
	bool notify;
	int drops;

	drops = max_t(int, n, 0);
	rcu_read_lock();
	while ((parentid = sch->parent)) {
		if (parentid == TC_H_ROOT)
			break;

		if (sch->flags & TCQ_F_NOPARENT)
			break;
		/* Notify the parent qdisc only if the child qdisc becomes empty. */
		notify = !sch->q.qlen;
		/* TODO: perform the search on a per-txq basis */
		sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON_ONCE(parentid != TC_H_ROOT);
			break;
		}
		cops = sch->ops->cl_ops;
		if (notify && cops->qlen_notify) {
			/* Note that qlen_notify must be idempotent as it may get
			 * called multiple times.
			 */
			cl = cops->find(sch, parentid);
			cops->qlen_notify(sch, cl);
		}
		sch->q.qlen -= n;
		sch->qstats.backlog -= len;
		__qdisc_qstats_drop(sch, drops);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
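
/* Sketch of how a qdisc typically uses the helper above after dropping
 * packets internally (e.g. from an aging or overlimit path); over_limit()
 * and dequeue_victim() are illustrative assumptions:
 *
 *	unsigned int dropped_pkts = 0, dropped_bytes = 0;
 *
 *	while (over_limit(q)) {
 *		struct sk_buff *skb = dequeue_victim(q);
 *
 *		dropped_pkts++;
 *		dropped_bytes += qdisc_pkt_len(skb);
 *		kfree_skb(skb);
 *	}
 *	qdisc_tree_reduce_backlog(sch, dropped_pkts, dropped_bytes);
 *
 * so that every ancestor's qlen/backlog stays consistent with the
 * packets that silently disappeared from this subtree.
 */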

int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
			      void *type_data)
{
	struct net_device *dev = qdisc_dev(sch);
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;
	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}
EXPORT_SYMBOL(qdisc_offload_dump_helper);

void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
				struct Qdisc *new, struct Qdisc *old,
				enum tc_setup_type type, void *type_data,
				struct netlink_ext_ack *extack)
{
	bool any_qdisc_is_offloaded;
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return;

	err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);

	/* Don't report an error if the graft is part of a destroy operation. */
	if (!err || !new || new == &noop_qdisc)
		return;

	/* Don't report an error if neither the parent, the old child, nor
	 * the new one is offloaded.
	 */
	any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
	any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;

	if (any_qdisc_is_offloaded)
		NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
}
EXPORT_SYMBOL(qdisc_offload_graft_helper);

void qdisc_offload_query_caps(struct net_device *dev,
			      enum tc_setup_type type,
			      void *caps, size_t caps_len)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	struct tc_query_caps_base base = {
		.type = type,
		.caps = caps,
	};

	memset(caps, 0, caps_len);

	if (ops->ndo_setup_tc)
		ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
}
EXPORT_SYMBOL(qdisc_offload_query_caps);

static void qdisc_offload_graft_root(struct net_device *dev,
				     struct Qdisc *new, struct Qdisc *old,
				     struct netlink_ext_ack *extack)
{
	struct tc_root_qopt_offload graft_offload = {
		.command	= TC_ROOT_GRAFT,
		.handle		= new ? new->handle : 0,
		.ingress	= (new && new->flags & TCQ_F_INGRESS) ||
				  (old && old->flags & TCQ_F_INGRESS),
	};

	qdisc_offload_graft_helper(dev, NULL, new, old,
				   TC_SETUP_ROOT_QDISC, &graft_offload, extack);
}

static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event,
			 struct netlink_ext_ack *extack)
{
	struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
	struct gnet_stats_queue __percpu *cpu_qstats = NULL;
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	struct qdisc_size_table *stab;
	u32 block_index;
	__u32 qlen;

	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = refcount_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->ingress_block_get) {
		block_index = q->ops->ingress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->egress_block_get) {
		block_index = q->ops->egress_block_get(q);
		if (block_index &&
		    nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
			goto nla_put_failure;
	}
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
		goto nla_put_failure;
	qlen = qdisc_qlen_sum(q);

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (qdisc_is_percpu_stats(q)) {
		cpu_bstats = q->cpu_bstats;
		cpu_qstats = q->cpu_qstats;
	}

	if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	if (extack && extack->_msg &&
	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
		goto out_nlmsg_trim;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;

	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
{
	if (q->flags & TCQ_F_BUILTIN)
		return true;
	if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
		return true;

	return false;
}

static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
			    struct nlmsghdr *n, u32 clid, struct Qdisc *q,
			    struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (!tc_qdisc_dump_ignore(q, false)) {
		if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
				  RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new,
			struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
		return 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old, false)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC, extack) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new, false)) {
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}

static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new,
			       struct netlink_ext_ack *extack)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new, extack);

	if (old)
		qdisc_put(old);
}

static void qdisc_clear_nolock(struct Qdisc *sch)
{
	sch->flags &= ~TCQ_F_NOLOCK;
	if (!(sch->flags & TCQ_F_CPUSTATS))
		return;

	free_percpu(sch->cpu_bstats);
	free_percpu(sch->cpu_qstats);
	sch->cpu_bstats = NULL;
	sch->cpu_qstats = NULL;
	sch->flags &= ~TCQ_F_CPUSTATS;
}

/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy the old qdisc.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old,
		       struct netlink_ext_ack *extack)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);

	if (parent == NULL) {
		unsigned int i, num_q, ingress;
		struct netdev_queue *dev_queue;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			ingress = 1;
			dev_queue = dev_ingress_queue(dev);
			if (!dev_queue) {
				NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
				return -ENOENT;
			}

			q = rtnl_dereference(dev_queue->qdisc_sleeping);

			/* This is the counterpart of the qdisc_refcount_inc_nz() call in
			 * __tcf_qdisc_find() for filter requests.
			 */
			if (!qdisc_refcount_dec_if_one(q)) {
				NL_SET_ERR_MSG(extack,
					       "Current ingress or clsact Qdisc has ongoing filter requests");
				return -EBUSY;
			}
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		qdisc_offload_graft_root(dev, new, old, extack);

		if (new && new->ops->attach && !ingress)
			goto skip;

		if (!ingress) {
			for (i = 0; i < num_q; i++) {
				dev_queue = netdev_get_tx_queue(dev, i);
				old = dev_graft_qdisc(dev_queue, new);

				if (new && i > 0)
					qdisc_refcount_inc(new);
				qdisc_put(old);
			}
		} else {
			old = dev_graft_qdisc(dev_queue, NULL);

			/* {ingress,clsact}_destroy() @old before grafting @new to avoid
			 * unprotected concurrent accesses to net_device::miniq_{in,e}gress
			 * pointer(s) in mini_qdisc_pair_swap().
			 */
			qdisc_notify(net, skb, n, classid, old, new, extack);
			qdisc_destroy(old);

			dev_graft_qdisc(dev_queue, new);
		}

skip:
		if (!ingress) {
			old = rtnl_dereference(dev->qdisc);
			if (new && !new->ops->attach)
				qdisc_refcount_inc(new);
			rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);

			notify_and_destroy(net, skb, n, classid, old, new, extack);

			if (new && new->ops->attach)
				new->ops->attach(new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
		unsigned long cl;
		int err;

		/* Only support running a class lockless if the parent is lockless */
		if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
			qdisc_clear_nolock(new);

		if (!cops || !cops->graft)
			return -EOPNOTSUPP;

		cl = cops->find(parent, classid);
		if (!cl) {
			NL_SET_ERR_MSG(extack, "Specified class not found");
			return -ENOENT;
		}

		if (new && new->ops == &noqueue_qdisc_ops) {
			NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
			return -EINVAL;
		}

		if (new &&
		    !(parent->flags & TCQ_F_MQROOT) &&
		    rcu_access_pointer(new->stab)) {
			NL_SET_ERR_MSG(extack, "STAB not supported on a non root");
			return -EINVAL;
		}
		err = cops->graft(parent, cl, new, &old, extack);
		if (err)
			return err;
		notify_and_destroy(net, skb, n, classid, old, new, extack);
	}
	return 0;
}

static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
				   struct netlink_ext_ack *extack)
{
	u32 block_index;

	if (tca[TCA_INGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->ingress_block_set) {
			NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->ingress_block_set(sch, block_index);
	}
	if (tca[TCA_EGRESS_BLOCK]) {
		block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);

		if (!block_index) {
			NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
			return -EINVAL;
		}
		if (!sch->ops->egress_block_set) {
			NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
			return -EOPNOTSUPP;
		}
		sch->ops->egress_block_set(sch, block_index);
	}
	return 0;
}

/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
				  struct netdev_queue *dev_queue,
				  u32 parent, u32 handle,
				  struct nlattr **tca, int *errp,
				  struct netlink_ext_ack *extack)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
	if (!ops) {
		err = -ENOENT;
		NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
		goto err_out;
	}

	sch = qdisc_alloc(dev_queue, ops, extack);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		if (!(sch->flags & TCQ_F_INGRESS)) {
			NL_SET_ERR_MSG(extack,
				       "Specified parent ID is reserved for ingress and clsact Qdiscs");
			err = -EINVAL;
			goto err_out3;
		}
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			if (handle == 0) {
				NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
				err = -ENOSPC;
				goto err_out3;
			}
		}
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	/* This exists to keep backward compatibility with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
	if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
		WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
		netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
	}

	err = qdisc_block_indexes_set(sch, tca, extack);
	if (err)
		goto err_out3;

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab)) {
			err = PTR_ERR(stab);
			goto err_out3;
		}
		rcu_assign_pointer(sch->stab, stab);
	}

	if (ops->init) {
		err = ops->init(sch, tca[TCA_OPTIONS], extack);
		if (err != 0)
			goto err_out4;
	}

	if (tca[TCA_RATE]) {
		err = -EOPNOTSUPP;
		if (sch->flags & TCQ_F_MQROOT) {
			NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
			goto err_out4;
		}

		err = gen_new_estimator(&sch->bstats,
					sch->cpu_bstats,
					&sch->rate_est,
					NULL,
					true,
					tca[TCA_RATE]);
		if (err) {
			NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
			goto err_out4;
		}
	}

	qdisc_hash_add(sch, false);
	trace_qdisc_create(ops, dev, parent);

	return sch;

err_out4:
	/* Even if ops->init() failed, we call ops->destroy()
	 * like qdisc_create_dflt().
	 */
	if (ops->destroy)
		ops->destroy(sch);
	qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
	lockdep_unregister_key(&sch->root_lock_key);
	netdev_put(dev, &sch->dev_tracker);
	qdisc_free(sch);
err_out2:
	bpf_module_put(ops, ops->owner);
err_out:
	*errp = err;
	return NULL;
}

static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
			struct netlink_ext_ack *extack)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (!sch->ops->change) {
			NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
			return -EINVAL;
		}
		if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
			NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
			return -EOPNOTSUPP;
		}
		err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB], extack);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignore errors from replace_estimator
		 * because the change can't be undone.
		 */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      true,
				      tca[TCA_RATE]);
	}
out:
	return 0;
}

struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}

const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
	[TCA_KIND]		= { .type = NLA_STRING },
	[TCA_RATE]		= { .type = NLA_BINARY,
				    .len = sizeof(struct tc_estimator) },
	[TCA_STAB]		= { .type = NLA_NESTED },
	[TCA_DUMP_INVISIBLE]	= { .type = NLA_FLAG },
	[TCA_CHAIN]		= { .type = NLA_U32 },
	[TCA_INGRESS_BLOCK]	= { .type = NLA_U32 },
	[TCA_EGRESS_BLOCK]	= { .type = NLA_U32 },
};

/*
 * Delete/get qdisc.
 */

static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			  struct netlink_ext_ack *extack,
			  struct net_device *dev,
			  struct nlattr *tca[TCA_MAX + 1],
			  struct tcmsg *tcm)
{
	struct net *net = sock_net(skb->sk);
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}
		if (!q) {
			NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
			return -ENOENT;
		}

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Invalid handle");
			return -EINVAL;
		}
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q) {
			NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
			return -ENOENT;
		}
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
		return -EINVAL;
	}

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid) {
			NL_SET_ERR_MSG(extack, "Classid cannot be zero");
			return -EINVAL;
		}
		if (q->handle == 0) {
			NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
			return -ENOENT;
		}
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
		if (err != 0)
			return err;
	} else {
		qdisc_get_notify(net, skb, n, clid, q, NULL);
	}
	return 0;
}

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	int err;

	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
				     rtm_tca_policy, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	netdev_lock_ops(dev);
	err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm);
	netdev_unlock_ops(dev);

	return err;
}

static bool req_create_or_replace(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_REPLACE);
}

static bool req_create_exclusive(struct nlmsghdr *n)
{
	return (n->nlmsg_flags & NLM_F_CREATE &&
		n->nlmsg_flags & NLM_F_EXCL);
}

static bool req_change(struct nlmsghdr *n)
{
	return (!(n->nlmsg_flags & NLM_F_CREATE) &&
		!(n->nlmsg_flags & NLM_F_REPLACE) &&
		!(n->nlmsg_flags & NLM_F_EXCL));
}
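
/* For reference (illustrative; based on common iproute2 behaviour, not
 * guaranteed for all userspace):
 *
 *	tc qdisc add	 -> NLM_F_CREATE | NLM_F_EXCL	 (req_create_exclusive)
 *	tc qdisc replace -> NLM_F_CREATE | NLM_F_REPLACE (req_create_or_replace)
 *	tc qdisc change	 -> none of the three flags	 (req_change)
 */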

static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			     struct netlink_ext_ack *extack,
			     struct net_device *dev,
			     struct nlattr *tca[TCA_MAX + 1],
			     struct tcmsg *tcm)
{
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	u32 clid;
	int err;

	clid = tcm->tcm_parent;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p) {
					NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
					return -ENOENT;
				}
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping);
			}
		} else {
			q = rtnl_dereference(dev->qdisc);
		}

		/* It may be the default qdisc; ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
					NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
					return -EEXIST;
				}
				if (TC_H_MIN(tcm->tcm_handle)) {
					NL_SET_ERR_MSG(extack, "Invalid minor handle");
					return -EINVAL;
				}
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (q->parent != tcm->tcm_parent) {
					NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent");
					return -EINVAL;
				}
				if (n->nlmsg_flags & NLM_F_EXCL) {
					NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
					return -EEXIST;
				}
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
					return -EINVAL;
				}
				if (q->flags & TCQ_F_INGRESS) {
					NL_SET_ERR_MSG(extack,
						       "Cannot regraft ingress or clsact Qdiscs");
					return -EINVAL;
				}
				if (q == p ||
				    (p && check_loop(q, p, 0))) {
					NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
					return -ELOOP;
				}
				if (clid == TC_H_INGRESS) {
					NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
					return -EINVAL;
				}
				qdisc_refcount_inc(q);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and we have a choice:
				 *   1) change it or 2) create/graft a new one.
				 *   If the requested qdisc kind is different
				 *   from the existing one, then we choose graft.
				 *   If they are the same, then this is a "change"
				 *   operation - just let it fall through.
				 *
				 *   1. We are allowed to create/graft only
				 *   if the request explicitly states
				 *   "please create if it doesn't exist".
				 *
				 *   2. If the request is for an exclusive create,
				 *   then the qdisc tcm_handle is not expected
				 *   to exist, so we choose create/graft too.
				 *
				 *   3. The last case is when no flags are set.
				 *   This will happen when, for example, the tc
				 *   utility issues a "change" command.
				 *   Alas, it is sort of a hole in the API; we
				 *   cannot decide what to do unambiguously.
				 *   For now we select create/graft.
				 */
				if (tca[TCA_KIND] &&
				    nla_strcmp(tca[TCA_KIND], q->ops->id)) {
					if (req_create_or_replace(n) ||
					    req_create_exclusive(n))
						goto create_n_graft;
					else if (req_change(n))
						goto create_n_graft2;
				}
			}
		}
	} else {
		if (!tcm->tcm_handle) {
			NL_SET_ERR_MSG(extack, "Handle cannot be zero");
			return -EINVAL;
		}
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (!q) {
		NL_SET_ERR_MSG(extack, "Specified qdisc not found");
		return -ENOENT;
	}
	if (n->nlmsg_flags & NLM_F_EXCL) {
		NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
		return -EEXIST;
	}
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
		NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc");
		return -EINVAL;
	}
	err = qdisc_change(q, tca, extack);
	if (err == 0)
		qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
		NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
		return -ENOENT;
	}
create_n_graft2:
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev)) {
			q = qdisc_create(dev, dev_ingress_queue(dev),
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err, extack);
		} else {
			NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
			err = -ENOENT;
		}
	} else {
		struct netdev_queue *dev_queue;

		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err, extack);
	}
	if (!q)
		return err;

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
	if (err) {
		if (q)
			qdisc_put(q);
		return err;
	}

	return 0;
}
1766 
request_qdisc_module(struct nlattr * kind)1767 static void request_qdisc_module(struct nlattr *kind)
1768 {
1769 	struct Qdisc_ops *ops;
1770 	char name[IFNAMSIZ];
1771 
1772 	if (!kind)
1773 		return;
1774 
1775 	ops = qdisc_lookup_ops(kind);
1776 	if (ops) {
1777 		bpf_module_put(ops, ops->owner);
1778 		return;
1779 	}
1780 
1781 	if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1782 		rtnl_unlock();
1783 		request_module(NET_SCH_ALIAS_PREFIX "%s", name);
1784 		rtnl_lock();
1785 	}
1786 }
1787 
1788 /*
1789  * Create/change qdisc.
1790  */
1791 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1792 			   struct netlink_ext_ack *extack)
1793 {
1794 	struct net *net = sock_net(skb->sk);
1795 	struct nlattr *tca[TCA_MAX + 1];
1796 	struct net_device *dev;
1797 	struct tcmsg *tcm;
1798 	int err;
1799 
1800 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1801 				     rtm_tca_policy, extack);
1802 	if (err < 0)
1803 		return err;
1804 
1805 	request_qdisc_module(tca[TCA_KIND]);
1806 
1807 	tcm = nlmsg_data(n);
1808 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1809 	if (!dev)
1810 		return -ENODEV;
1811 
1812 	netdev_lock_ops(dev);
1813 	err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm);
1814 	netdev_unlock_ops(dev);
1815 
1816 	return err;
1817 }
1818 
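/* Dump @root and, when @recur is true, all other qdiscs hashed on the
 * same device.  *q_idx_p is the resume cursor across netlink dump
 * calls: entries below s_q_idx were sent earlier and are skipped.
 * Returns 0 when done, -1 once the skb runs out of room.
 */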
1819 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1820 			      struct netlink_callback *cb,
1821 			      int *q_idx_p, int s_q_idx, bool recur,
1822 			      bool dump_invisible)
1823 {
1824 	int ret = 0, q_idx = *q_idx_p;
1825 	struct Qdisc *q;
1826 	int b;
1827 
1828 	if (!root)
1829 		return 0;
1830 
1831 	q = root;
1832 	if (q_idx < s_q_idx) {
1833 		q_idx++;
1834 	} else {
1835 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1836 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1837 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1838 				  RTM_NEWQDISC, NULL) <= 0)
1839 			goto done;
1840 		q_idx++;
1841 	}
1842 
1843 	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
1844 	 * itself has already been dumped.
1845 	 *
1846 	 * If the top-level (ingress) qdisc was already dumped above and we are
1847 	 * not recursing, don't walk the per-device qdisc hashtable again.
1848 	 */
1849 	if (!qdisc_dev(root) || !recur)
1850 		goto out;
1851 
1852 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1853 		if (q_idx < s_q_idx) {
1854 			q_idx++;
1855 			continue;
1856 		}
1857 		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1858 		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1859 				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
1860 				  RTM_NEWQDISC, NULL) <= 0)
1861 			goto done;
1862 		q_idx++;
1863 	}
1864 
1865 out:
1866 	*q_idx_p = q_idx;
1867 	return ret;
1868 done:
1869 	ret = -1;
1870 	goto out;
1871 }
1872 
1873 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1874 {
1875 	struct net *net = sock_net(skb->sk);
1876 	int idx, q_idx;
1877 	int s_idx, s_q_idx;
1878 	struct net_device *dev;
1879 	const struct nlmsghdr *nlh = cb->nlh;
1880 	struct nlattr *tca[TCA_MAX + 1];
1881 	int err;
1882 
1883 	s_idx = cb->args[0];
1884 	s_q_idx = q_idx = cb->args[1];
1885 
1886 	idx = 0;
1887 	ASSERT_RTNL();
1888 
1889 	err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1890 				     rtm_tca_policy, cb->extack);
1891 	if (err < 0)
1892 		return err;
1893 
1894 	for_each_netdev(net, dev) {
1895 		struct netdev_queue *dev_queue;
1896 
1897 		if (idx < s_idx)
1898 			goto cont;
1899 		if (idx > s_idx)
1900 			s_q_idx = 0;
1901 		q_idx = 0;
1902 
1903 		netdev_lock_ops(dev);
1904 		if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1905 				       skb, cb, &q_idx, s_q_idx,
1906 				       true, tca[TCA_DUMP_INVISIBLE]) < 0) {
1907 			netdev_unlock_ops(dev);
1908 			goto done;
1909 		}
1910 
1911 		dev_queue = dev_ingress_queue(dev);
1912 		if (dev_queue &&
1913 		    tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping),
1914 				       skb, cb, &q_idx, s_q_idx, false,
1915 				       tca[TCA_DUMP_INVISIBLE]) < 0) {
1916 			netdev_unlock_ops(dev);
1917 			goto done;
1918 		}
1919 		netdev_unlock_ops(dev);
1920 
1921 cont:
1922 		idx++;
1923 	}
1924 
1925 done:
1926 	cb->args[0] = idx;
1927 	cb->args[1] = q_idx;
1928 
1929 	return skb->len;
1930 }
1931 
1932 
1933 
1934 /************************************************
1935  *	Traffic classes manipulation.		*
1936  ************************************************/
1937 
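/* Build one RTM_*TCLASS message for class @cl of qdisc @q: kind string,
 * class attributes via cl_ops->dump() and statistics via
 * cl_ops->dump_stats().  Returns skb->len on success, -1 after trimming
 * the partially written message.
 */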
1938 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1939 			  unsigned long cl, u32 portid, u32 seq, u16 flags,
1940 			  int event, struct netlink_ext_ack *extack)
1941 {
1942 	struct tcmsg *tcm;
1943 	struct nlmsghdr  *nlh;
1944 	unsigned char *b = skb_tail_pointer(skb);
1945 	struct gnet_dump d;
1946 	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1947 
1948 	cond_resched();
1949 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1950 	if (!nlh)
1951 		goto out_nlmsg_trim;
1952 	tcm = nlmsg_data(nlh);
1953 	tcm->tcm_family = AF_UNSPEC;
1954 	tcm->tcm__pad1 = 0;
1955 	tcm->tcm__pad2 = 0;
1956 	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1957 	tcm->tcm_parent = q->handle;
1958 	tcm->tcm_handle = q->handle;
1959 	tcm->tcm_info = 0;
1960 	if (nla_put_string(skb, TCA_KIND, q->ops->id))
1961 		goto nla_put_failure;
1962 	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1963 		goto nla_put_failure;
1964 
1965 	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1966 					 NULL, &d, TCA_PAD) < 0)
1967 		goto nla_put_failure;
1968 
1969 	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1970 		goto nla_put_failure;
1971 
1972 	if (gnet_stats_finish_copy(&d) < 0)
1973 		goto nla_put_failure;
1974 
1975 	if (extack && extack->_msg &&
1976 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
1977 		goto out_nlmsg_trim;
1978 
1979 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1980 
1981 	return skb->len;
1982 
1983 out_nlmsg_trim:
1984 nla_put_failure:
1985 	nlmsg_trim(skb, b);
1986 	return -1;
1987 }
1988 
1989 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1990 			 struct nlmsghdr *n, struct Qdisc *q,
1991 			 unsigned long cl, int event, struct netlink_ext_ack *extack)
1992 {
1993 	struct sk_buff *skb;
1994 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1995 
1996 	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
1997 		return 0;
1998 
1999 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2000 	if (!skb)
2001 		return -ENOBUFS;
2002 
2003 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
2004 		kfree_skb(skb);
2005 		return -EINVAL;
2006 	}
2007 
2008 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2009 			      n->nlmsg_flags & NLM_F_ECHO);
2010 }
2011 
2012 static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
2013 			     struct nlmsghdr *n, struct Qdisc *q,
2014 			     unsigned long cl, struct netlink_ext_ack *extack)
2015 {
2016 	struct sk_buff *skb;
2017 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2018 
2019 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2020 	if (!skb)
2021 		return -ENOBUFS;
2022 
2023 	if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
2024 			   extack) < 0) {
2025 		kfree_skb(skb);
2026 		return -EINVAL;
2027 	}
2028 
2029 	return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2030 			      n->nlmsg_flags & NLM_F_ECHO);
2031 }
2032 
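/* The deletion notification is built before cops->delete() runs, while
 * the class can still be dumped, but it is only sent once the delete
 * has actually succeeded.
 */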
2033 static int tclass_del_notify(struct net *net,
2034 			     const struct Qdisc_class_ops *cops,
2035 			     struct sk_buff *oskb, struct nlmsghdr *n,
2036 			     struct Qdisc *q, unsigned long cl,
2037 			     struct netlink_ext_ack *extack)
2038 {
2039 	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2040 	struct sk_buff *skb;
2041 	int err = 0;
2042 
2043 	if (!cops->delete)
2044 		return -EOPNOTSUPP;
2045 
2046 	if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
2047 		skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2048 		if (!skb)
2049 			return -ENOBUFS;
2050 
2051 		if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
2052 				   RTM_DELTCLASS, extack) < 0) {
2053 			kfree_skb(skb);
2054 			return -EINVAL;
2055 		}
2056 	} else {
2057 		skb = NULL;
2058 	}
2059 
2060 	err = cops->delete(q, cl, extack);
2061 	if (err) {
2062 		kfree_skb(skb);
2063 		return err;
2064 	}
2065 
2066 	err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
2067 				   n->nlmsg_flags & NLM_F_ECHO);
2068 	return err;
2069 }
2070 
2071 #ifdef CONFIG_NET_CLS
2072 
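/* Filters refer to classes by classid, so when a class is created or
 * deleted those references must be re-bound.  The two walkers below
 * visit every filter on every chain of the qdisc's blocks and invoke
 * the filter's bind_class() op under the qdisc tree lock.
 */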
2073 struct tcf_bind_args {
2074 	struct tcf_walker w;
2075 	unsigned long base;
2076 	unsigned long cl;
2077 	u32 classid;
2078 };
2079 
2080 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2081 {
2082 	struct tcf_bind_args *a = (void *)arg;
2083 
2084 	if (n && tp->ops->bind_class) {
2085 		struct Qdisc *q = tcf_block_q(tp->chain->block);
2086 
2087 		sch_tree_lock(q);
2088 		tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
2089 		sch_tree_unlock(q);
2090 	}
2091 	return 0;
2092 }
2093 
2094 struct tc_bind_class_args {
2095 	struct qdisc_walker w;
2096 	unsigned long new_cl;
2097 	u32 portid;
2098 	u32 clid;
2099 };
2100 
2101 static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
2102 				struct qdisc_walker *w)
2103 {
2104 	struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
2105 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2106 	struct tcf_block *block;
2107 	struct tcf_chain *chain;
2108 
2109 	block = cops->tcf_block(q, cl, NULL);
2110 	if (!block)
2111 		return 0;
2112 	for (chain = tcf_get_next_chain(block, NULL);
2113 	     chain;
2114 	     chain = tcf_get_next_chain(block, chain)) {
2115 		struct tcf_proto *tp;
2116 
2117 		for (tp = tcf_get_next_proto(chain, NULL);
2118 		     tp; tp = tcf_get_next_proto(chain, tp)) {
2119 			struct tcf_bind_args arg = {};
2120 
2121 			arg.w.fn = tcf_node_bind;
2122 			arg.classid = a->clid;
2123 			arg.base = cl;
2124 			arg.cl = a->new_cl;
2125 			tp->ops->walk(tp, &arg.w, true);
2126 		}
2127 	}
2128 
2129 	return 0;
2130 }
2131 
2132 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2133 			   unsigned long new_cl)
2134 {
2135 	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
2136 	struct tc_bind_class_args args = {};
2137 
2138 	if (!cops->tcf_block)
2139 		return;
2140 	args.portid = portid;
2141 	args.clid = clid;
2142 	args.new_cl = new_cl;
2143 	args.w.fn = tc_bind_class_walker;
2144 	q->ops->cl_ops->walk(q, &args.w);
2145 }
2146 
2147 #else
2148 
2149 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
2150 			   unsigned long new_cl)
2151 {
2152 }
2153 
2154 #endif
2155 
2156 static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2157 			   struct netlink_ext_ack *extack,
2158 			   struct net_device *dev,
2159 			   struct nlattr *tca[TCA_MAX + 1],
2160 			   struct tcmsg *tcm)
2161 {
2162 	struct net *net = sock_net(skb->sk);
2163 	const struct Qdisc_class_ops *cops;
2164 	struct Qdisc *q = NULL;
2165 	unsigned long cl = 0;
2166 	unsigned long new_cl;
2167 	u32 portid;
2168 	u32 clid;
2169 	u32 qid;
2170 	int err;
2171 
2172 	/*
2173 	   parent == TC_H_UNSPEC - unspecified parent.
2174 	   parent == TC_H_ROOT   - class is root, which has no parent.
2175 	   parent == X:0	 - parent is root class.
2176 	   parent == X:Y	 - parent is a node in hierarchy.
2177 	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.
2178 
2179 	   handle == 0:0	 - generate handle from kernel pool.
2180 	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
2181 	   handle == X:Y	 - class is X:Y (fully specified).
2182 	   handle == X:0	 - root class.
2183 	 */
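	/* Illustrative example (hypothetical command, not from this file):
	 * "tc class add dev eth0 parent 1: classid 1:10 ..." arrives with
	 * tcm_parent == 1:0 and tcm_handle == 1:10, so qid resolves to 1:0
	 * and clid to 1:10 below.
	 */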
2184 
2185 	/* Step 1. Determine qdisc handle X:0 */
2186 
2187 	portid = tcm->tcm_parent;
2188 	clid = tcm->tcm_handle;
2189 	qid = TC_H_MAJ(clid);
2190 
2191 	if (portid != TC_H_ROOT) {
2192 		u32 qid1 = TC_H_MAJ(portid);
2193 
2194 		if (qid && qid1) {
2195 			/* If both majors are known, they must be identical. */
2196 			if (qid != qid1)
2197 				return -EINVAL;
2198 		} else if (qid1) {
2199 			qid = qid1;
2200 		} else if (qid == 0)
2201 			qid = rtnl_dereference(dev->qdisc)->handle;
2202 
2203 		/* Now qid is a genuine qdisc handle consistent
2204 		 * with both parent and child.
2205 		 *
2206 		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
2207 		 */
2208 		if (portid)
2209 			portid = TC_H_MAKE(qid, portid);
2210 	} else {
2211 		if (qid == 0)
2212 			qid = rtnl_dereference(dev->qdisc)->handle;
2213 	}
2214 
2215 	/* OK. Locate qdisc */
2216 	q = qdisc_lookup(dev, qid);
2217 	if (!q)
2218 		return -ENOENT;
2219 
2220 	/* And check that it supports classes */
2221 	cops = q->ops->cl_ops;
2222 	if (cops == NULL)
2223 		return -EINVAL;
2224 
2225 	/* Now try to get class */
2226 	if (clid == 0) {
2227 		if (portid == TC_H_ROOT)
2228 			clid = qid;
2229 	} else
2230 		clid = TC_H_MAKE(qid, clid);
2231 
2232 	if (clid)
2233 		cl = cops->find(q, clid);
2234 
2235 	if (cl == 0) {
2236 		err = -ENOENT;
2237 		if (n->nlmsg_type != RTM_NEWTCLASS ||
2238 		    !(n->nlmsg_flags & NLM_F_CREATE))
2239 			goto out;
2240 	} else {
2241 		switch (n->nlmsg_type) {
2242 		case RTM_NEWTCLASS:
2243 			err = -EEXIST;
2244 			if (n->nlmsg_flags & NLM_F_EXCL)
2245 				goto out;
2246 			break;
2247 		case RTM_DELTCLASS:
2248 			err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
2249 			/* Unbind filters from the class by rebinding them to 0 */
2250 			tc_bind_tclass(q, portid, clid, 0);
2251 			goto out;
2252 		case RTM_GETTCLASS:
2253 			err = tclass_get_notify(net, skb, n, q, cl, extack);
2254 			goto out;
2255 		default:
2256 			err = -EINVAL;
2257 			goto out;
2258 		}
2259 	}
2260 
2261 	if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2262 		NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2263 		return -EOPNOTSUPP;
2264 	}
2265 
2266 	/* Prevent creation of traffic classes with classid TC_H_ROOT */
2267 	if (clid == TC_H_ROOT) {
2268 		NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT");
2269 		return -EINVAL;
2270 	}
2271 
2272 	new_cl = cl;
2273 	err = -EOPNOTSUPP;
2274 	if (cops->change)
2275 		err = cops->change(q, clid, portid, tca, &new_cl, extack);
2276 	if (err == 0) {
2277 		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
2278 		/* We just created a new class; need to do reverse binding. */
2279 		if (cl != new_cl)
2280 			tc_bind_tclass(q, portid, clid, new_cl);
2281 	}
2282 out:
2283 	return err;
2284 }
2285 
2286 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
2287 			 struct netlink_ext_ack *extack)
2288 {
2289 	struct net *net = sock_net(skb->sk);
2290 	struct tcmsg *tcm = nlmsg_data(n);
2291 	struct nlattr *tca[TCA_MAX + 1];
2292 	struct net_device *dev;
2293 	int err;
2294 
2295 	err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2296 				     rtm_tca_policy, extack);
2297 	if (err < 0)
2298 		return err;
2299 
2300 	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2301 	if (!dev)
2302 		return -ENODEV;
2303 
2304 	netdev_lock_ops(dev);
2305 	err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm);
2306 	netdev_unlock_ops(dev);
2307 
2308 	return err;
2309 }
2310 
2311 struct qdisc_dump_args {
2312 	struct qdisc_walker	w;
2313 	struct sk_buff		*skb;
2314 	struct netlink_callback	*cb;
2315 };
2316 
2317 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2318 			    struct qdisc_walker *arg)
2319 {
2320 	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2321 
2322 	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
2323 			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2324 			      RTM_NEWTCLASS, NULL);
2325 }
2326 
2327 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2328 				struct tcmsg *tcm, struct netlink_callback *cb,
2329 				int *t_p, int s_t)
2330 {
2331 	struct qdisc_dump_args arg;
2332 
2333 	if (tc_qdisc_dump_ignore(q, false) ||
2334 	    *t_p < s_t || !q->ops->cl_ops ||
2335 	    (tcm->tcm_parent &&
2336 	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2337 		(*t_p)++;
2338 		return 0;
2339 	}
2340 	if (*t_p > s_t)
2341 		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2342 	arg.w.fn = qdisc_class_dump;
2343 	arg.skb = skb;
2344 	arg.cb = cb;
2345 	arg.w.stop  = 0;
2346 	arg.w.skip = cb->args[1];
2347 	arg.w.count = 0;
2348 	q->ops->cl_ops->walk(q, &arg.w);
2349 	cb->args[1] = arg.w.count;
2350 	if (arg.w.stop)
2351 		return -1;
2352 	(*t_p)++;
2353 	return 0;
2354 }
2355 
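/* Class-dump counterpart of tc_dump_qdisc_root(): dump the classes of
 * @root and, when @recur is set, of every other qdisc hashed on the
 * device.  A non-zero tcm->tcm_parent restricts the walk to the single
 * matching qdisc.
 */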
2356 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2357 			       struct tcmsg *tcm, struct netlink_callback *cb,
2358 			       int *t_p, int s_t, bool recur)
2359 {
2360 	struct Qdisc *q;
2361 	int b;
2362 
2363 	if (!root)
2364 		return 0;
2365 
2366 	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2367 		return -1;
2368 
2369 	if (!qdisc_dev(root) || !recur)
2370 		return 0;
2371 
2372 	if (tcm->tcm_parent) {
2373 		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2374 		if (q && q != root &&
2375 		    tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2376 			return -1;
2377 		return 0;
2378 	}
2379 	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2380 		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2381 			return -1;
2382 	}
2383 
2384 	return 0;
2385 }
2386 
2387 static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb,
2388 			    struct tcmsg *tcm, struct net_device *dev)
2389 {
2390 	struct netdev_queue *dev_queue;
2391 	int t, s_t;
2392 
2393 	s_t = cb->args[0];
2394 	t = 0;
2395 
2396 	if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2397 				skb, tcm, cb, &t, s_t, true) < 0)
2398 		goto done;
2399 
2400 	dev_queue = dev_ingress_queue(dev);
2401 	if (dev_queue &&
2402 	    tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping),
2403 				skb, tcm, cb, &t, s_t, false) < 0)
2404 		goto done;
2405 
2406 done:
2407 	cb->args[0] = t;
2408 
2409 	return skb->len;
2410 }
2411 
2412 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2413 {
2414 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2415 	struct net *net = sock_net(skb->sk);
2416 	struct net_device *dev;
2417 	int err;
2418 
2419 	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2420 		return 0;
2421 
2422 	dev = dev_get_by_index(net, tcm->tcm_ifindex);
2423 	if (!dev)
2424 		return 0;
2425 
2426 	netdev_lock_ops(dev);
2427 	err = __tc_dump_tclass(skb, cb, tcm, dev);
2428 	netdev_unlock_ops(dev);
2429 
2430 	dev_put(dev);
2431 
2432 	return err;
2433 }
2434 
2435 #ifdef CONFIG_PROC_FS
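/* /proc/net/psched prints four hex words that user space (notably tc)
 * has historically used to work out clock conversion factors.  With the
 * nanosecond-based clock the first two are fixed conversion constants,
 * the third (1000000) survives only for compatibility, and the fourth
 * is the hrtimer resolution expressed in Hz.
 */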
2436 static int psched_show(struct seq_file *seq, void *v)
2437 {
2438 	seq_printf(seq, "%08x %08x %08x %08x\n",
2439 		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2440 		   1000000,
2441 		   (u32)NSEC_PER_SEC / hrtimer_resolution);
2442 
2443 	return 0;
2444 }
2445 
2446 static int __net_init psched_net_init(struct net *net)
2447 {
2448 	struct proc_dir_entry *e;
2449 
2450 	e = proc_create_single("psched", 0, net->proc_net, psched_show);
2451 	if (e == NULL)
2452 		return -ENOMEM;
2453 
2454 	return 0;
2455 }
2456 
2457 static void __net_exit psched_net_exit(struct net *net)
2458 {
2459 	remove_proc_entry("psched", net->proc_net);
2460 }
2461 #else
2462 static int __net_init psched_net_init(struct net *net)
2463 {
2464 	return 0;
2465 }
2466 
2467 static void __net_exit psched_net_exit(struct net *net)
2468 {
2469 }
2470 #endif
2471 
2472 static struct pernet_operations psched_net_ops = {
2473 	.init = psched_net_init,
2474 	.exit = psched_net_exit,
2475 };
2476 
2477 #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE)
2478 DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
2479 #endif
2480 
2481 static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = {
2482 	{.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc},
2483 	{.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc},
2484 	{.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc,
2485 	 .dumpit = tc_dump_qdisc},
2486 	{.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass},
2487 	{.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass},
2488 	{.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass,
2489 	 .dumpit = tc_dump_tclass},
2490 };
2491 
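/* Register the pernet /proc/net/psched hook, the built-in qdiscs the
 * core relies on (pfifo_fast is the classic default, mq the multiqueue
 * default), and the rtnetlink handlers for qdisc and class operations.
 */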
2492 static int __init pktsched_init(void)
2493 {
2494 	int err;
2495 
2496 	err = register_pernet_subsys(&psched_net_ops);
2497 	if (err) {
2498 		pr_err("pktsched_init: "
2499 		       "cannot initialize per netns operations\n");
2500 		return err;
2501 	}
2502 
2503 	register_qdisc(&pfifo_fast_ops);
2504 	register_qdisc(&pfifo_qdisc_ops);
2505 	register_qdisc(&bfifo_qdisc_ops);
2506 	register_qdisc(&pfifo_head_drop_qdisc_ops);
2507 	register_qdisc(&mq_qdisc_ops);
2508 	register_qdisc(&noqueue_qdisc_ops);
2509 
2510 	rtnl_register_many(psched_rtnl_msg_handlers);
2511 
2512 	tc_wrapper_init();
2513 
2514 	return 0;
2515 }
2516 
2517 subsys_initcall(pktsched_init);
2518