xref: /linux/net/sched/sch_htb.c (revision ab59a8605604f71bbbc16077270dc3f39648b7fc)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows to assign priority to each class in hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned level. Leaf has ALWAYS level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
51     one less than their parent.
52 */
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68 
69 /* used internaly to keep status of single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When class changes from state 1->2 and disconnects from
83 	 * parent's feed then we lost ptr value and start from the
84 	 * first child again. Here we store classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 
106 	int			level;		/* our level (see above) */
107 	unsigned int		children;
108 	struct htb_class	*parent;	/* parent class */
109 
110 	struct net_rate_estimator __rcu *rate_est;
111 
112 	/*
113 	 * Written often fields
114 	 */
115 	struct gnet_stats_basic_sync bstats;
116 	struct gnet_stats_basic_sync bstats_bias;
117 	struct tc_htb_xstats	xstats;	/* our special stats */
118 
119 	/* token bucket parameters */
120 	s64			tokens, ctokens;/* current number of tokens */
121 	s64			t_c;		/* checkpoint time */
122 
123 	union {
124 		struct htb_class_leaf {
125 			int		deficit[TC_HTB_MAXDEPTH];
126 			struct Qdisc	*q;
127 			struct netdev_queue *offload_queue;
128 		} leaf;
129 		struct htb_class_inner {
130 			struct htb_prio clprio[TC_HTB_NUMPRIO];
131 		} inner;
132 	};
133 	s64			pq_key;
134 
135 	int			prio_activity;	/* for which prios are we active */
136 	enum htb_cmode		cmode;		/* current mode of the class */
137 	struct rb_node		pq_node;	/* node for event queue */
138 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
139 
140 	unsigned int drops ____cacheline_aligned_in_smp;
141 	unsigned int		overlimits;
142 };
143 
144 struct htb_level {
145 	struct rb_root	wait_pq;
146 	struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148 
149 struct htb_sched {
150 	struct Qdisc_class_hash clhash;
151 	int			defcls;		/* class where unclassified flows go to */
152 	int			rate2quantum;	/* quant = rate / rate2quantum */
153 
154 	/* filters for qdisc itself */
155 	struct tcf_proto __rcu	*filter_list;
156 	struct tcf_block	*block;
157 
158 #define HTB_WARN_TOOMANYEVENTS	0x1
159 	unsigned int		warned;	/* only one warning */
160 	int			direct_qlen;
161 	struct work_struct	work;
162 
163 	/* non shaped skbs; let them go directly thru */
164 	struct qdisc_skb_head	direct_queue;
165 	u32			direct_pkts;
166 	u32			overlimits;
167 
168 	struct qdisc_watchdog	watchdog;
169 
170 	s64			now;	/* cached dequeue time */
171 
172 	/* time of nearest event per level (row) */
173 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
174 
175 	int			row_mask[TC_HTB_MAXDEPTH];
176 
177 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
178 
179 	struct Qdisc		**direct_qdiscs;
180 	unsigned int            num_direct_qdiscs;
181 
182 	bool			offload;
183 };
184 
185 /* find class in global hash table using given handle */
htb_find(u32 handle,struct Qdisc * sch)186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188 	struct htb_sched *q = qdisc_priv(sch);
189 	struct Qdisc_class_common *clc;
190 
191 	clc = qdisc_class_find(&q->clhash, handle);
192 	if (clc == NULL)
193 		return NULL;
194 	return container_of(clc, struct htb_class, common);
195 }
196 
htb_search(struct Qdisc * sch,u32 handle)197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199 	return (unsigned long)htb_find(handle, sch);
200 }
201 
202 #define HTB_DIRECT ((struct htb_class *)-1L)
203 
204 /**
205  * htb_classify - classify a packet into class
206  * @skb: the socket buffer
207  * @sch: the active queue discipline
208  * @qerr: pointer for returned status code
209  *
210  * It returns NULL if the packet should be dropped or -1 if the packet
211  * should be passed directly thru. In all other cases leaf class is returned.
212  * We allow direct class selection by classid in priority. The we examine
213  * filters in qdisc and in inner nodes (if higher filter points to the inner
214  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
215  * internal fifo (direct). These packets then go directly thru. If we still
216  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
217  * then finish and return direct queue.
218  */
htb_classify(struct sk_buff * skb,struct Qdisc * sch,int * qerr)219 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
220 				      int *qerr)
221 {
222 	struct htb_sched *q = qdisc_priv(sch);
223 	struct htb_class *cl;
224 	struct tcf_result res;
225 	struct tcf_proto *tcf;
226 	int result;
227 
228 	/* allow to select class by setting skb->priority to valid classid;
229 	 * note that nfmark can be used too by attaching filter fw with no
230 	 * rules in it
231 	 */
232 	if (skb->priority == sch->handle)
233 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
234 	cl = htb_find(skb->priority, sch);
235 	if (cl) {
236 		if (cl->level == 0)
237 			return cl;
238 		/* Start with inner filter chain if a non-leaf class is selected */
239 		tcf = rcu_dereference_bh(cl->filter_list);
240 	} else {
241 		tcf = rcu_dereference_bh(q->filter_list);
242 	}
243 
244 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
245 	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
246 #ifdef CONFIG_NET_CLS_ACT
247 		switch (result) {
248 		case TC_ACT_QUEUED:
249 		case TC_ACT_STOLEN:
250 		case TC_ACT_TRAP:
251 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
252 			fallthrough;
253 		case TC_ACT_SHOT:
254 			return NULL;
255 		}
256 #endif
257 		cl = (void *)res.class;
258 		if (!cl) {
259 			if (res.classid == sch->handle)
260 				return HTB_DIRECT;	/* X:0 (direct flow) */
261 			cl = htb_find(res.classid, sch);
262 			if (!cl)
263 				break;	/* filter selected invalid classid */
264 		}
265 		if (!cl->level)
266 			return cl;	/* we hit leaf; return it */
267 
268 		/* we have got inner class; apply inner filter chain */
269 		tcf = rcu_dereference_bh(cl->filter_list);
270 	}
271 	/* classification failed; try to use default class */
272 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
273 	if (!cl || cl->level)
274 		return HTB_DIRECT;	/* bad default .. this is safe bet */
275 	return cl;
276 }
277 
278 /**
279  * htb_add_to_id_tree - adds class to the round robin list
280  * @root: the root of the tree
281  * @cl: the class to add
282  * @prio: the give prio in class
283  *
284  * Routine adds class to the list (actually tree) sorted by classid.
285  * Make sure that class is not already on such list for given prio.
286  */
htb_add_to_id_tree(struct rb_root * root,struct htb_class * cl,int prio)287 static void htb_add_to_id_tree(struct rb_root *root,
288 			       struct htb_class *cl, int prio)
289 {
290 	struct rb_node **p = &root->rb_node, *parent = NULL;
291 
292 	while (*p) {
293 		struct htb_class *c;
294 		parent = *p;
295 		c = rb_entry(parent, struct htb_class, node[prio]);
296 
297 		if (cl->common.classid > c->common.classid)
298 			p = &parent->rb_right;
299 		else
300 			p = &parent->rb_left;
301 	}
302 	rb_link_node(&cl->node[prio], parent, p);
303 	rb_insert_color(&cl->node[prio], root);
304 }
305 
306 /**
307  * htb_add_to_wait_tree - adds class to the event queue with delay
308  * @q: the priority event queue
309  * @cl: the class to add
310  * @delay: delay in microseconds
311  *
312  * The class is added to priority event queue to indicate that class will
313  * change its mode in cl->pq_key microseconds. Make sure that class is not
314  * already in the queue.
315  */
htb_add_to_wait_tree(struct htb_sched * q,struct htb_class * cl,s64 delay)316 static void htb_add_to_wait_tree(struct htb_sched *q,
317 				 struct htb_class *cl, s64 delay)
318 {
319 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
320 
321 	cl->pq_key = q->now + delay;
322 	if (cl->pq_key == q->now)
323 		cl->pq_key++;
324 
325 	/* update the nearest event cache */
326 	if (q->near_ev_cache[cl->level] > cl->pq_key)
327 		q->near_ev_cache[cl->level] = cl->pq_key;
328 
329 	while (*p) {
330 		struct htb_class *c;
331 		parent = *p;
332 		c = rb_entry(parent, struct htb_class, pq_node);
333 		if (cl->pq_key >= c->pq_key)
334 			p = &parent->rb_right;
335 		else
336 			p = &parent->rb_left;
337 	}
338 	rb_link_node(&cl->pq_node, parent, p);
339 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
340 }
341 
342 /**
343  * htb_next_rb_node - finds next node in binary tree
344  * @n: the current node in binary tree
345  *
346  * When we are past last key we return NULL.
347  * Average complexity is 2 steps per call.
348  */
htb_next_rb_node(struct rb_node ** n)349 static inline void htb_next_rb_node(struct rb_node **n)
350 {
351 	*n = rb_next(*n);
352 }
353 
354 /**
355  * htb_add_class_to_row - add class to its row
356  * @q: the priority event queue
357  * @cl: the class to add
358  * @mask: the given priorities in class in bitmap
359  *
360  * The class is added to row at priorities marked in mask.
361  * It does nothing if mask == 0.
362  */
htb_add_class_to_row(struct htb_sched * q,struct htb_class * cl,int mask)363 static inline void htb_add_class_to_row(struct htb_sched *q,
364 					struct htb_class *cl, int mask)
365 {
366 	q->row_mask[cl->level] |= mask;
367 	while (mask) {
368 		int prio = ffz(~mask);
369 		mask &= ~(1 << prio);
370 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
371 	}
372 }
373 
374 /* If this triggers, it is a bug in this code, but it need not be fatal */
htb_safe_rb_erase(struct rb_node * rb,struct rb_root * root)375 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
376 {
377 	if (RB_EMPTY_NODE(rb)) {
378 		WARN_ON(1);
379 	} else {
380 		rb_erase(rb, root);
381 		RB_CLEAR_NODE(rb);
382 	}
383 }
384 
385 
386 /**
387  * htb_remove_class_from_row - removes class from its row
388  * @q: the priority event queue
389  * @cl: the class to add
390  * @mask: the given priorities in class in bitmap
391  *
392  * The class is removed from row at priorities marked in mask.
393  * It does nothing if mask == 0.
394  */
htb_remove_class_from_row(struct htb_sched * q,struct htb_class * cl,int mask)395 static inline void htb_remove_class_from_row(struct htb_sched *q,
396 						 struct htb_class *cl, int mask)
397 {
398 	int m = 0;
399 	struct htb_level *hlevel = &q->hlevel[cl->level];
400 
401 	while (mask) {
402 		int prio = ffz(~mask);
403 		struct htb_prio *hprio = &hlevel->hprio[prio];
404 
405 		mask &= ~(1 << prio);
406 		if (hprio->ptr == cl->node + prio)
407 			htb_next_rb_node(&hprio->ptr);
408 
409 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
410 		if (!hprio->row.rb_node)
411 			m |= 1 << prio;
412 	}
413 	q->row_mask[cl->level] &= ~m;
414 }
415 
416 /**
417  * htb_activate_prios - creates active classe's feed chain
418  * @q: the priority event queue
419  * @cl: the class to activate
420  *
421  * The class is connected to ancestors and/or appropriate rows
422  * for priorities it is participating on. cl->cmode must be new
423  * (activated) mode. It does nothing if cl->prio_activity == 0.
424  */
htb_activate_prios(struct htb_sched * q,struct htb_class * cl)425 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
426 {
427 	struct htb_class *p = cl->parent;
428 	long m, mask = cl->prio_activity;
429 
430 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
431 		m = mask;
432 		while (m) {
433 			unsigned int prio = ffz(~m);
434 
435 			if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
436 				break;
437 			m &= ~(1 << prio);
438 
439 			if (p->inner.clprio[prio].feed.rb_node)
440 				/* parent already has its feed in use so that
441 				 * reset bit in mask as parent is already ok
442 				 */
443 				mask &= ~(1 << prio);
444 
445 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
446 		}
447 		p->prio_activity |= mask;
448 		cl = p;
449 		p = cl->parent;
450 
451 	}
452 	if (cl->cmode == HTB_CAN_SEND && mask)
453 		htb_add_class_to_row(q, cl, mask);
454 }
455 
456 /**
457  * htb_deactivate_prios - remove class from feed chain
458  * @q: the priority event queue
459  * @cl: the class to deactivate
460  *
461  * cl->cmode must represent old mode (before deactivation). It does
462  * nothing if cl->prio_activity == 0. Class is removed from all feed
463  * chains and rows.
464  */
htb_deactivate_prios(struct htb_sched * q,struct htb_class * cl)465 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
466 {
467 	struct htb_class *p = cl->parent;
468 	long m, mask = cl->prio_activity;
469 
470 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
471 		m = mask;
472 		mask = 0;
473 		while (m) {
474 			int prio = ffz(~m);
475 			m &= ~(1 << prio);
476 
477 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
478 				/* we are removing child which is pointed to from
479 				 * parent feed - forget the pointer but remember
480 				 * classid
481 				 */
482 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
483 				p->inner.clprio[prio].ptr = NULL;
484 			}
485 
486 			htb_safe_rb_erase(cl->node + prio,
487 					  &p->inner.clprio[prio].feed);
488 
489 			if (!p->inner.clprio[prio].feed.rb_node)
490 				mask |= 1 << prio;
491 		}
492 
493 		p->prio_activity &= ~mask;
494 		cl = p;
495 		p = cl->parent;
496 
497 	}
498 	if (cl->cmode == HTB_CAN_SEND && mask)
499 		htb_remove_class_from_row(q, cl, mask);
500 }
501 
htb_lowater(const struct htb_class * cl)502 static inline s64 htb_lowater(const struct htb_class *cl)
503 {
504 	if (htb_hysteresis)
505 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
506 	else
507 		return 0;
508 }
htb_hiwater(const struct htb_class * cl)509 static inline s64 htb_hiwater(const struct htb_class *cl)
510 {
511 	if (htb_hysteresis)
512 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
513 	else
514 		return 0;
515 }
516 
517 
518 /**
519  * htb_class_mode - computes and returns current class mode
520  * @cl: the target class
521  * @diff: diff time in microseconds
522  *
523  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
524  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
525  * from now to time when cl will change its state.
526  * Also it is worth to note that class mode doesn't change simply
527  * at cl->{c,}tokens == 0 but there can rather be hysteresis of
528  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
529  * mode transitions per time unit. The speed gain is about 1/6.
530  */
531 static inline enum htb_cmode
htb_class_mode(struct htb_class * cl,s64 * diff)532 htb_class_mode(struct htb_class *cl, s64 *diff)
533 {
534 	s64 toks;
535 
536 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
537 		*diff = -toks;
538 		return HTB_CANT_SEND;
539 	}
540 
541 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
542 		return HTB_CAN_SEND;
543 
544 	*diff = -toks;
545 	return HTB_MAY_BORROW;
546 }
547 
548 /**
549  * htb_change_class_mode - changes classe's mode
550  * @q: the priority event queue
551  * @cl: the target class
552  * @diff: diff time in microseconds
553  *
554  * This should be the only way how to change classe's mode under normal
555  * circumstances. Routine will update feed lists linkage, change mode
556  * and add class to the wait event queue if appropriate. New mode should
557  * be different from old one and cl->pq_key has to be valid if changing
558  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
559  */
560 static void
htb_change_class_mode(struct htb_sched * q,struct htb_class * cl,s64 * diff)561 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
562 {
563 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
564 
565 	if (new_mode == cl->cmode)
566 		return;
567 
568 	if (new_mode == HTB_CANT_SEND) {
569 		cl->overlimits++;
570 		q->overlimits++;
571 	}
572 
573 	if (cl->prio_activity) {	/* not necessary: speed optimization */
574 		if (cl->cmode != HTB_CANT_SEND)
575 			htb_deactivate_prios(q, cl);
576 		cl->cmode = new_mode;
577 		if (new_mode != HTB_CANT_SEND)
578 			htb_activate_prios(q, cl);
579 	} else
580 		cl->cmode = new_mode;
581 }
582 
583 /**
584  * htb_activate - inserts leaf cl into appropriate active feeds
585  * @q: the priority event queue
586  * @cl: the target class
587  *
588  * Routine learns (new) priority of leaf and activates feed chain
589  * for the prio. It can be called on already active leaf safely.
590  * It also adds leaf into droplist.
591  */
htb_activate(struct htb_sched * q,struct htb_class * cl)592 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
593 {
594 	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);
595 
596 	if (!cl->prio_activity) {
597 		cl->prio_activity = 1 << cl->prio;
598 		htb_activate_prios(q, cl);
599 	}
600 }
601 
602 /**
603  * htb_deactivate - remove leaf cl from active feeds
604  * @q: the priority event queue
605  * @cl: the target class
606  *
607  * Make sure that leaf is active. In the other words it can't be called
608  * with non-active leaf. It also removes class from the drop list.
609  */
htb_deactivate(struct htb_sched * q,struct htb_class * cl)610 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
611 {
612 	WARN_ON(!cl->prio_activity);
613 
614 	htb_deactivate_prios(q, cl);
615 	cl->prio_activity = 0;
616 }
617 
htb_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)618 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
619 		       struct sk_buff **to_free)
620 {
621 	int ret;
622 	unsigned int len = qdisc_pkt_len(skb);
623 	struct htb_sched *q = qdisc_priv(sch);
624 	struct htb_class *cl = htb_classify(skb, sch, &ret);
625 
626 	if (cl == HTB_DIRECT) {
627 		/* enqueue to helper queue */
628 		if (q->direct_queue.qlen < q->direct_qlen) {
629 			__qdisc_enqueue_tail(skb, &q->direct_queue);
630 			q->direct_pkts++;
631 		} else {
632 			return qdisc_drop(skb, sch, to_free);
633 		}
634 #ifdef CONFIG_NET_CLS_ACT
635 	} else if (!cl) {
636 		if (ret & __NET_XMIT_BYPASS)
637 			qdisc_qstats_drop(sch);
638 		__qdisc_drop(skb, to_free);
639 		return ret;
640 #endif
641 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
642 					to_free)) != NET_XMIT_SUCCESS) {
643 		if (net_xmit_drop_count(ret)) {
644 			qdisc_qstats_drop(sch);
645 			cl->drops++;
646 		}
647 		return ret;
648 	} else {
649 		htb_activate(q, cl);
650 	}
651 
652 	sch->qstats.backlog += len;
653 	sch->q.qlen++;
654 	return NET_XMIT_SUCCESS;
655 }
656 
htb_accnt_tokens(struct htb_class * cl,int bytes,s64 diff)657 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
658 {
659 	s64 toks = diff + cl->tokens;
660 
661 	if (toks > cl->buffer)
662 		toks = cl->buffer;
663 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
664 	if (toks <= -cl->mbuffer)
665 		toks = 1 - cl->mbuffer;
666 
667 	cl->tokens = toks;
668 }
669 
htb_accnt_ctokens(struct htb_class * cl,int bytes,s64 diff)670 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
671 {
672 	s64 toks = diff + cl->ctokens;
673 
674 	if (toks > cl->cbuffer)
675 		toks = cl->cbuffer;
676 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
677 	if (toks <= -cl->mbuffer)
678 		toks = 1 - cl->mbuffer;
679 
680 	cl->ctokens = toks;
681 }
682 
683 /**
684  * htb_charge_class - charges amount "bytes" to leaf and ancestors
685  * @q: the priority event queue
686  * @cl: the class to start iterate
687  * @level: the minimum level to account
688  * @skb: the socket buffer
689  *
690  * Routine assumes that packet "bytes" long was dequeued from leaf cl
691  * borrowing from "level". It accounts bytes to ceil leaky bucket for
692  * leaf and all ancestors and to rate bucket for ancestors at levels
693  * "level" and higher. It also handles possible change of mode resulting
694  * from the update. Note that mode can also increase here (MAY_BORROW to
695  * CAN_SEND) because we can use more precise clock that event queue here.
696  * In such case we remove class from event queue first.
697  */
htb_charge_class(struct htb_sched * q,struct htb_class * cl,int level,struct sk_buff * skb)698 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
699 			     int level, struct sk_buff *skb)
700 {
701 	int bytes = qdisc_pkt_len(skb);
702 	enum htb_cmode old_mode;
703 	s64 diff;
704 
705 	while (cl) {
706 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
707 		if (cl->level >= level) {
708 			if (cl->level == level)
709 				cl->xstats.lends++;
710 			htb_accnt_tokens(cl, bytes, diff);
711 		} else {
712 			cl->xstats.borrows++;
713 			cl->tokens += diff;	/* we moved t_c; update tokens */
714 		}
715 		htb_accnt_ctokens(cl, bytes, diff);
716 		cl->t_c = q->now;
717 
718 		old_mode = cl->cmode;
719 		diff = 0;
720 		htb_change_class_mode(q, cl, &diff);
721 		if (old_mode != cl->cmode) {
722 			if (old_mode != HTB_CAN_SEND)
723 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
724 			if (cl->cmode != HTB_CAN_SEND)
725 				htb_add_to_wait_tree(q, cl, diff);
726 		}
727 
728 		/* update basic stats except for leaves which are already updated */
729 		if (cl->level)
730 			bstats_update(&cl->bstats, skb);
731 
732 		cl = cl->parent;
733 	}
734 }
735 
736 /**
737  * htb_do_events - make mode changes to classes at the level
738  * @q: the priority event queue
739  * @level: which wait_pq in 'q->hlevel'
740  * @start: start jiffies
741  *
742  * Scans event queue for pending events and applies them. Returns time of
743  * next pending event (0 for no event in pq, q->now for too many events).
744  * Note: Applied are events whose have cl->pq_key <= q->now.
745  */
htb_do_events(struct htb_sched * q,const int level,unsigned long start)746 static s64 htb_do_events(struct htb_sched *q, const int level,
747 			 unsigned long start)
748 {
749 	/* don't run for longer than 2 jiffies; 2 is used instead of
750 	 * 1 to simplify things when jiffy is going to be incremented
751 	 * too soon
752 	 */
753 	unsigned long stop_at = start + 2;
754 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
755 
756 	while (time_before(jiffies, stop_at)) {
757 		struct htb_class *cl;
758 		s64 diff;
759 		struct rb_node *p = rb_first(wait_pq);
760 
761 		if (!p)
762 			return 0;
763 
764 		cl = rb_entry(p, struct htb_class, pq_node);
765 		if (cl->pq_key > q->now)
766 			return cl->pq_key;
767 
768 		htb_safe_rb_erase(p, wait_pq);
769 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
770 		htb_change_class_mode(q, cl, &diff);
771 		if (cl->cmode != HTB_CAN_SEND)
772 			htb_add_to_wait_tree(q, cl, diff);
773 	}
774 
775 	/* too much load - let's continue after a break for scheduling */
776 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
777 		pr_warn("htb: too many events!\n");
778 		q->warned |= HTB_WARN_TOOMANYEVENTS;
779 	}
780 
781 	return q->now;
782 }
783 
784 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
785  * is no such one exists.
786  */
htb_id_find_next_upper(int prio,struct rb_node * n,u32 id)787 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
788 					      u32 id)
789 {
790 	struct rb_node *r = NULL;
791 	while (n) {
792 		struct htb_class *cl =
793 		    rb_entry(n, struct htb_class, node[prio]);
794 
795 		if (id > cl->common.classid) {
796 			n = n->rb_right;
797 		} else if (id < cl->common.classid) {
798 			r = n;
799 			n = n->rb_left;
800 		} else {
801 			return n;
802 		}
803 	}
804 	return r;
805 }
806 
807 /**
808  * htb_lookup_leaf - returns next leaf class in DRR order
809  * @hprio: the current one
810  * @prio: which prio in class
811  *
812  * Find leaf where current feed pointers points to.
813  */
htb_lookup_leaf(struct htb_prio * hprio,const int prio)814 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
815 {
816 	int i;
817 	struct {
818 		struct rb_node *root;
819 		struct rb_node **pptr;
820 		u32 *pid;
821 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
822 
823 	BUG_ON(!hprio->row.rb_node);
824 	sp->root = hprio->row.rb_node;
825 	sp->pptr = &hprio->ptr;
826 	sp->pid = &hprio->last_ptr_id;
827 
828 	for (i = 0; i < 65535; i++) {
829 		if (!*sp->pptr && *sp->pid) {
830 			/* ptr was invalidated but id is valid - try to recover
831 			 * the original or next ptr
832 			 */
833 			*sp->pptr =
834 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
835 		}
836 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
837 				 * can become out of date quickly
838 				 */
839 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
840 			*sp->pptr = sp->root;
841 			while ((*sp->pptr)->rb_left)
842 				*sp->pptr = (*sp->pptr)->rb_left;
843 			if (sp > stk) {
844 				sp--;
845 				if (!*sp->pptr) {
846 					WARN_ON(1);
847 					return NULL;
848 				}
849 				htb_next_rb_node(sp->pptr);
850 			}
851 		} else {
852 			struct htb_class *cl;
853 			struct htb_prio *clp;
854 
855 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
856 			if (!cl->level)
857 				return cl;
858 			clp = &cl->inner.clprio[prio];
859 			(++sp)->root = clp->feed.rb_node;
860 			sp->pptr = &clp->ptr;
861 			sp->pid = &clp->last_ptr_id;
862 		}
863 	}
864 	WARN_ON(1);
865 	return NULL;
866 }
867 
868 /* dequeues packet at given priority and level; call only if
869  * you are sure that there is active class at prio/level
870  */
htb_dequeue_tree(struct htb_sched * q,const int prio,const int level)871 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
872 					const int level)
873 {
874 	struct sk_buff *skb = NULL;
875 	struct htb_class *cl, *start;
876 	struct htb_level *hlevel = &q->hlevel[level];
877 	struct htb_prio *hprio = &hlevel->hprio[prio];
878 
879 	/* look initial class up in the row */
880 	start = cl = htb_lookup_leaf(hprio, prio);
881 
882 	do {
883 next:
884 		if (unlikely(!cl))
885 			return NULL;
886 
887 		/* class can be empty - it is unlikely but can be true if leaf
888 		 * qdisc drops packets in enqueue routine or if someone used
889 		 * graft operation on the leaf since last dequeue;
890 		 * simply deactivate and skip such class
891 		 */
892 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
893 			struct htb_class *next;
894 			htb_deactivate(q, cl);
895 
896 			/* row/level might become empty */
897 			if ((q->row_mask[level] & (1 << prio)) == 0)
898 				return NULL;
899 
900 			next = htb_lookup_leaf(hprio, prio);
901 
902 			if (cl == start)	/* fix start if we just deleted it */
903 				start = next;
904 			cl = next;
905 			goto next;
906 		}
907 
908 		skb = cl->leaf.q->dequeue(cl->leaf.q);
909 		if (likely(skb != NULL))
910 			break;
911 
912 		qdisc_warn_nonwc("htb", cl->leaf.q);
913 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
914 					 &q->hlevel[0].hprio[prio].ptr);
915 		cl = htb_lookup_leaf(hprio, prio);
916 
917 	} while (cl != start);
918 
919 	if (likely(skb != NULL)) {
920 		bstats_update(&cl->bstats, skb);
921 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
922 		if (cl->leaf.deficit[level] < 0) {
923 			cl->leaf.deficit[level] += cl->quantum;
924 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
925 						 &q->hlevel[0].hprio[prio].ptr);
926 		}
927 		/* this used to be after charge_class but this constelation
928 		 * gives us slightly better performance
929 		 */
930 		if (!cl->leaf.q->q.qlen)
931 			htb_deactivate(q, cl);
932 		htb_charge_class(q, cl, level, skb);
933 	}
934 	return skb;
935 }
936 
htb_dequeue(struct Qdisc * sch)937 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
938 {
939 	struct sk_buff *skb;
940 	struct htb_sched *q = qdisc_priv(sch);
941 	int level;
942 	s64 next_event;
943 	unsigned long start_at;
944 
945 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
946 	skb = __qdisc_dequeue_head(&q->direct_queue);
947 	if (skb != NULL) {
948 ok:
949 		qdisc_bstats_update(sch, skb);
950 		qdisc_qstats_backlog_dec(sch, skb);
951 		sch->q.qlen--;
952 		return skb;
953 	}
954 
955 	if (!sch->q.qlen)
956 		goto fin;
957 	q->now = ktime_get_ns();
958 	start_at = jiffies;
959 
960 	next_event = q->now + 5LLU * NSEC_PER_SEC;
961 
962 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
963 		/* common case optimization - skip event handler quickly */
964 		int m;
965 		s64 event = q->near_ev_cache[level];
966 
967 		if (q->now >= event) {
968 			event = htb_do_events(q, level, start_at);
969 			if (!event)
970 				event = q->now + NSEC_PER_SEC;
971 			q->near_ev_cache[level] = event;
972 		}
973 
974 		if (next_event > event)
975 			next_event = event;
976 
977 		m = ~q->row_mask[level];
978 		while (m != (int)(-1)) {
979 			int prio = ffz(m);
980 
981 			m |= 1 << prio;
982 			skb = htb_dequeue_tree(q, prio, level);
983 			if (likely(skb != NULL))
984 				goto ok;
985 		}
986 	}
987 	if (likely(next_event > q->now))
988 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
989 	else
990 		schedule_work(&q->work);
991 fin:
992 	return skb;
993 }
994 
995 /* reset all classes */
996 /* always caled under BH & queue lock */
htb_reset(struct Qdisc * sch)997 static void htb_reset(struct Qdisc *sch)
998 {
999 	struct htb_sched *q = qdisc_priv(sch);
1000 	struct htb_class *cl;
1001 	unsigned int i;
1002 
1003 	for (i = 0; i < q->clhash.hashsize; i++) {
1004 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1005 			if (cl->level)
1006 				memset(&cl->inner, 0, sizeof(cl->inner));
1007 			else {
1008 				if (cl->leaf.q && !q->offload)
1009 					qdisc_reset(cl->leaf.q);
1010 			}
1011 			cl->prio_activity = 0;
1012 			cl->cmode = HTB_CAN_SEND;
1013 		}
1014 	}
1015 	qdisc_watchdog_cancel(&q->watchdog);
1016 	__qdisc_reset_queue(&q->direct_queue);
1017 	memset(q->hlevel, 0, sizeof(q->hlevel));
1018 	memset(q->row_mask, 0, sizeof(q->row_mask));
1019 }
1020 
1021 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1022 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
1023 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
1024 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1025 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1026 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1027 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
1028 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
1029 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1030 };
1031 
htb_work_func(struct work_struct * work)1032 static void htb_work_func(struct work_struct *work)
1033 {
1034 	struct htb_sched *q = container_of(work, struct htb_sched, work);
1035 	struct Qdisc *sch = q->watchdog.qdisc;
1036 
1037 	rcu_read_lock();
1038 	__netif_schedule(qdisc_root(sch));
1039 	rcu_read_unlock();
1040 }
1041 
htb_offload(struct net_device * dev,struct tc_htb_qopt_offload * opt)1042 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1043 {
1044 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1045 }
1046 
htb_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)1047 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1048 		    struct netlink_ext_ack *extack)
1049 {
1050 	struct net_device *dev = qdisc_dev(sch);
1051 	struct tc_htb_qopt_offload offload_opt;
1052 	struct htb_sched *q = qdisc_priv(sch);
1053 	struct nlattr *tb[TCA_HTB_MAX + 1];
1054 	struct tc_htb_glob *gopt;
1055 	unsigned int ntx;
1056 	bool offload;
1057 	int err;
1058 
1059 	qdisc_watchdog_init(&q->watchdog, sch);
1060 	INIT_WORK(&q->work, htb_work_func);
1061 
1062 	if (!opt)
1063 		return -EINVAL;
1064 
1065 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1066 	if (err)
1067 		return err;
1068 
1069 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1070 					  NULL);
1071 	if (err < 0)
1072 		return err;
1073 
1074 	if (!tb[TCA_HTB_INIT])
1075 		return -EINVAL;
1076 
1077 	gopt = nla_data(tb[TCA_HTB_INIT]);
1078 	if (gopt->version != HTB_VER >> 16)
1079 		return -EINVAL;
1080 
1081 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1082 
1083 	if (offload) {
1084 		if (sch->parent != TC_H_ROOT) {
1085 			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1086 			return -EOPNOTSUPP;
1087 		}
1088 
1089 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1090 			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1091 			return -EOPNOTSUPP;
1092 		}
1093 
1094 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1095 		q->direct_qdiscs = kcalloc(q->num_direct_qdiscs,
1096 					   sizeof(*q->direct_qdiscs),
1097 					   GFP_KERNEL);
1098 		if (!q->direct_qdiscs)
1099 			return -ENOMEM;
1100 	}
1101 
1102 	err = qdisc_class_hash_init(&q->clhash);
1103 	if (err < 0)
1104 		return err;
1105 
1106 	if (tb[TCA_HTB_DIRECT_QLEN])
1107 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1108 	else
1109 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1110 
1111 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1112 		q->rate2quantum = 1;
1113 	q->defcls = gopt->defcls;
1114 
1115 	if (!offload)
1116 		return 0;
1117 
1118 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1119 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1120 		struct Qdisc *qdisc;
1121 
1122 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1123 					  TC_H_MAKE(sch->handle, 0), extack);
1124 		if (!qdisc) {
1125 			return -ENOMEM;
1126 		}
1127 
1128 		q->direct_qdiscs[ntx] = qdisc;
1129 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1130 	}
1131 
1132 	sch->flags |= TCQ_F_MQROOT;
1133 
1134 	offload_opt = (struct tc_htb_qopt_offload) {
1135 		.command = TC_HTB_CREATE,
1136 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1137 		.classid = TC_H_MIN(q->defcls),
1138 		.extack = extack,
1139 	};
1140 	err = htb_offload(dev, &offload_opt);
1141 	if (err)
1142 		return err;
1143 
1144 	/* Defer this assignment, so that htb_destroy skips offload-related
1145 	 * parts (especially calling ndo_setup_tc) on errors.
1146 	 */
1147 	q->offload = true;
1148 
1149 	return 0;
1150 }
1151 
htb_attach_offload(struct Qdisc * sch)1152 static void htb_attach_offload(struct Qdisc *sch)
1153 {
1154 	struct net_device *dev = qdisc_dev(sch);
1155 	struct htb_sched *q = qdisc_priv(sch);
1156 	unsigned int ntx;
1157 
1158 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1159 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1160 
1161 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1162 		qdisc_put(old);
1163 		qdisc_hash_add(qdisc, false);
1164 	}
1165 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1166 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1167 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1168 
1169 		qdisc_put(old);
1170 	}
1171 
1172 	kfree(q->direct_qdiscs);
1173 	q->direct_qdiscs = NULL;
1174 }
1175 
htb_attach_software(struct Qdisc * sch)1176 static void htb_attach_software(struct Qdisc *sch)
1177 {
1178 	struct net_device *dev = qdisc_dev(sch);
1179 	unsigned int ntx;
1180 
1181 	/* Resemble qdisc_graft behavior. */
1182 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1183 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1184 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1185 
1186 		qdisc_refcount_inc(sch);
1187 
1188 		qdisc_put(old);
1189 	}
1190 }
1191 
htb_attach(struct Qdisc * sch)1192 static void htb_attach(struct Qdisc *sch)
1193 {
1194 	struct htb_sched *q = qdisc_priv(sch);
1195 
1196 	if (q->offload)
1197 		htb_attach_offload(sch);
1198 	else
1199 		htb_attach_software(sch);
1200 }
1201 
htb_dump(struct Qdisc * sch,struct sk_buff * skb)1202 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1203 {
1204 	struct htb_sched *q = qdisc_priv(sch);
1205 	struct nlattr *nest;
1206 	struct tc_htb_glob gopt;
1207 
1208 	if (q->offload)
1209 		sch->flags |= TCQ_F_OFFLOADED;
1210 	else
1211 		sch->flags &= ~TCQ_F_OFFLOADED;
1212 
1213 	sch->qstats.overlimits = q->overlimits;
1214 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1215 	 * no change can happen on the qdisc parameters.
1216 	 */
1217 
1218 	gopt.direct_pkts = q->direct_pkts;
1219 	gopt.version = HTB_VER;
1220 	gopt.rate2quantum = q->rate2quantum;
1221 	gopt.defcls = q->defcls;
1222 	gopt.debug = 0;
1223 
1224 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1225 	if (nest == NULL)
1226 		goto nla_put_failure;
1227 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1228 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1229 		goto nla_put_failure;
1230 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1231 		goto nla_put_failure;
1232 
1233 	return nla_nest_end(skb, nest);
1234 
1235 nla_put_failure:
1236 	nla_nest_cancel(skb, nest);
1237 	return -1;
1238 }
1239 
htb_dump_class(struct Qdisc * sch,unsigned long arg,struct sk_buff * skb,struct tcmsg * tcm)1240 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1241 			  struct sk_buff *skb, struct tcmsg *tcm)
1242 {
1243 	struct htb_class *cl = (struct htb_class *)arg;
1244 	struct htb_sched *q = qdisc_priv(sch);
1245 	struct nlattr *nest;
1246 	struct tc_htb_opt opt;
1247 
1248 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1249 	 * no change can happen on the class parameters.
1250 	 */
1251 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1252 	tcm->tcm_handle = cl->common.classid;
1253 	if (!cl->level && cl->leaf.q)
1254 		tcm->tcm_info = cl->leaf.q->handle;
1255 
1256 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1257 	if (nest == NULL)
1258 		goto nla_put_failure;
1259 
1260 	memset(&opt, 0, sizeof(opt));
1261 
1262 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1263 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1264 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1265 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1266 	opt.quantum = cl->quantum;
1267 	opt.prio = cl->prio;
1268 	opt.level = cl->level;
1269 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1270 		goto nla_put_failure;
1271 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1272 		goto nla_put_failure;
1273 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1274 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1275 			      TCA_HTB_PAD))
1276 		goto nla_put_failure;
1277 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1278 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1279 			      TCA_HTB_PAD))
1280 		goto nla_put_failure;
1281 
1282 	return nla_nest_end(skb, nest);
1283 
1284 nla_put_failure:
1285 	nla_nest_cancel(skb, nest);
1286 	return -1;
1287 }
1288 
htb_offload_aggregate_stats(struct htb_sched * q,struct htb_class * cl)1289 static void htb_offload_aggregate_stats(struct htb_sched *q,
1290 					struct htb_class *cl)
1291 {
1292 	u64 bytes = 0, packets = 0;
1293 	struct htb_class *c;
1294 	unsigned int i;
1295 
1296 	gnet_stats_basic_sync_init(&cl->bstats);
1297 
1298 	for (i = 0; i < q->clhash.hashsize; i++) {
1299 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1300 			struct htb_class *p = c;
1301 
1302 			while (p && p->level < cl->level)
1303 				p = p->parent;
1304 
1305 			if (p != cl)
1306 				continue;
1307 
1308 			bytes += u64_stats_read(&c->bstats_bias.bytes);
1309 			packets += u64_stats_read(&c->bstats_bias.packets);
1310 			if (c->level == 0) {
1311 				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1312 				packets += u64_stats_read(&c->leaf.q->bstats.packets);
1313 			}
1314 		}
1315 	}
1316 	_bstats_update(&cl->bstats, bytes, packets);
1317 }
1318 
1319 static int
htb_dump_class_stats(struct Qdisc * sch,unsigned long arg,struct gnet_dump * d)1320 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1321 {
1322 	struct htb_class *cl = (struct htb_class *)arg;
1323 	struct htb_sched *q = qdisc_priv(sch);
1324 	struct gnet_stats_queue qs = {
1325 		.drops = cl->drops,
1326 		.overlimits = cl->overlimits,
1327 	};
1328 	__u32 qlen = 0;
1329 
1330 	if (!cl->level && cl->leaf.q)
1331 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1332 
1333 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1334 				    INT_MIN, INT_MAX);
1335 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1336 				     INT_MIN, INT_MAX);
1337 
1338 	if (q->offload) {
1339 		if (!cl->level) {
1340 			if (cl->leaf.q)
1341 				cl->bstats = cl->leaf.q->bstats;
1342 			else
1343 				gnet_stats_basic_sync_init(&cl->bstats);
1344 			_bstats_update(&cl->bstats,
1345 				       u64_stats_read(&cl->bstats_bias.bytes),
1346 				       u64_stats_read(&cl->bstats_bias.packets));
1347 		} else {
1348 			htb_offload_aggregate_stats(q, cl);
1349 		}
1350 	}
1351 
1352 	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1353 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1354 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1355 		return -1;
1356 
1357 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1358 }
1359 
1360 static struct netdev_queue *
htb_select_queue(struct Qdisc * sch,struct tcmsg * tcm)1361 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1362 {
1363 	struct net_device *dev = qdisc_dev(sch);
1364 	struct tc_htb_qopt_offload offload_opt;
1365 	struct htb_sched *q = qdisc_priv(sch);
1366 	int err;
1367 
1368 	if (!q->offload)
1369 		return sch->dev_queue;
1370 
1371 	offload_opt = (struct tc_htb_qopt_offload) {
1372 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1373 		.classid = TC_H_MIN(tcm->tcm_parent),
1374 	};
1375 	err = htb_offload(dev, &offload_opt);
1376 	if (err || offload_opt.qid >= dev->num_tx_queues)
1377 		return NULL;
1378 	return netdev_get_tx_queue(dev, offload_opt.qid);
1379 }
1380 
1381 static struct Qdisc *
htb_graft_helper(struct netdev_queue * dev_queue,struct Qdisc * new_q)1382 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1383 {
1384 	struct net_device *dev = dev_queue->dev;
1385 	struct Qdisc *old_q;
1386 
1387 	if (dev->flags & IFF_UP)
1388 		dev_deactivate(dev);
1389 	old_q = dev_graft_qdisc(dev_queue, new_q);
1390 	if (new_q)
1391 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1392 	if (dev->flags & IFF_UP)
1393 		dev_activate(dev);
1394 
1395 	return old_q;
1396 }
1397 
htb_offload_get_queue(struct htb_class * cl)1398 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1399 {
1400 	struct netdev_queue *queue;
1401 
1402 	queue = cl->leaf.offload_queue;
1403 	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1404 		WARN_ON(cl->leaf.q->dev_queue != queue);
1405 
1406 	return queue;
1407 }
1408 
htb_offload_move_qdisc(struct Qdisc * sch,struct htb_class * cl_old,struct htb_class * cl_new,bool destroying)1409 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1410 				   struct htb_class *cl_new, bool destroying)
1411 {
1412 	struct netdev_queue *queue_old, *queue_new;
1413 	struct net_device *dev = qdisc_dev(sch);
1414 
1415 	queue_old = htb_offload_get_queue(cl_old);
1416 	queue_new = htb_offload_get_queue(cl_new);
1417 
1418 	if (!destroying) {
1419 		struct Qdisc *qdisc;
1420 
1421 		if (dev->flags & IFF_UP)
1422 			dev_deactivate(dev);
1423 		qdisc = dev_graft_qdisc(queue_old, NULL);
1424 		WARN_ON(qdisc != cl_old->leaf.q);
1425 	}
1426 
1427 	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1428 		cl_old->leaf.q->dev_queue = queue_new;
1429 	cl_old->leaf.offload_queue = queue_new;
1430 
1431 	if (!destroying) {
1432 		struct Qdisc *qdisc;
1433 
1434 		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1435 		if (dev->flags & IFF_UP)
1436 			dev_activate(dev);
1437 		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1438 	}
1439 }
1440 
htb_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old,struct netlink_ext_ack * extack)1441 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1442 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1443 {
1444 	struct netdev_queue *dev_queue = sch->dev_queue;
1445 	struct htb_class *cl = (struct htb_class *)arg;
1446 	struct htb_sched *q = qdisc_priv(sch);
1447 	struct Qdisc *old_q;
1448 
1449 	if (cl->level)
1450 		return -EINVAL;
1451 
1452 	if (q->offload)
1453 		dev_queue = htb_offload_get_queue(cl);
1454 
1455 	if (!new) {
1456 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1457 					cl->common.classid, extack);
1458 		if (!new)
1459 			return -ENOBUFS;
1460 	}
1461 
1462 	if (q->offload) {
1463 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1464 		qdisc_refcount_inc(new);
1465 		old_q = htb_graft_helper(dev_queue, new);
1466 	}
1467 
1468 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1469 
1470 	if (q->offload) {
1471 		WARN_ON(old_q != *old);
1472 		qdisc_put(old_q);
1473 	}
1474 
1475 	return 0;
1476 }
1477 
htb_leaf(struct Qdisc * sch,unsigned long arg)1478 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1479 {
1480 	struct htb_class *cl = (struct htb_class *)arg;
1481 	return !cl->level ? cl->leaf.q : NULL;
1482 }
1483 
htb_qlen_notify(struct Qdisc * sch,unsigned long arg)1484 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1485 {
1486 	struct htb_class *cl = (struct htb_class *)arg;
1487 
1488 	if (!cl->prio_activity)
1489 		return;
1490 	htb_deactivate(qdisc_priv(sch), cl);
1491 }
1492 
htb_parent_last_child(struct htb_class * cl)1493 static inline int htb_parent_last_child(struct htb_class *cl)
1494 {
1495 	if (!cl->parent)
1496 		/* the root class */
1497 		return 0;
1498 	if (cl->parent->children > 1)
1499 		/* not the last child */
1500 		return 0;
1501 	return 1;
1502 }
1503 
htb_parent_to_leaf(struct Qdisc * sch,struct htb_class * cl,struct Qdisc * new_q)1504 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1505 			       struct Qdisc *new_q)
1506 {
1507 	struct htb_sched *q = qdisc_priv(sch);
1508 	struct htb_class *parent = cl->parent;
1509 
1510 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1511 
1512 	if (parent->cmode != HTB_CAN_SEND)
1513 		htb_safe_rb_erase(&parent->pq_node,
1514 				  &q->hlevel[parent->level].wait_pq);
1515 
1516 	parent->level = 0;
1517 	memset(&parent->inner, 0, sizeof(parent->inner));
1518 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1519 	parent->tokens = parent->buffer;
1520 	parent->ctokens = parent->cbuffer;
1521 	parent->t_c = ktime_get_ns();
1522 	parent->cmode = HTB_CAN_SEND;
1523 	if (q->offload)
1524 		parent->leaf.offload_queue = cl->leaf.offload_queue;
1525 }
1526 
htb_parent_to_leaf_offload(struct Qdisc * sch,struct netdev_queue * dev_queue,struct Qdisc * new_q)1527 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1528 				       struct netdev_queue *dev_queue,
1529 				       struct Qdisc *new_q)
1530 {
1531 	struct Qdisc *old_q;
1532 
1533 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1534 	if (new_q)
1535 		qdisc_refcount_inc(new_q);
1536 	old_q = htb_graft_helper(dev_queue, new_q);
1537 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1538 }
1539 
htb_destroy_class_offload(struct Qdisc * sch,struct htb_class * cl,bool last_child,bool destroying,struct netlink_ext_ack * extack)1540 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1541 				     bool last_child, bool destroying,
1542 				     struct netlink_ext_ack *extack)
1543 {
1544 	struct tc_htb_qopt_offload offload_opt;
1545 	struct netdev_queue *dev_queue;
1546 	struct Qdisc *q = cl->leaf.q;
1547 	struct Qdisc *old;
1548 	int err;
1549 
1550 	if (cl->level)
1551 		return -EINVAL;
1552 
1553 	WARN_ON(!q);
1554 	dev_queue = htb_offload_get_queue(cl);
1555 	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1556 	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1557 	 * does not need to graft or qdisc_put the qdisc being destroyed.
1558 	 */
1559 	if (!destroying) {
1560 		old = htb_graft_helper(dev_queue, NULL);
1561 		/* Last qdisc grafted should be the same as cl->leaf.q when
1562 		 * calling htb_delete.
1563 		 */
1564 		WARN_ON(old != q);
1565 	}
1566 
1567 	if (cl->parent) {
1568 		_bstats_update(&cl->parent->bstats_bias,
1569 			       u64_stats_read(&q->bstats.bytes),
1570 			       u64_stats_read(&q->bstats.packets));
1571 	}
1572 
1573 	offload_opt = (struct tc_htb_qopt_offload) {
1574 		.command = !last_child ? TC_HTB_LEAF_DEL :
1575 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1576 			   TC_HTB_LEAF_DEL_LAST,
1577 		.classid = cl->common.classid,
1578 		.extack = extack,
1579 	};
1580 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1581 
1582 	if (!destroying) {
1583 		if (!err)
1584 			qdisc_put(old);
1585 		else
1586 			htb_graft_helper(dev_queue, old);
1587 	}
1588 
1589 	if (last_child)
1590 		return err;
1591 
1592 	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1593 		u32 classid = TC_H_MAJ(sch->handle) |
1594 			      TC_H_MIN(offload_opt.classid);
1595 		struct htb_class *moved_cl = htb_find(classid, sch);
1596 
1597 		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1598 	}
1599 
1600 	return err;
1601 }
1602 
htb_destroy_class(struct Qdisc * sch,struct htb_class * cl)1603 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1604 {
1605 	if (!cl->level) {
1606 		WARN_ON(!cl->leaf.q);
1607 		qdisc_put(cl->leaf.q);
1608 	}
1609 	gen_kill_estimator(&cl->rate_est);
1610 	tcf_block_put(cl->block);
1611 	kfree(cl);
1612 }
1613 
htb_destroy(struct Qdisc * sch)1614 static void htb_destroy(struct Qdisc *sch)
1615 {
1616 	struct net_device *dev = qdisc_dev(sch);
1617 	struct tc_htb_qopt_offload offload_opt;
1618 	struct htb_sched *q = qdisc_priv(sch);
1619 	struct hlist_node *next;
1620 	bool nonempty, changed;
1621 	struct htb_class *cl;
1622 	unsigned int i;
1623 
1624 	cancel_work_sync(&q->work);
1625 	qdisc_watchdog_cancel(&q->watchdog);
1626 	/* This line used to be after htb_destroy_class call below
1627 	 * and surprisingly it worked in 2.4. But it must precede it
1628 	 * because filter need its target class alive to be able to call
1629 	 * unbind_filter on it (without Oops).
1630 	 */
1631 	tcf_block_put(q->block);
1632 
1633 	for (i = 0; i < q->clhash.hashsize; i++) {
1634 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1635 			tcf_block_put(cl->block);
1636 			cl->block = NULL;
1637 		}
1638 	}
1639 
1640 	do {
1641 		nonempty = false;
1642 		changed = false;
1643 		for (i = 0; i < q->clhash.hashsize; i++) {
1644 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1645 						  common.hnode) {
1646 				bool last_child;
1647 
1648 				if (!q->offload) {
1649 					htb_destroy_class(sch, cl);
1650 					continue;
1651 				}
1652 
1653 				nonempty = true;
1654 
1655 				if (cl->level)
1656 					continue;
1657 
1658 				changed = true;
1659 
1660 				last_child = htb_parent_last_child(cl);
1661 				htb_destroy_class_offload(sch, cl, last_child,
1662 							  true, NULL);
1663 				qdisc_class_hash_remove(&q->clhash,
1664 							&cl->common);
1665 				if (cl->parent)
1666 					cl->parent->children--;
1667 				if (last_child)
1668 					htb_parent_to_leaf(sch, cl, NULL);
1669 				htb_destroy_class(sch, cl);
1670 			}
1671 		}
1672 	} while (changed);
1673 	WARN_ON(nonempty);
1674 
1675 	qdisc_class_hash_destroy(&q->clhash);
1676 	__qdisc_reset_queue(&q->direct_queue);
1677 
1678 	if (q->offload) {
1679 		offload_opt = (struct tc_htb_qopt_offload) {
1680 			.command = TC_HTB_DESTROY,
1681 		};
1682 		htb_offload(dev, &offload_opt);
1683 	}
1684 
1685 	if (!q->direct_qdiscs)
1686 		return;
1687 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1688 		qdisc_put(q->direct_qdiscs[i]);
1689 	kfree(q->direct_qdiscs);
1690 }
1691 
htb_delete(struct Qdisc * sch,unsigned long arg,struct netlink_ext_ack * extack)1692 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1693 		      struct netlink_ext_ack *extack)
1694 {
1695 	struct htb_sched *q = qdisc_priv(sch);
1696 	struct htb_class *cl = (struct htb_class *)arg;
1697 	struct Qdisc *new_q = NULL;
1698 	int last_child = 0;
1699 	int err;
1700 
1701 	/* TODO: why don't allow to delete subtree ? references ? does
1702 	 * tc subsys guarantee us that in htb_destroy it holds no class
1703 	 * refs so that we can remove children safely there ?
1704 	 */
1705 	if (cl->children || qdisc_class_in_use(&cl->common)) {
1706 		NL_SET_ERR_MSG(extack, "HTB class in use");
1707 		return -EBUSY;
1708 	}
1709 
1710 	if (!cl->level && htb_parent_last_child(cl))
1711 		last_child = 1;
1712 
1713 	if (q->offload) {
1714 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1715 						extack);
1716 		if (err)
1717 			return err;
1718 	}
1719 
1720 	if (last_child) {
1721 		struct netdev_queue *dev_queue = sch->dev_queue;
1722 
1723 		if (q->offload)
1724 			dev_queue = htb_offload_get_queue(cl);
1725 
1726 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1727 					  cl->parent->common.classid,
1728 					  NULL);
1729 		if (q->offload)
1730 			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1731 	}
1732 
1733 	sch_tree_lock(sch);
1734 
1735 	if (!cl->level)
1736 		qdisc_purge_queue(cl->leaf.q);
1737 
1738 	/* delete from hash and active; remainder in destroy_class */
1739 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1740 	if (cl->parent)
1741 		cl->parent->children--;
1742 
1743 	if (cl->prio_activity)
1744 		htb_deactivate(q, cl);
1745 
1746 	if (cl->cmode != HTB_CAN_SEND)
1747 		htb_safe_rb_erase(&cl->pq_node,
1748 				  &q->hlevel[cl->level].wait_pq);
1749 
1750 	if (last_child)
1751 		htb_parent_to_leaf(sch, cl, new_q);
1752 
1753 	sch_tree_unlock(sch);
1754 
1755 	htb_destroy_class(sch, cl);
1756 	return 0;
1757 }
1758 
htb_change_class(struct Qdisc * sch,u32 classid,u32 parentid,struct nlattr ** tca,unsigned long * arg,struct netlink_ext_ack * extack)1759 static int htb_change_class(struct Qdisc *sch, u32 classid,
1760 			    u32 parentid, struct nlattr **tca,
1761 			    unsigned long *arg, struct netlink_ext_ack *extack)
1762 {
1763 	int err = -EINVAL;
1764 	struct htb_sched *q = qdisc_priv(sch);
1765 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1766 	struct tc_htb_qopt_offload offload_opt;
1767 	struct nlattr *opt = tca[TCA_OPTIONS];
1768 	struct nlattr *tb[TCA_HTB_MAX + 1];
1769 	struct Qdisc *parent_qdisc = NULL;
1770 	struct netdev_queue *dev_queue;
1771 	struct tc_htb_opt *hopt;
1772 	u64 rate64, ceil64;
1773 	int warn = 0;
1774 
1775 	/* extract all subattrs from opt attr */
1776 	if (!opt)
1777 		goto failure;
1778 
1779 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1780 					  extack);
1781 	if (err < 0)
1782 		goto failure;
1783 
1784 	err = -EINVAL;
1785 	if (tb[TCA_HTB_PARMS] == NULL)
1786 		goto failure;
1787 
1788 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1789 
1790 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1791 	if (!hopt->rate.rate || !hopt->ceil.rate)
1792 		goto failure;
1793 
1794 	if (q->offload) {
1795 		/* Options not supported by the offload. */
1796 		if (hopt->rate.overhead || hopt->ceil.overhead) {
1797 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1798 			goto failure;
1799 		}
1800 		if (hopt->rate.mpu || hopt->ceil.mpu) {
1801 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1802 			goto failure;
1803 		}
1804 	}
1805 
1806 	/* Keeping backward compatible with rate_table based iproute2 tc */
1807 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1808 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1809 					      NULL));
1810 
1811 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1812 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1813 					      NULL));
1814 
1815 	rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0);
1816 	ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0);
1817 
1818 	if (!cl) {		/* new class */
1819 		struct net_device *dev = qdisc_dev(sch);
1820 		struct Qdisc *new_q, *old_q;
1821 		int prio;
1822 		struct {
1823 			struct nlattr		nla;
1824 			struct gnet_estimator	opt;
1825 		} est = {
1826 			.nla = {
1827 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1828 				.nla_type	= TCA_RATE,
1829 			},
1830 			.opt = {
1831 				/* 4s interval, 16s averaging constant */
1832 				.interval	= 2,
1833 				.ewma_log	= 2,
1834 			},
1835 		};
1836 
1837 		/* check for valid classid */
1838 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1839 		    htb_find(classid, sch))
1840 			goto failure;
1841 
1842 		/* check maximal depth */
1843 		if (parent && parent->parent && parent->parent->level < 2) {
1844 			NL_SET_ERR_MSG_MOD(extack, "tree is too deep");
1845 			goto failure;
1846 		}
1847 		err = -ENOBUFS;
1848 		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1849 		if (!cl)
1850 			goto failure;
1851 
1852 		gnet_stats_basic_sync_init(&cl->bstats);
1853 		gnet_stats_basic_sync_init(&cl->bstats_bias);
1854 
1855 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1856 		if (err) {
1857 			kfree(cl);
1858 			goto failure;
1859 		}
1860 		if (htb_rate_est || tca[TCA_RATE]) {
1861 			err = gen_new_estimator(&cl->bstats, NULL,
1862 						&cl->rate_est,
1863 						NULL,
1864 						true,
1865 						tca[TCA_RATE] ? : &est.nla);
1866 			if (err)
1867 				goto err_block_put;
1868 		}
1869 
1870 		cl->children = 0;
1871 		RB_CLEAR_NODE(&cl->pq_node);
1872 
1873 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1874 			RB_CLEAR_NODE(&cl->node[prio]);
1875 
1876 		cl->common.classid = classid;
1877 
1878 		/* Make sure nothing interrupts us in between of two
1879 		 * ndo_setup_tc calls.
1880 		 */
1881 		ASSERT_RTNL();
1882 
1883 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1884 		 * so that can't be used inside of sch_tree_lock
1885 		 * -- thanks to Karlis Peisenieks
1886 		 */
1887 		if (!q->offload) {
1888 			dev_queue = sch->dev_queue;
1889 		} else if (!(parent && !parent->level)) {
1890 			/* Assign a dev_queue to this classid. */
1891 			offload_opt = (struct tc_htb_qopt_offload) {
1892 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1893 				.classid = cl->common.classid,
1894 				.parent_classid = parent ?
1895 					TC_H_MIN(parent->common.classid) :
1896 					TC_HTB_CLASSID_ROOT,
1897 				.rate = max_t(u64, hopt->rate.rate, rate64),
1898 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1899 				.prio = hopt->prio,
1900 				.quantum = hopt->quantum,
1901 				.extack = extack,
1902 			};
1903 			err = htb_offload(dev, &offload_opt);
1904 			if (err) {
1905 				NL_SET_ERR_MSG_WEAK(extack,
1906 						    "Failed to offload TC_HTB_LEAF_ALLOC_QUEUE");
1907 				goto err_kill_estimator;
1908 			}
1909 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1910 		} else { /* First child. */
1911 			dev_queue = htb_offload_get_queue(parent);
1912 			old_q = htb_graft_helper(dev_queue, NULL);
1913 			WARN_ON(old_q != parent->leaf.q);
1914 			offload_opt = (struct tc_htb_qopt_offload) {
1915 				.command = TC_HTB_LEAF_TO_INNER,
1916 				.classid = cl->common.classid,
1917 				.parent_classid =
1918 					TC_H_MIN(parent->common.classid),
1919 				.rate = max_t(u64, hopt->rate.rate, rate64),
1920 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1921 				.prio = hopt->prio,
1922 				.quantum = hopt->quantum,
1923 				.extack = extack,
1924 			};
1925 			err = htb_offload(dev, &offload_opt);
1926 			if (err) {
1927 				NL_SET_ERR_MSG_WEAK(extack,
1928 						    "Failed to offload TC_HTB_LEAF_TO_INNER");
1929 				htb_graft_helper(dev_queue, old_q);
1930 				goto err_kill_estimator;
1931 			}
1932 			_bstats_update(&parent->bstats_bias,
1933 				       u64_stats_read(&old_q->bstats.bytes),
1934 				       u64_stats_read(&old_q->bstats.packets));
1935 			qdisc_put(old_q);
1936 		}
1937 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1938 					  classid, NULL);
1939 		if (q->offload) {
1940 			/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1941 			if (new_q)
1942 				qdisc_refcount_inc(new_q);
1943 			old_q = htb_graft_helper(dev_queue, new_q);
1944 			/* No qdisc_put needed. */
1945 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1946 		}
1947 		sch_tree_lock(sch);
1948 		if (parent && !parent->level) {
1949 			/* turn parent into inner node */
1950 			qdisc_purge_queue(parent->leaf.q);
1951 			parent_qdisc = parent->leaf.q;
1952 			if (parent->prio_activity)
1953 				htb_deactivate(q, parent);
1954 
1955 			/* remove from evt list because of level change */
1956 			if (parent->cmode != HTB_CAN_SEND) {
1957 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1958 				parent->cmode = HTB_CAN_SEND;
1959 			}
1960 			parent->level = (parent->parent ? parent->parent->level
1961 					 : TC_HTB_MAXDEPTH) - 1;
1962 			memset(&parent->inner, 0, sizeof(parent->inner));
1963 		}
1964 
1965 		/* leaf (we) needs elementary qdisc */
1966 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1967 		if (q->offload)
1968 			cl->leaf.offload_queue = dev_queue;
1969 
1970 		cl->parent = parent;
1971 
1972 		/* set class to be in HTB_CAN_SEND state */
1973 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1974 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1975 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1976 		cl->t_c = ktime_get_ns();
1977 		cl->cmode = HTB_CAN_SEND;
1978 
1979 		/* attach to the hash list and parent's family */
1980 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1981 		if (parent)
1982 			parent->children++;
1983 		if (cl->leaf.q != &noop_qdisc)
1984 			qdisc_hash_add(cl->leaf.q, true);
1985 	} else {
1986 		if (tca[TCA_RATE]) {
1987 			err = gen_replace_estimator(&cl->bstats, NULL,
1988 						    &cl->rate_est,
1989 						    NULL,
1990 						    true,
1991 						    tca[TCA_RATE]);
1992 			if (err)
1993 				return err;
1994 		}
1995 
1996 		if (q->offload) {
1997 			struct net_device *dev = qdisc_dev(sch);
1998 
1999 			offload_opt = (struct tc_htb_qopt_offload) {
2000 				.command = TC_HTB_NODE_MODIFY,
2001 				.classid = cl->common.classid,
2002 				.rate = max_t(u64, hopt->rate.rate, rate64),
2003 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
2004 				.prio = hopt->prio,
2005 				.quantum = hopt->quantum,
2006 				.extack = extack,
2007 			};
2008 			err = htb_offload(dev, &offload_opt);
2009 			if (err)
2010 				/* Estimator was replaced, and rollback may fail
2011 				 * as well, so we don't try to recover it, and
2012 				 * the estimator won't work property with the
2013 				 * offload anyway, because bstats are updated
2014 				 * only when the stats are queried.
2015 				 */
2016 				return err;
2017 		}
2018 
2019 		sch_tree_lock(sch);
2020 	}
2021 
2022 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2023 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2024 
2025 	/* it used to be a nasty bug here, we have to check that node
2026 	 * is really leaf before changing cl->leaf !
2027 	 */
2028 	if (!cl->level) {
2029 		u64 quantum = cl->rate.rate_bytes_ps;
2030 
2031 		do_div(quantum, q->rate2quantum);
2032 		cl->quantum = min_t(u64, quantum, INT_MAX);
2033 
2034 		if (!hopt->quantum && cl->quantum < 1000) {
2035 			warn = -1;
2036 			cl->quantum = 1000;
2037 		}
2038 		if (!hopt->quantum && cl->quantum > 200000) {
2039 			warn = 1;
2040 			cl->quantum = 200000;
2041 		}
2042 		if (hopt->quantum)
2043 			cl->quantum = hopt->quantum;
2044 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2045 			cl->prio = TC_HTB_NUMPRIO - 1;
2046 	}
2047 
2048 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2049 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2050 
2051 	sch_tree_unlock(sch);
2052 	qdisc_put(parent_qdisc);
2053 
2054 	if (warn)
2055 		NL_SET_ERR_MSG_FMT_MOD(extack,
2056 				       "quantum of class %X is %s. Consider r2q change.",
2057 				       cl->common.classid, (warn == -1 ? "small" : "big"));
2058 
2059 	qdisc_class_hash_grow(sch, &q->clhash);
2060 
2061 	*arg = (unsigned long)cl;
2062 	return 0;
2063 
2064 err_kill_estimator:
2065 	gen_kill_estimator(&cl->rate_est);
2066 err_block_put:
2067 	tcf_block_put(cl->block);
2068 	kfree(cl);
2069 failure:
2070 	return err;
2071 }
2072 
htb_tcf_block(struct Qdisc * sch,unsigned long arg,struct netlink_ext_ack * extack)2073 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2074 				       struct netlink_ext_ack *extack)
2075 {
2076 	struct htb_sched *q = qdisc_priv(sch);
2077 	struct htb_class *cl = (struct htb_class *)arg;
2078 
2079 	return cl ? cl->block : q->block;
2080 }
2081 
htb_bind_filter(struct Qdisc * sch,unsigned long parent,u32 classid)2082 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2083 				     u32 classid)
2084 {
2085 	struct htb_class *cl = htb_find(classid, sch);
2086 
2087 	/*if (cl && !cl->level) return 0;
2088 	 * The line above used to be there to prevent attaching filters to
2089 	 * leaves. But at least tc_index filter uses this just to get class
2090 	 * for other reasons so that we have to allow for it.
2091 	 * ----
2092 	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2093 	 * another way to "lock" the class - unlike "get" this lock can
2094 	 * be broken by class during destroy IIUC.
2095 	 */
2096 	if (cl)
2097 		qdisc_class_get(&cl->common);
2098 	return (unsigned long)cl;
2099 }
2100 
htb_unbind_filter(struct Qdisc * sch,unsigned long arg)2101 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2102 {
2103 	struct htb_class *cl = (struct htb_class *)arg;
2104 
2105 	qdisc_class_put(&cl->common);
2106 }
2107 
htb_walk(struct Qdisc * sch,struct qdisc_walker * arg)2108 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2109 {
2110 	struct htb_sched *q = qdisc_priv(sch);
2111 	struct htb_class *cl;
2112 	unsigned int i;
2113 
2114 	if (arg->stop)
2115 		return;
2116 
2117 	for (i = 0; i < q->clhash.hashsize; i++) {
2118 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2119 			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2120 				return;
2121 		}
2122 	}
2123 }
2124 
2125 static const struct Qdisc_class_ops htb_class_ops = {
2126 	.select_queue	=	htb_select_queue,
2127 	.graft		=	htb_graft,
2128 	.leaf		=	htb_leaf,
2129 	.qlen_notify	=	htb_qlen_notify,
2130 	.find		=	htb_search,
2131 	.change		=	htb_change_class,
2132 	.delete		=	htb_delete,
2133 	.walk		=	htb_walk,
2134 	.tcf_block	=	htb_tcf_block,
2135 	.bind_tcf	=	htb_bind_filter,
2136 	.unbind_tcf	=	htb_unbind_filter,
2137 	.dump		=	htb_dump_class,
2138 	.dump_stats	=	htb_dump_class_stats,
2139 };
2140 
2141 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2142 	.cl_ops		=	&htb_class_ops,
2143 	.id		=	"htb",
2144 	.priv_size	=	sizeof(struct htb_sched),
2145 	.enqueue	=	htb_enqueue,
2146 	.dequeue	=	htb_dequeue,
2147 	.peek		=	qdisc_peek_dequeued,
2148 	.init		=	htb_init,
2149 	.attach		=	htb_attach,
2150 	.reset		=	htb_reset,
2151 	.destroy	=	htb_destroy,
2152 	.dump		=	htb_dump,
2153 	.owner		=	THIS_MODULE,
2154 };
2155 MODULE_ALIAS_NET_SCH("htb");
2156 
htb_module_init(void)2157 static int __init htb_module_init(void)
2158 {
2159 	return register_qdisc(&htb_qdisc_ops);
2160 }
htb_module_exit(void)2161 static void __exit htb_module_exit(void)
2162 {
2163 	unregister_qdisc(&htb_qdisc_ops);
2164 }
2165 
2166 module_init(htb_module_init)
2167 module_exit(htb_module_exit)
2168 MODULE_LICENSE("GPL");
2169 MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler");
2170