xref: /linux/net/sched/sch_htb.c (revision 189f164e573e18d9f8876dbd3ad8fcbe11f93037)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
4  *
5  * Authors:	Martin Devera, <devik@cdi.cz>
6  *
7  * Credits (in time order) for older HTB versions:
8  *              Stef Coene <stef.coene@docum.org>
9  *			HTB support at LARTC mailing list
10  *		Ondrej Kraus, <krauso@barr.cz>
11  *			found missing INIT_QDISC(htb)
12  *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
13  *			helped a lot to locate nasty class stall bug
14  *		Andi Kleen, Jamal Hadi, Bert Hubert
15  *			code review and helpful comments on shaping
16  *		Tomasz Wrona, <tw@eter.tym.pl>
17  *			created test case so that I was able to fix nasty bug
18  *		Wilfried Weissmann
19  *			spotted bug in dequeue code and helped with fix
20  *		Jiri Fojtasek
21  *			fixed requeue routine
22  *		and many others. thanks.
23  */
24 #include <linux/module.h>
25 #include <linux/moduleparam.h>
26 #include <linux/types.h>
27 #include <linux/kernel.h>
28 #include <linux/string.h>
29 #include <linux/errno.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/compiler.h>
33 #include <linux/rbtree.h>
34 #include <linux/workqueue.h>
35 #include <linux/slab.h>
36 #include <net/netlink.h>
37 #include <net/sch_generic.h>
38 #include <net/pkt_sched.h>
39 #include <net/pkt_cls.h>
40 
41 /* HTB algorithm.
42     Author: devik@cdi.cz
43     ========================================================================
44     HTB is like TBF with multiple classes. It is also similar to CBQ because
45     it allows to assign priority to each class in hierarchy.
46     In fact it is another implementation of Floyd's formal sharing.
47 
48     Levels:
49     Each class is assigned level. Leaf has ALWAYS level 0 and root
50     classes have level TC_HTB_MAXDEPTH-1. Interior nodes has level
51     one less than their parent.
52 */
53 
54 static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
55 #define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */
56 
57 #if HTB_VER >> 16 != TC_HTB_PROTOVER
58 #error "Mismatched sch_htb.c and pkt_sch.h"
59 #endif
60 
61 /* Module parameter and sysfs export */
62 module_param    (htb_hysteresis, int, 0640);
63 MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
64 
65 static int htb_rate_est = 0; /* htb classes have a default rate estimator */
66 module_param(htb_rate_est, int, 0640);
67 MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
68 
69 /* used internaly to keep status of single class */
70 enum htb_cmode {
71 	HTB_CANT_SEND,		/* class can't send and can't borrow */
72 	HTB_MAY_BORROW,		/* class can't send but may borrow */
73 	HTB_CAN_SEND		/* class can send */
74 };
75 
76 struct htb_prio {
77 	union {
78 		struct rb_root	row;
79 		struct rb_root	feed;
80 	};
81 	struct rb_node	*ptr;
82 	/* When class changes from state 1->2 and disconnects from
83 	 * parent's feed then we lost ptr value and start from the
84 	 * first child again. Here we store classid of the
85 	 * last valid ptr (used when ptr is NULL).
86 	 */
87 	u32		last_ptr_id;
88 };
89 
90 /* interior & leaf nodes; props specific to leaves are marked L:
91  * To reduce false sharing, place mostly read fields at beginning,
92  * and mostly written ones at the end.
93  */
94 struct htb_class {
95 	struct Qdisc_class_common common;
96 	struct psched_ratecfg	rate;
97 	struct psched_ratecfg	ceil;
98 	s64			buffer, cbuffer;/* token bucket depth/rate */
99 	s64			mbuffer;	/* max wait time */
100 	u32			prio;		/* these two are used only by leaves... */
101 	int			quantum;	/* but stored for parent-to-leaf return */
102 
103 	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
104 	struct tcf_block	*block;
105 
106 	int			level;		/* our level (see above) */
107 	unsigned int		children;
108 	struct htb_class	*parent;	/* parent class */
109 
110 	struct net_rate_estimator __rcu *rate_est;
111 
112 	/*
113 	 * Written often fields
114 	 */
115 	struct gnet_stats_basic_sync bstats;
116 	struct gnet_stats_basic_sync bstats_bias;
117 	struct tc_htb_xstats	xstats;	/* our special stats */
118 
119 	/* token bucket parameters */
120 	s64			tokens, ctokens;/* current number of tokens */
121 	s64			t_c;		/* checkpoint time */
122 
123 	union {
124 		struct htb_class_leaf {
125 			int		deficit[TC_HTB_MAXDEPTH];
126 			struct Qdisc	*q;
127 			struct netdev_queue *offload_queue;
128 		} leaf;
129 		struct htb_class_inner {
130 			struct htb_prio clprio[TC_HTB_NUMPRIO];
131 		} inner;
132 	};
133 	s64			pq_key;
134 
135 	int			prio_activity;	/* for which prios are we active */
136 	enum htb_cmode		cmode;		/* current mode of the class */
137 	struct rb_node		pq_node;	/* node for event queue */
138 	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */
139 
140 	unsigned int drops ____cacheline_aligned_in_smp;
141 	unsigned int		overlimits;
142 };
143 
144 struct htb_level {
145 	struct rb_root	wait_pq;
146 	struct htb_prio hprio[TC_HTB_NUMPRIO];
147 };
148 
149 struct htb_sched {
150 	struct Qdisc_class_hash clhash;
151 	int			defcls;		/* class where unclassified flows go to */
152 	int			rate2quantum;	/* quant = rate / rate2quantum */
153 
154 	/* filters for qdisc itself */
155 	struct tcf_proto __rcu	*filter_list;
156 	struct tcf_block	*block;
157 
158 #define HTB_WARN_TOOMANYEVENTS	0x1
159 	unsigned int		warned;	/* only one warning */
160 	int			direct_qlen;
161 	struct work_struct	work;
162 
163 	/* non shaped skbs; let them go directly thru */
164 	struct qdisc_skb_head	direct_queue;
165 	u32			direct_pkts;
166 	u32			overlimits;
167 
168 	struct qdisc_watchdog	watchdog;
169 
170 	s64			now;	/* cached dequeue time */
171 
172 	/* time of nearest event per level (row) */
173 	s64			near_ev_cache[TC_HTB_MAXDEPTH];
174 
175 	int			row_mask[TC_HTB_MAXDEPTH];
176 
177 	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
178 
179 	struct Qdisc		**direct_qdiscs;
180 	unsigned int            num_direct_qdiscs;
181 
182 	bool			offload;
183 };
184 
185 /* find class in global hash table using given handle */
htb_find(u32 handle,struct Qdisc * sch)186 static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
187 {
188 	struct htb_sched *q = qdisc_priv(sch);
189 	struct Qdisc_class_common *clc;
190 
191 	clc = qdisc_class_find(&q->clhash, handle);
192 	if (clc == NULL)
193 		return NULL;
194 	return container_of(clc, struct htb_class, common);
195 }
196 
htb_search(struct Qdisc * sch,u32 handle)197 static unsigned long htb_search(struct Qdisc *sch, u32 handle)
198 {
199 	return (unsigned long)htb_find(handle, sch);
200 }
201 
202 #define HTB_DIRECT ((struct htb_class *)-1L)
203 
204 /**
205  * htb_classify - classify a packet into class
206  * @skb: the socket buffer
207  * @sch: the active queue discipline
208  * @qerr: pointer for returned status code
209  *
210  * It returns NULL if the packet should be dropped or -1 if the packet
211  * should be passed directly thru. In all other cases leaf class is returned.
212  * We allow direct class selection by classid in priority. The we examine
213  * filters in qdisc and in inner nodes (if higher filter points to the inner
214  * node). If we end up with classid MAJOR:0 we enqueue the skb into special
215  * internal fifo (direct). These packets then go directly thru. If we still
216  * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
217  * then finish and return direct queue.
218  */
htb_classify(struct sk_buff * skb,struct Qdisc * sch,int * qerr)219 static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
220 				      int *qerr)
221 {
222 	struct htb_sched *q = qdisc_priv(sch);
223 	struct htb_class *cl;
224 	struct tcf_result res;
225 	struct tcf_proto *tcf;
226 	int result;
227 
228 	/* allow to select class by setting skb->priority to valid classid;
229 	 * note that nfmark can be used too by attaching filter fw with no
230 	 * rules in it
231 	 */
232 	if (skb->priority == sch->handle)
233 		return HTB_DIRECT;	/* X:0 (direct flow) selected */
234 	cl = htb_find(skb->priority, sch);
235 	if (cl) {
236 		if (cl->level == 0)
237 			return cl;
238 		/* Start with inner filter chain if a non-leaf class is selected */
239 		tcf = rcu_dereference_bh(cl->filter_list);
240 	} else {
241 		tcf = rcu_dereference_bh(q->filter_list);
242 	}
243 
244 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
245 	while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) {
246 #ifdef CONFIG_NET_CLS_ACT
247 		switch (result) {
248 		case TC_ACT_QUEUED:
249 		case TC_ACT_STOLEN:
250 		case TC_ACT_TRAP:
251 			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
252 			fallthrough;
253 		case TC_ACT_SHOT:
254 			return NULL;
255 		}
256 #endif
257 		cl = (void *)res.class;
258 		if (!cl) {
259 			if (res.classid == sch->handle)
260 				return HTB_DIRECT;	/* X:0 (direct flow) */
261 			cl = htb_find(res.classid, sch);
262 			if (!cl)
263 				break;	/* filter selected invalid classid */
264 		}
265 		if (!cl->level)
266 			return cl;	/* we hit leaf; return it */
267 
268 		/* we have got inner class; apply inner filter chain */
269 		tcf = rcu_dereference_bh(cl->filter_list);
270 	}
271 	/* classification failed; try to use default class */
272 	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
273 	if (!cl || cl->level)
274 		return HTB_DIRECT;	/* bad default .. this is safe bet */
275 	return cl;
276 }
277 
278 /**
279  * htb_add_to_id_tree - adds class to the round robin list
280  * @root: the root of the tree
281  * @cl: the class to add
282  * @prio: the give prio in class
283  *
284  * Routine adds class to the list (actually tree) sorted by classid.
285  * Make sure that class is not already on such list for given prio.
286  */
htb_add_to_id_tree(struct rb_root * root,struct htb_class * cl,int prio)287 static void htb_add_to_id_tree(struct rb_root *root,
288 			       struct htb_class *cl, int prio)
289 {
290 	struct rb_node **p = &root->rb_node, *parent = NULL;
291 
292 	while (*p) {
293 		struct htb_class *c;
294 		parent = *p;
295 		c = rb_entry(parent, struct htb_class, node[prio]);
296 
297 		if (cl->common.classid > c->common.classid)
298 			p = &parent->rb_right;
299 		else
300 			p = &parent->rb_left;
301 	}
302 	rb_link_node(&cl->node[prio], parent, p);
303 	rb_insert_color(&cl->node[prio], root);
304 }
305 
306 /**
307  * htb_add_to_wait_tree - adds class to the event queue with delay
308  * @q: the priority event queue
309  * @cl: the class to add
310  * @delay: delay in microseconds
311  *
312  * The class is added to priority event queue to indicate that class will
313  * change its mode in cl->pq_key microseconds. Make sure that class is not
314  * already in the queue.
315  */
htb_add_to_wait_tree(struct htb_sched * q,struct htb_class * cl,s64 delay)316 static void htb_add_to_wait_tree(struct htb_sched *q,
317 				 struct htb_class *cl, s64 delay)
318 {
319 	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
320 
321 	cl->pq_key = q->now + delay;
322 	if (cl->pq_key == q->now)
323 		cl->pq_key++;
324 
325 	/* update the nearest event cache */
326 	if (q->near_ev_cache[cl->level] > cl->pq_key)
327 		q->near_ev_cache[cl->level] = cl->pq_key;
328 
329 	while (*p) {
330 		struct htb_class *c;
331 		parent = *p;
332 		c = rb_entry(parent, struct htb_class, pq_node);
333 		if (cl->pq_key >= c->pq_key)
334 			p = &parent->rb_right;
335 		else
336 			p = &parent->rb_left;
337 	}
338 	rb_link_node(&cl->pq_node, parent, p);
339 	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
340 }
341 
342 /**
343  * htb_next_rb_node - finds next node in binary tree
344  * @n: the current node in binary tree
345  *
346  * When we are past last key we return NULL.
347  * Average complexity is 2 steps per call.
348  */
htb_next_rb_node(struct rb_node ** n)349 static inline void htb_next_rb_node(struct rb_node **n)
350 {
351 	if (*n)
352 		*n = rb_next(*n);
353 }
354 
355 /**
356  * htb_add_class_to_row - add class to its row
357  * @q: the priority event queue
358  * @cl: the class to add
359  * @mask: the given priorities in class in bitmap
360  *
361  * The class is added to row at priorities marked in mask.
362  * It does nothing if mask == 0.
363  */
htb_add_class_to_row(struct htb_sched * q,struct htb_class * cl,int mask)364 static inline void htb_add_class_to_row(struct htb_sched *q,
365 					struct htb_class *cl, int mask)
366 {
367 	q->row_mask[cl->level] |= mask;
368 	while (mask) {
369 		int prio = ffz(~mask);
370 		mask &= ~(1 << prio);
371 		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
372 	}
373 }
374 
375 /* If this triggers, it is a bug in this code, but it need not be fatal */
htb_safe_rb_erase(struct rb_node * rb,struct rb_root * root)376 static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
377 {
378 	if (RB_EMPTY_NODE(rb)) {
379 		WARN_ON(1);
380 	} else {
381 		rb_erase(rb, root);
382 		RB_CLEAR_NODE(rb);
383 	}
384 }
385 
386 
387 /**
388  * htb_remove_class_from_row - removes class from its row
389  * @q: the priority event queue
390  * @cl: the class to add
391  * @mask: the given priorities in class in bitmap
392  *
393  * The class is removed from row at priorities marked in mask.
394  * It does nothing if mask == 0.
395  */
htb_remove_class_from_row(struct htb_sched * q,struct htb_class * cl,int mask)396 static inline void htb_remove_class_from_row(struct htb_sched *q,
397 						 struct htb_class *cl, int mask)
398 {
399 	int m = 0;
400 	struct htb_level *hlevel = &q->hlevel[cl->level];
401 
402 	while (mask) {
403 		int prio = ffz(~mask);
404 		struct htb_prio *hprio = &hlevel->hprio[prio];
405 
406 		mask &= ~(1 << prio);
407 		if (hprio->ptr == cl->node + prio)
408 			htb_next_rb_node(&hprio->ptr);
409 
410 		htb_safe_rb_erase(cl->node + prio, &hprio->row);
411 		if (!hprio->row.rb_node)
412 			m |= 1 << prio;
413 	}
414 	q->row_mask[cl->level] &= ~m;
415 }
416 
417 /**
418  * htb_activate_prios - creates active classe's feed chain
419  * @q: the priority event queue
420  * @cl: the class to activate
421  *
422  * The class is connected to ancestors and/or appropriate rows
423  * for priorities it is participating on. cl->cmode must be new
424  * (activated) mode. It does nothing if cl->prio_activity == 0.
425  */
htb_activate_prios(struct htb_sched * q,struct htb_class * cl)426 static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
427 {
428 	struct htb_class *p = cl->parent;
429 	long m, mask = cl->prio_activity;
430 
431 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
432 		m = mask;
433 		while (m) {
434 			unsigned int prio = ffz(~m);
435 
436 			if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
437 				break;
438 			m &= ~(1 << prio);
439 
440 			if (p->inner.clprio[prio].feed.rb_node)
441 				/* parent already has its feed in use so that
442 				 * reset bit in mask as parent is already ok
443 				 */
444 				mask &= ~(1 << prio);
445 
446 			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
447 		}
448 		p->prio_activity |= mask;
449 		cl = p;
450 		p = cl->parent;
451 
452 	}
453 	if (cl->cmode == HTB_CAN_SEND && mask)
454 		htb_add_class_to_row(q, cl, mask);
455 }
456 
457 /**
458  * htb_deactivate_prios - remove class from feed chain
459  * @q: the priority event queue
460  * @cl: the class to deactivate
461  *
462  * cl->cmode must represent old mode (before deactivation). It does
463  * nothing if cl->prio_activity == 0. Class is removed from all feed
464  * chains and rows.
465  */
htb_deactivate_prios(struct htb_sched * q,struct htb_class * cl)466 static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
467 {
468 	struct htb_class *p = cl->parent;
469 	long m, mask = cl->prio_activity;
470 
471 	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
472 		m = mask;
473 		mask = 0;
474 		while (m) {
475 			int prio = ffz(~m);
476 			m &= ~(1 << prio);
477 
478 			if (p->inner.clprio[prio].ptr == cl->node + prio) {
479 				/* we are removing child which is pointed to from
480 				 * parent feed - forget the pointer but remember
481 				 * classid
482 				 */
483 				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
484 				p->inner.clprio[prio].ptr = NULL;
485 			}
486 
487 			htb_safe_rb_erase(cl->node + prio,
488 					  &p->inner.clprio[prio].feed);
489 
490 			if (!p->inner.clprio[prio].feed.rb_node)
491 				mask |= 1 << prio;
492 		}
493 
494 		p->prio_activity &= ~mask;
495 		cl = p;
496 		p = cl->parent;
497 
498 	}
499 	if (cl->cmode == HTB_CAN_SEND && mask)
500 		htb_remove_class_from_row(q, cl, mask);
501 }
502 
htb_lowater(const struct htb_class * cl)503 static inline s64 htb_lowater(const struct htb_class *cl)
504 {
505 	if (htb_hysteresis)
506 		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
507 	else
508 		return 0;
509 }
htb_hiwater(const struct htb_class * cl)510 static inline s64 htb_hiwater(const struct htb_class *cl)
511 {
512 	if (htb_hysteresis)
513 		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
514 	else
515 		return 0;
516 }
517 
518 
519 /**
520  * htb_class_mode - computes and returns current class mode
521  * @cl: the target class
522  * @diff: diff time in microseconds
523  *
524  * It computes cl's mode at time cl->t_c+diff and returns it. If mode
525  * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
526  * from now to time when cl will change its state.
527  * Also it is worth to note that class mode doesn't change simply
528  * at cl->{c,}tokens == 0 but there can rather be hysteresis of
529  * 0 .. -cl->{c,}buffer range. It is meant to limit number of
530  * mode transitions per time unit. The speed gain is about 1/6.
531  */
532 static inline enum htb_cmode
htb_class_mode(struct htb_class * cl,s64 * diff)533 htb_class_mode(struct htb_class *cl, s64 *diff)
534 {
535 	s64 toks;
536 
537 	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
538 		*diff = -toks;
539 		return HTB_CANT_SEND;
540 	}
541 
542 	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
543 		return HTB_CAN_SEND;
544 
545 	*diff = -toks;
546 	return HTB_MAY_BORROW;
547 }
548 
549 /**
550  * htb_change_class_mode - changes classe's mode
551  * @q: the priority event queue
552  * @cl: the target class
553  * @diff: diff time in microseconds
554  *
555  * This should be the only way how to change classe's mode under normal
556  * circumstances. Routine will update feed lists linkage, change mode
557  * and add class to the wait event queue if appropriate. New mode should
558  * be different from old one and cl->pq_key has to be valid if changing
559  * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
560  */
561 static void
htb_change_class_mode(struct htb_sched * q,struct htb_class * cl,s64 * diff)562 htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
563 {
564 	enum htb_cmode new_mode = htb_class_mode(cl, diff);
565 
566 	if (new_mode == cl->cmode)
567 		return;
568 
569 	if (new_mode == HTB_CANT_SEND) {
570 		cl->overlimits++;
571 		q->overlimits++;
572 	}
573 
574 	if (cl->prio_activity) {	/* not necessary: speed optimization */
575 		if (cl->cmode != HTB_CANT_SEND)
576 			htb_deactivate_prios(q, cl);
577 		cl->cmode = new_mode;
578 		if (new_mode != HTB_CANT_SEND)
579 			htb_activate_prios(q, cl);
580 	} else
581 		cl->cmode = new_mode;
582 }
583 
584 /**
585  * htb_activate - inserts leaf cl into appropriate active feeds
586  * @q: the priority event queue
587  * @cl: the target class
588  *
589  * Routine learns (new) priority of leaf and activates feed chain
590  * for the prio. It can be called on already active leaf safely.
591  * It also adds leaf into droplist.
592  */
htb_activate(struct htb_sched * q,struct htb_class * cl)593 static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
594 {
595 	WARN_ON(cl->level || !cl->leaf.q);
596 
597 	if (!cl->prio_activity) {
598 		cl->prio_activity = 1 << cl->prio;
599 		htb_activate_prios(q, cl);
600 	}
601 }
602 
603 /**
604  * htb_deactivate - remove leaf cl from active feeds
605  * @q: the priority event queue
606  * @cl: the target class
607  *
608  * Make sure that leaf is active. In the other words it can't be called
609  * with non-active leaf. It also removes class from the drop list.
610  */
htb_deactivate(struct htb_sched * q,struct htb_class * cl)611 static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
612 {
613 	if (!cl->prio_activity)
614 		return;
615 	htb_deactivate_prios(q, cl);
616 	cl->prio_activity = 0;
617 }
618 
htb_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)619 static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
620 		       struct sk_buff **to_free)
621 {
622 	int ret;
623 	unsigned int len = qdisc_pkt_len(skb);
624 	struct htb_sched *q = qdisc_priv(sch);
625 	struct htb_class *cl = htb_classify(skb, sch, &ret);
626 
627 	if (cl == HTB_DIRECT) {
628 		/* enqueue to helper queue */
629 		if (q->direct_queue.qlen < q->direct_qlen) {
630 			__qdisc_enqueue_tail(skb, &q->direct_queue);
631 			q->direct_pkts++;
632 		} else {
633 			return qdisc_drop(skb, sch, to_free);
634 		}
635 #ifdef CONFIG_NET_CLS_ACT
636 	} else if (!cl) {
637 		if (ret & __NET_XMIT_BYPASS)
638 			qdisc_qstats_drop(sch);
639 		__qdisc_drop(skb, to_free);
640 		return ret;
641 #endif
642 	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
643 					to_free)) != NET_XMIT_SUCCESS) {
644 		if (net_xmit_drop_count(ret)) {
645 			qdisc_qstats_drop(sch);
646 			cl->drops++;
647 		}
648 		return ret;
649 	} else {
650 		htb_activate(q, cl);
651 	}
652 
653 	sch->qstats.backlog += len;
654 	sch->q.qlen++;
655 	return NET_XMIT_SUCCESS;
656 }
657 
htb_accnt_tokens(struct htb_class * cl,int bytes,s64 diff)658 static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
659 {
660 	s64 toks = diff + cl->tokens;
661 
662 	if (toks > cl->buffer)
663 		toks = cl->buffer;
664 	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
665 	if (toks <= -cl->mbuffer)
666 		toks = 1 - cl->mbuffer;
667 
668 	cl->tokens = toks;
669 }
670 
htb_accnt_ctokens(struct htb_class * cl,int bytes,s64 diff)671 static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
672 {
673 	s64 toks = diff + cl->ctokens;
674 
675 	if (toks > cl->cbuffer)
676 		toks = cl->cbuffer;
677 	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
678 	if (toks <= -cl->mbuffer)
679 		toks = 1 - cl->mbuffer;
680 
681 	cl->ctokens = toks;
682 }
683 
684 /**
685  * htb_charge_class - charges amount "bytes" to leaf and ancestors
686  * @q: the priority event queue
687  * @cl: the class to start iterate
688  * @level: the minimum level to account
689  * @skb: the socket buffer
690  *
691  * Routine assumes that packet "bytes" long was dequeued from leaf cl
692  * borrowing from "level". It accounts bytes to ceil leaky bucket for
693  * leaf and all ancestors and to rate bucket for ancestors at levels
694  * "level" and higher. It also handles possible change of mode resulting
695  * from the update. Note that mode can also increase here (MAY_BORROW to
696  * CAN_SEND) because we can use more precise clock that event queue here.
697  * In such case we remove class from event queue first.
698  */
htb_charge_class(struct htb_sched * q,struct htb_class * cl,int level,struct sk_buff * skb)699 static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
700 			     int level, struct sk_buff *skb)
701 {
702 	int bytes = qdisc_pkt_len(skb);
703 	enum htb_cmode old_mode;
704 	s64 diff;
705 
706 	while (cl) {
707 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
708 		if (cl->level >= level) {
709 			if (cl->level == level)
710 				cl->xstats.lends++;
711 			htb_accnt_tokens(cl, bytes, diff);
712 		} else {
713 			cl->xstats.borrows++;
714 			cl->tokens += diff;	/* we moved t_c; update tokens */
715 		}
716 		htb_accnt_ctokens(cl, bytes, diff);
717 		cl->t_c = q->now;
718 
719 		old_mode = cl->cmode;
720 		diff = 0;
721 		htb_change_class_mode(q, cl, &diff);
722 		if (old_mode != cl->cmode) {
723 			if (old_mode != HTB_CAN_SEND)
724 				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
725 			if (cl->cmode != HTB_CAN_SEND)
726 				htb_add_to_wait_tree(q, cl, diff);
727 		}
728 
729 		/* update basic stats except for leaves which are already updated */
730 		if (cl->level)
731 			bstats_update(&cl->bstats, skb);
732 
733 		cl = cl->parent;
734 	}
735 }
736 
737 /**
738  * htb_do_events - make mode changes to classes at the level
739  * @q: the priority event queue
740  * @level: which wait_pq in 'q->hlevel'
741  * @start: start jiffies
742  *
743  * Scans event queue for pending events and applies them. Returns time of
744  * next pending event (0 for no event in pq, q->now for too many events).
745  * Note: Applied are events whose have cl->pq_key <= q->now.
746  */
htb_do_events(struct htb_sched * q,const int level,unsigned long start)747 static s64 htb_do_events(struct htb_sched *q, const int level,
748 			 unsigned long start)
749 {
750 	/* don't run for longer than 2 jiffies; 2 is used instead of
751 	 * 1 to simplify things when jiffy is going to be incremented
752 	 * too soon
753 	 */
754 	unsigned long stop_at = start + 2;
755 	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
756 
757 	while (time_before(jiffies, stop_at)) {
758 		struct htb_class *cl;
759 		s64 diff;
760 		struct rb_node *p = rb_first(wait_pq);
761 
762 		if (!p)
763 			return 0;
764 
765 		cl = rb_entry(p, struct htb_class, pq_node);
766 		if (cl->pq_key > q->now)
767 			return cl->pq_key;
768 
769 		htb_safe_rb_erase(p, wait_pq);
770 		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
771 		htb_change_class_mode(q, cl, &diff);
772 		if (cl->cmode != HTB_CAN_SEND)
773 			htb_add_to_wait_tree(q, cl, diff);
774 	}
775 
776 	/* too much load - let's continue after a break for scheduling */
777 	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
778 		pr_warn("htb: too many events!\n");
779 		q->warned |= HTB_WARN_TOOMANYEVENTS;
780 	}
781 
782 	return q->now;
783 }
784 
785 /* Returns class->node+prio from id-tree where classe's id is >= id. NULL
786  * is no such one exists.
787  */
htb_id_find_next_upper(int prio,struct rb_node * n,u32 id)788 static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
789 					      u32 id)
790 {
791 	struct rb_node *r = NULL;
792 	while (n) {
793 		struct htb_class *cl =
794 		    rb_entry(n, struct htb_class, node[prio]);
795 
796 		if (id > cl->common.classid) {
797 			n = n->rb_right;
798 		} else if (id < cl->common.classid) {
799 			r = n;
800 			n = n->rb_left;
801 		} else {
802 			return n;
803 		}
804 	}
805 	return r;
806 }
807 
808 /**
809  * htb_lookup_leaf - returns next leaf class in DRR order
810  * @hprio: the current one
811  * @prio: which prio in class
812  *
813  * Find leaf where current feed pointers points to.
814  */
htb_lookup_leaf(struct htb_prio * hprio,const int prio)815 static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
816 {
817 	int i;
818 	struct {
819 		struct rb_node *root;
820 		struct rb_node **pptr;
821 		u32 *pid;
822 	} stk[TC_HTB_MAXDEPTH], *sp = stk;
823 
824 	if (unlikely(!hprio->row.rb_node))
825 		return NULL;
826 
827 	sp->root = hprio->row.rb_node;
828 	sp->pptr = &hprio->ptr;
829 	sp->pid = &hprio->last_ptr_id;
830 
831 	for (i = 0; i < 65535; i++) {
832 		if (!*sp->pptr && *sp->pid) {
833 			/* ptr was invalidated but id is valid - try to recover
834 			 * the original or next ptr
835 			 */
836 			*sp->pptr =
837 			    htb_id_find_next_upper(prio, sp->root, *sp->pid);
838 		}
839 		*sp->pid = 0;	/* ptr is valid now so that remove this hint as it
840 				 * can become out of date quickly
841 				 */
842 		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
843 			*sp->pptr = sp->root;
844 			while ((*sp->pptr)->rb_left)
845 				*sp->pptr = (*sp->pptr)->rb_left;
846 			if (sp > stk) {
847 				sp--;
848 				if (!*sp->pptr) {
849 					WARN_ON(1);
850 					return NULL;
851 				}
852 				htb_next_rb_node(sp->pptr);
853 			}
854 		} else {
855 			struct htb_class *cl;
856 			struct htb_prio *clp;
857 
858 			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
859 			if (!cl->level)
860 				return cl;
861 			clp = &cl->inner.clprio[prio];
862 			(++sp)->root = clp->feed.rb_node;
863 			sp->pptr = &clp->ptr;
864 			sp->pid = &clp->last_ptr_id;
865 		}
866 	}
867 	WARN_ON(1);
868 	return NULL;
869 }
870 
871 /* dequeues packet at given priority and level; call only if
872  * you are sure that there is active class at prio/level
873  */
htb_dequeue_tree(struct htb_sched * q,const int prio,const int level)874 static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
875 					const int level)
876 {
877 	struct sk_buff *skb = NULL;
878 	struct htb_class *cl, *start;
879 	struct htb_level *hlevel = &q->hlevel[level];
880 	struct htb_prio *hprio = &hlevel->hprio[prio];
881 
882 	/* look initial class up in the row */
883 	start = cl = htb_lookup_leaf(hprio, prio);
884 
885 	do {
886 next:
887 		if (unlikely(!cl))
888 			return NULL;
889 
890 		/* class can be empty - it is unlikely but can be true if leaf
891 		 * qdisc drops packets in enqueue routine or if someone used
892 		 * graft operation on the leaf since last dequeue;
893 		 * simply deactivate and skip such class
894 		 */
895 		if (unlikely(cl->leaf.q->q.qlen == 0)) {
896 			struct htb_class *next;
897 			htb_deactivate(q, cl);
898 
899 			/* row/level might become empty */
900 			if ((q->row_mask[level] & (1 << prio)) == 0)
901 				return NULL;
902 
903 			next = htb_lookup_leaf(hprio, prio);
904 
905 			if (cl == start)	/* fix start if we just deleted it */
906 				start = next;
907 			cl = next;
908 			goto next;
909 		}
910 
911 		skb = cl->leaf.q->dequeue(cl->leaf.q);
912 		if (likely(skb != NULL))
913 			break;
914 
915 		qdisc_warn_nonwc("htb", cl->leaf.q);
916 		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
917 					 &q->hlevel[0].hprio[prio].ptr);
918 		cl = htb_lookup_leaf(hprio, prio);
919 
920 	} while (cl != start);
921 
922 	if (likely(skb != NULL)) {
923 		bstats_update(&cl->bstats, skb);
924 		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
925 		if (cl->leaf.deficit[level] < 0) {
926 			cl->leaf.deficit[level] += cl->quantum;
927 			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
928 						 &q->hlevel[0].hprio[prio].ptr);
929 		}
930 		/* this used to be after charge_class but this constelation
931 		 * gives us slightly better performance
932 		 */
933 		if (!cl->leaf.q->q.qlen)
934 			htb_deactivate(q, cl);
935 		htb_charge_class(q, cl, level, skb);
936 	}
937 	return skb;
938 }
939 
htb_dequeue(struct Qdisc * sch)940 static struct sk_buff *htb_dequeue(struct Qdisc *sch)
941 {
942 	struct sk_buff *skb;
943 	struct htb_sched *q = qdisc_priv(sch);
944 	int level;
945 	s64 next_event;
946 	unsigned long start_at;
947 
948 	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
949 	skb = __qdisc_dequeue_head(&q->direct_queue);
950 	if (skb != NULL) {
951 ok:
952 		qdisc_bstats_update(sch, skb);
953 		qdisc_qstats_backlog_dec(sch, skb);
954 		sch->q.qlen--;
955 		return skb;
956 	}
957 
958 	if (!sch->q.qlen)
959 		goto fin;
960 	q->now = ktime_get_ns();
961 	start_at = jiffies;
962 
963 	next_event = q->now + 5LLU * NSEC_PER_SEC;
964 
965 	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
966 		/* common case optimization - skip event handler quickly */
967 		int m;
968 		s64 event = q->near_ev_cache[level];
969 
970 		if (q->now >= event) {
971 			event = htb_do_events(q, level, start_at);
972 			if (!event)
973 				event = q->now + NSEC_PER_SEC;
974 			q->near_ev_cache[level] = event;
975 		}
976 
977 		if (next_event > event)
978 			next_event = event;
979 
980 		m = ~q->row_mask[level];
981 		while (m != (int)(-1)) {
982 			int prio = ffz(m);
983 
984 			m |= 1 << prio;
985 			skb = htb_dequeue_tree(q, prio, level);
986 			if (likely(skb != NULL))
987 				goto ok;
988 		}
989 	}
990 	if (likely(next_event > q->now))
991 		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
992 	else
993 		schedule_work(&q->work);
994 fin:
995 	return skb;
996 }
997 
998 /* reset all classes */
999 /* always caled under BH & queue lock */
htb_reset(struct Qdisc * sch)1000 static void htb_reset(struct Qdisc *sch)
1001 {
1002 	struct htb_sched *q = qdisc_priv(sch);
1003 	struct htb_class *cl;
1004 	unsigned int i;
1005 
1006 	for (i = 0; i < q->clhash.hashsize; i++) {
1007 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1008 			if (cl->level)
1009 				memset(&cl->inner, 0, sizeof(cl->inner));
1010 			else {
1011 				if (cl->leaf.q && !q->offload)
1012 					qdisc_reset(cl->leaf.q);
1013 			}
1014 			cl->prio_activity = 0;
1015 			cl->cmode = HTB_CAN_SEND;
1016 		}
1017 	}
1018 	qdisc_watchdog_cancel(&q->watchdog);
1019 	__qdisc_reset_queue(&q->direct_queue);
1020 	memset(q->hlevel, 0, sizeof(q->hlevel));
1021 	memset(q->row_mask, 0, sizeof(q->row_mask));
1022 }
1023 
1024 static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1025 	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
1026 	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
1027 	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1028 	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1029 	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
1030 	[TCA_HTB_RATE64] = { .type = NLA_U64 },
1031 	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
1032 	[TCA_HTB_OFFLOAD] = { .type = NLA_FLAG },
1033 };
1034 
htb_work_func(struct work_struct * work)1035 static void htb_work_func(struct work_struct *work)
1036 {
1037 	struct htb_sched *q = container_of(work, struct htb_sched, work);
1038 	struct Qdisc *sch = q->watchdog.qdisc;
1039 
1040 	rcu_read_lock();
1041 	__netif_schedule(qdisc_root(sch));
1042 	rcu_read_unlock();
1043 }
1044 
htb_offload(struct net_device * dev,struct tc_htb_qopt_offload * opt)1045 static int htb_offload(struct net_device *dev, struct tc_htb_qopt_offload *opt)
1046 {
1047 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_HTB, opt);
1048 }
1049 
htb_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)1050 static int htb_init(struct Qdisc *sch, struct nlattr *opt,
1051 		    struct netlink_ext_ack *extack)
1052 {
1053 	struct net_device *dev = qdisc_dev(sch);
1054 	struct tc_htb_qopt_offload offload_opt;
1055 	struct htb_sched *q = qdisc_priv(sch);
1056 	struct nlattr *tb[TCA_HTB_MAX + 1];
1057 	struct tc_htb_glob *gopt;
1058 	unsigned int ntx;
1059 	bool offload;
1060 	int err;
1061 
1062 	qdisc_watchdog_init(&q->watchdog, sch);
1063 	INIT_WORK(&q->work, htb_work_func);
1064 
1065 	if (!opt)
1066 		return -EINVAL;
1067 
1068 	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
1069 	if (err)
1070 		return err;
1071 
1072 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1073 					  NULL);
1074 	if (err < 0)
1075 		return err;
1076 
1077 	if (!tb[TCA_HTB_INIT])
1078 		return -EINVAL;
1079 
1080 	gopt = nla_data(tb[TCA_HTB_INIT]);
1081 	if (gopt->version != HTB_VER >> 16)
1082 		return -EINVAL;
1083 
1084 	offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
1085 
1086 	if (offload) {
1087 		if (sch->parent != TC_H_ROOT) {
1088 			NL_SET_ERR_MSG(extack, "HTB must be the root qdisc to use offload");
1089 			return -EOPNOTSUPP;
1090 		}
1091 
1092 		if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) {
1093 			NL_SET_ERR_MSG(extack, "hw-tc-offload ethtool feature flag must be on");
1094 			return -EOPNOTSUPP;
1095 		}
1096 
1097 		q->num_direct_qdiscs = dev->real_num_tx_queues;
1098 		q->direct_qdiscs = kzalloc_objs(*q->direct_qdiscs,
1099 						q->num_direct_qdiscs);
1100 		if (!q->direct_qdiscs)
1101 			return -ENOMEM;
1102 	}
1103 
1104 	err = qdisc_class_hash_init(&q->clhash);
1105 	if (err < 0)
1106 		return err;
1107 
1108 	if (tb[TCA_HTB_DIRECT_QLEN])
1109 		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1110 	else
1111 		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1112 
1113 	if ((q->rate2quantum = gopt->rate2quantum) < 1)
1114 		q->rate2quantum = 1;
1115 	q->defcls = gopt->defcls;
1116 
1117 	if (!offload)
1118 		return 0;
1119 
1120 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1121 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1122 		struct Qdisc *qdisc;
1123 
1124 		qdisc = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1125 					  TC_H_MAKE(sch->handle, 0), extack);
1126 		if (!qdisc) {
1127 			return -ENOMEM;
1128 		}
1129 
1130 		q->direct_qdiscs[ntx] = qdisc;
1131 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1132 	}
1133 
1134 	sch->flags |= TCQ_F_MQROOT;
1135 
1136 	offload_opt = (struct tc_htb_qopt_offload) {
1137 		.command = TC_HTB_CREATE,
1138 		.parent_classid = TC_H_MAJ(sch->handle) >> 16,
1139 		.classid = TC_H_MIN(q->defcls),
1140 		.extack = extack,
1141 	};
1142 	err = htb_offload(dev, &offload_opt);
1143 	if (err)
1144 		return err;
1145 
1146 	/* Defer this assignment, so that htb_destroy skips offload-related
1147 	 * parts (especially calling ndo_setup_tc) on errors.
1148 	 */
1149 	q->offload = true;
1150 
1151 	return 0;
1152 }
1153 
htb_attach_offload(struct Qdisc * sch)1154 static void htb_attach_offload(struct Qdisc *sch)
1155 {
1156 	struct net_device *dev = qdisc_dev(sch);
1157 	struct htb_sched *q = qdisc_priv(sch);
1158 	unsigned int ntx;
1159 
1160 	for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
1161 		struct Qdisc *old, *qdisc = q->direct_qdiscs[ntx];
1162 
1163 		old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
1164 		qdisc_put(old);
1165 		qdisc_hash_add(qdisc, false);
1166 	}
1167 	for (ntx = q->num_direct_qdiscs; ntx < dev->num_tx_queues; ntx++) {
1168 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1169 		struct Qdisc *old = dev_graft_qdisc(dev_queue, NULL);
1170 
1171 		qdisc_put(old);
1172 	}
1173 
1174 	kfree(q->direct_qdiscs);
1175 	q->direct_qdiscs = NULL;
1176 }
1177 
htb_attach_software(struct Qdisc * sch)1178 static void htb_attach_software(struct Qdisc *sch)
1179 {
1180 	struct net_device *dev = qdisc_dev(sch);
1181 	unsigned int ntx;
1182 
1183 	/* Resemble qdisc_graft behavior. */
1184 	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
1185 		struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, ntx);
1186 		struct Qdisc *old = dev_graft_qdisc(dev_queue, sch);
1187 
1188 		qdisc_refcount_inc(sch);
1189 
1190 		qdisc_put(old);
1191 	}
1192 }
1193 
htb_attach(struct Qdisc * sch)1194 static void htb_attach(struct Qdisc *sch)
1195 {
1196 	struct htb_sched *q = qdisc_priv(sch);
1197 
1198 	if (q->offload)
1199 		htb_attach_offload(sch);
1200 	else
1201 		htb_attach_software(sch);
1202 }
1203 
htb_dump(struct Qdisc * sch,struct sk_buff * skb)1204 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1205 {
1206 	struct htb_sched *q = qdisc_priv(sch);
1207 	struct nlattr *nest;
1208 	struct tc_htb_glob gopt;
1209 
1210 	if (q->offload)
1211 		sch->flags |= TCQ_F_OFFLOADED;
1212 	else
1213 		sch->flags &= ~TCQ_F_OFFLOADED;
1214 
1215 	sch->qstats.overlimits = q->overlimits;
1216 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1217 	 * no change can happen on the qdisc parameters.
1218 	 */
1219 
1220 	gopt.direct_pkts = q->direct_pkts;
1221 	gopt.version = HTB_VER;
1222 	gopt.rate2quantum = q->rate2quantum;
1223 	gopt.defcls = q->defcls;
1224 	gopt.debug = 0;
1225 
1226 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1227 	if (nest == NULL)
1228 		goto nla_put_failure;
1229 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1230 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1231 		goto nla_put_failure;
1232 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1233 		goto nla_put_failure;
1234 
1235 	return nla_nest_end(skb, nest);
1236 
1237 nla_put_failure:
1238 	nla_nest_cancel(skb, nest);
1239 	return -1;
1240 }
1241 
htb_dump_class(struct Qdisc * sch,unsigned long arg,struct sk_buff * skb,struct tcmsg * tcm)1242 static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1243 			  struct sk_buff *skb, struct tcmsg *tcm)
1244 {
1245 	struct htb_class *cl = (struct htb_class *)arg;
1246 	struct htb_sched *q = qdisc_priv(sch);
1247 	struct nlattr *nest;
1248 	struct tc_htb_opt opt;
1249 
1250 	/* Its safe to not acquire qdisc lock. As we hold RTNL,
1251 	 * no change can happen on the class parameters.
1252 	 */
1253 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1254 	tcm->tcm_handle = cl->common.classid;
1255 	if (!cl->level && cl->leaf.q)
1256 		tcm->tcm_info = cl->leaf.q->handle;
1257 
1258 	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
1259 	if (nest == NULL)
1260 		goto nla_put_failure;
1261 
1262 	memset(&opt, 0, sizeof(opt));
1263 
1264 	psched_ratecfg_getrate(&opt.rate, &cl->rate);
1265 	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
1266 	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
1267 	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
1268 	opt.quantum = cl->quantum;
1269 	opt.prio = cl->prio;
1270 	opt.level = cl->level;
1271 	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1272 		goto nla_put_failure;
1273 	if (q->offload && nla_put_flag(skb, TCA_HTB_OFFLOAD))
1274 		goto nla_put_failure;
1275 	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1276 	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
1277 			      TCA_HTB_PAD))
1278 		goto nla_put_failure;
1279 	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1280 	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
1281 			      TCA_HTB_PAD))
1282 		goto nla_put_failure;
1283 
1284 	return nla_nest_end(skb, nest);
1285 
1286 nla_put_failure:
1287 	nla_nest_cancel(skb, nest);
1288 	return -1;
1289 }
1290 
htb_offload_aggregate_stats(struct htb_sched * q,struct htb_class * cl)1291 static void htb_offload_aggregate_stats(struct htb_sched *q,
1292 					struct htb_class *cl)
1293 {
1294 	u64 bytes = 0, packets = 0;
1295 	struct htb_class *c;
1296 	unsigned int i;
1297 
1298 	gnet_stats_basic_sync_init(&cl->bstats);
1299 
1300 	for (i = 0; i < q->clhash.hashsize; i++) {
1301 		hlist_for_each_entry(c, &q->clhash.hash[i], common.hnode) {
1302 			struct htb_class *p = c;
1303 
1304 			while (p && p->level < cl->level)
1305 				p = p->parent;
1306 
1307 			if (p != cl)
1308 				continue;
1309 
1310 			bytes += u64_stats_read(&c->bstats_bias.bytes);
1311 			packets += u64_stats_read(&c->bstats_bias.packets);
1312 			if (c->level == 0) {
1313 				bytes += u64_stats_read(&c->leaf.q->bstats.bytes);
1314 				packets += u64_stats_read(&c->leaf.q->bstats.packets);
1315 			}
1316 		}
1317 	}
1318 	_bstats_update(&cl->bstats, bytes, packets);
1319 }
1320 
1321 static int
htb_dump_class_stats(struct Qdisc * sch,unsigned long arg,struct gnet_dump * d)1322 htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1323 {
1324 	struct htb_class *cl = (struct htb_class *)arg;
1325 	struct htb_sched *q = qdisc_priv(sch);
1326 	struct gnet_stats_queue qs = {
1327 		.drops = cl->drops,
1328 		.overlimits = cl->overlimits,
1329 	};
1330 	__u32 qlen = 0;
1331 
1332 	if (!cl->level && cl->leaf.q)
1333 		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
1334 
1335 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
1336 				    INT_MIN, INT_MAX);
1337 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
1338 				     INT_MIN, INT_MAX);
1339 
1340 	if (q->offload) {
1341 		if (!cl->level) {
1342 			if (cl->leaf.q)
1343 				cl->bstats = cl->leaf.q->bstats;
1344 			else
1345 				gnet_stats_basic_sync_init(&cl->bstats);
1346 			_bstats_update(&cl->bstats,
1347 				       u64_stats_read(&cl->bstats_bias.bytes),
1348 				       u64_stats_read(&cl->bstats_bias.packets));
1349 		} else {
1350 			htb_offload_aggregate_stats(q, cl);
1351 		}
1352 	}
1353 
1354 	if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
1355 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1356 	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
1357 		return -1;
1358 
1359 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1360 }
1361 
1362 static struct netdev_queue *
htb_select_queue(struct Qdisc * sch,struct tcmsg * tcm)1363 htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
1364 {
1365 	struct net_device *dev = qdisc_dev(sch);
1366 	struct tc_htb_qopt_offload offload_opt;
1367 	struct htb_sched *q = qdisc_priv(sch);
1368 	int err;
1369 
1370 	if (!q->offload)
1371 		return sch->dev_queue;
1372 
1373 	offload_opt = (struct tc_htb_qopt_offload) {
1374 		.command = TC_HTB_LEAF_QUERY_QUEUE,
1375 		.classid = TC_H_MIN(tcm->tcm_parent),
1376 	};
1377 	err = htb_offload(dev, &offload_opt);
1378 	if (err || offload_opt.qid >= dev->num_tx_queues)
1379 		return NULL;
1380 	return netdev_get_tx_queue(dev, offload_opt.qid);
1381 }
1382 
1383 static struct Qdisc *
htb_graft_helper(struct netdev_queue * dev_queue,struct Qdisc * new_q)1384 htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q)
1385 {
1386 	struct net_device *dev = dev_queue->dev;
1387 	struct Qdisc *old_q;
1388 
1389 	if (dev->flags & IFF_UP)
1390 		dev_deactivate(dev);
1391 	old_q = dev_graft_qdisc(dev_queue, new_q);
1392 	if (new_q)
1393 		new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
1394 	if (dev->flags & IFF_UP)
1395 		dev_activate(dev);
1396 
1397 	return old_q;
1398 }
1399 
htb_offload_get_queue(struct htb_class * cl)1400 static struct netdev_queue *htb_offload_get_queue(struct htb_class *cl)
1401 {
1402 	struct netdev_queue *queue;
1403 
1404 	queue = cl->leaf.offload_queue;
1405 	if (!(cl->leaf.q->flags & TCQ_F_BUILTIN))
1406 		WARN_ON(cl->leaf.q->dev_queue != queue);
1407 
1408 	return queue;
1409 }
1410 
htb_offload_move_qdisc(struct Qdisc * sch,struct htb_class * cl_old,struct htb_class * cl_new,bool destroying)1411 static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old,
1412 				   struct htb_class *cl_new, bool destroying)
1413 {
1414 	struct netdev_queue *queue_old, *queue_new;
1415 	struct net_device *dev = qdisc_dev(sch);
1416 
1417 	queue_old = htb_offload_get_queue(cl_old);
1418 	queue_new = htb_offload_get_queue(cl_new);
1419 
1420 	if (!destroying) {
1421 		struct Qdisc *qdisc;
1422 
1423 		if (dev->flags & IFF_UP)
1424 			dev_deactivate(dev);
1425 		qdisc = dev_graft_qdisc(queue_old, NULL);
1426 		WARN_ON(qdisc != cl_old->leaf.q);
1427 	}
1428 
1429 	if (!(cl_old->leaf.q->flags & TCQ_F_BUILTIN))
1430 		cl_old->leaf.q->dev_queue = queue_new;
1431 	cl_old->leaf.offload_queue = queue_new;
1432 
1433 	if (!destroying) {
1434 		struct Qdisc *qdisc;
1435 
1436 		qdisc = dev_graft_qdisc(queue_new, cl_old->leaf.q);
1437 		if (dev->flags & IFF_UP)
1438 			dev_activate(dev);
1439 		WARN_ON(!(qdisc->flags & TCQ_F_BUILTIN));
1440 	}
1441 }
1442 
htb_graft(struct Qdisc * sch,unsigned long arg,struct Qdisc * new,struct Qdisc ** old,struct netlink_ext_ack * extack)1443 static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1444 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1445 {
1446 	struct netdev_queue *dev_queue = sch->dev_queue;
1447 	struct htb_class *cl = (struct htb_class *)arg;
1448 	struct htb_sched *q = qdisc_priv(sch);
1449 	struct Qdisc *old_q;
1450 
1451 	if (cl->level)
1452 		return -EINVAL;
1453 
1454 	if (q->offload)
1455 		dev_queue = htb_offload_get_queue(cl);
1456 
1457 	if (!new) {
1458 		new = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1459 					cl->common.classid, extack);
1460 		if (!new)
1461 			return -ENOBUFS;
1462 	}
1463 
1464 	if (q->offload) {
1465 		/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1466 		qdisc_refcount_inc(new);
1467 		old_q = htb_graft_helper(dev_queue, new);
1468 	}
1469 
1470 	*old = qdisc_replace(sch, new, &cl->leaf.q);
1471 
1472 	if (q->offload) {
1473 		WARN_ON(old_q != *old);
1474 		qdisc_put(old_q);
1475 	}
1476 
1477 	return 0;
1478 }
1479 
htb_leaf(struct Qdisc * sch,unsigned long arg)1480 static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1481 {
1482 	struct htb_class *cl = (struct htb_class *)arg;
1483 	return !cl->level ? cl->leaf.q : NULL;
1484 }
1485 
htb_qlen_notify(struct Qdisc * sch,unsigned long arg)1486 static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1487 {
1488 	struct htb_class *cl = (struct htb_class *)arg;
1489 
1490 	htb_deactivate(qdisc_priv(sch), cl);
1491 }
1492 
htb_parent_last_child(struct htb_class * cl)1493 static inline int htb_parent_last_child(struct htb_class *cl)
1494 {
1495 	if (!cl->parent)
1496 		/* the root class */
1497 		return 0;
1498 	if (cl->parent->children > 1)
1499 		/* not the last child */
1500 		return 0;
1501 	return 1;
1502 }
1503 
htb_parent_to_leaf(struct Qdisc * sch,struct htb_class * cl,struct Qdisc * new_q)1504 static void htb_parent_to_leaf(struct Qdisc *sch, struct htb_class *cl,
1505 			       struct Qdisc *new_q)
1506 {
1507 	struct htb_sched *q = qdisc_priv(sch);
1508 	struct htb_class *parent = cl->parent;
1509 
1510 	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);
1511 
1512 	if (parent->cmode != HTB_CAN_SEND)
1513 		htb_safe_rb_erase(&parent->pq_node,
1514 				  &q->hlevel[parent->level].wait_pq);
1515 
1516 	parent->level = 0;
1517 	memset(&parent->inner, 0, sizeof(parent->inner));
1518 	parent->leaf.q = new_q ? new_q : &noop_qdisc;
1519 	parent->tokens = parent->buffer;
1520 	parent->ctokens = parent->cbuffer;
1521 	parent->t_c = ktime_get_ns();
1522 	parent->cmode = HTB_CAN_SEND;
1523 	if (q->offload)
1524 		parent->leaf.offload_queue = cl->leaf.offload_queue;
1525 }
1526 
htb_parent_to_leaf_offload(struct Qdisc * sch,struct netdev_queue * dev_queue,struct Qdisc * new_q)1527 static void htb_parent_to_leaf_offload(struct Qdisc *sch,
1528 				       struct netdev_queue *dev_queue,
1529 				       struct Qdisc *new_q)
1530 {
1531 	struct Qdisc *old_q;
1532 
1533 	/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1534 	if (new_q)
1535 		qdisc_refcount_inc(new_q);
1536 	old_q = htb_graft_helper(dev_queue, new_q);
1537 	WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1538 }
1539 
htb_destroy_class_offload(struct Qdisc * sch,struct htb_class * cl,bool last_child,bool destroying,struct netlink_ext_ack * extack)1540 static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
1541 				     bool last_child, bool destroying,
1542 				     struct netlink_ext_ack *extack)
1543 {
1544 	struct tc_htb_qopt_offload offload_opt;
1545 	struct netdev_queue *dev_queue;
1546 	struct Qdisc *q = cl->leaf.q;
1547 	struct Qdisc *old;
1548 	int err;
1549 
1550 	if (cl->level)
1551 		return -EINVAL;
1552 
1553 	WARN_ON(!q);
1554 	dev_queue = htb_offload_get_queue(cl);
1555 	/* When destroying, caller qdisc_graft grafts the new qdisc and invokes
1556 	 * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
1557 	 * does not need to graft or qdisc_put the qdisc being destroyed.
1558 	 */
1559 	if (!destroying) {
1560 		old = htb_graft_helper(dev_queue, NULL);
1561 		/* Last qdisc grafted should be the same as cl->leaf.q when
1562 		 * calling htb_delete.
1563 		 */
1564 		WARN_ON(old != q);
1565 	}
1566 
1567 	if (cl->parent) {
1568 		_bstats_update(&cl->parent->bstats_bias,
1569 			       u64_stats_read(&q->bstats.bytes),
1570 			       u64_stats_read(&q->bstats.packets));
1571 	}
1572 
1573 	offload_opt = (struct tc_htb_qopt_offload) {
1574 		.command = !last_child ? TC_HTB_LEAF_DEL :
1575 			   destroying ? TC_HTB_LEAF_DEL_LAST_FORCE :
1576 			   TC_HTB_LEAF_DEL_LAST,
1577 		.classid = cl->common.classid,
1578 		.extack = extack,
1579 	};
1580 	err = htb_offload(qdisc_dev(sch), &offload_opt);
1581 
1582 	if (!destroying) {
1583 		if (!err)
1584 			qdisc_put(old);
1585 		else
1586 			htb_graft_helper(dev_queue, old);
1587 	}
1588 
1589 	if (last_child)
1590 		return err;
1591 
1592 	if (!err && offload_opt.classid != TC_H_MIN(cl->common.classid)) {
1593 		u32 classid = TC_H_MAJ(sch->handle) |
1594 			      TC_H_MIN(offload_opt.classid);
1595 		struct htb_class *moved_cl = htb_find(classid, sch);
1596 
1597 		htb_offload_move_qdisc(sch, moved_cl, cl, destroying);
1598 	}
1599 
1600 	return err;
1601 }
1602 
htb_destroy_class(struct Qdisc * sch,struct htb_class * cl)1603 static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1604 {
1605 	if (!cl->level) {
1606 		WARN_ON(!cl->leaf.q);
1607 		qdisc_put(cl->leaf.q);
1608 	}
1609 	gen_kill_estimator(&cl->rate_est);
1610 	tcf_block_put(cl->block);
1611 	kfree(cl);
1612 }
1613 
htb_destroy(struct Qdisc * sch)1614 static void htb_destroy(struct Qdisc *sch)
1615 {
1616 	struct net_device *dev = qdisc_dev(sch);
1617 	struct tc_htb_qopt_offload offload_opt;
1618 	struct htb_sched *q = qdisc_priv(sch);
1619 	struct hlist_node *next;
1620 	bool nonempty, changed;
1621 	struct htb_class *cl;
1622 	unsigned int i;
1623 
1624 	cancel_work_sync(&q->work);
1625 	qdisc_watchdog_cancel(&q->watchdog);
1626 	/* This line used to be after htb_destroy_class call below
1627 	 * and surprisingly it worked in 2.4. But it must precede it
1628 	 * because filter need its target class alive to be able to call
1629 	 * unbind_filter on it (without Oops).
1630 	 */
1631 	tcf_block_put(q->block);
1632 
1633 	for (i = 0; i < q->clhash.hashsize; i++) {
1634 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1635 			tcf_block_put(cl->block);
1636 			cl->block = NULL;
1637 		}
1638 	}
1639 
1640 	do {
1641 		nonempty = false;
1642 		changed = false;
1643 		for (i = 0; i < q->clhash.hashsize; i++) {
1644 			hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
1645 						  common.hnode) {
1646 				bool last_child;
1647 
1648 				if (!q->offload) {
1649 					htb_destroy_class(sch, cl);
1650 					continue;
1651 				}
1652 
1653 				nonempty = true;
1654 
1655 				if (cl->level)
1656 					continue;
1657 
1658 				changed = true;
1659 
1660 				last_child = htb_parent_last_child(cl);
1661 				htb_destroy_class_offload(sch, cl, last_child,
1662 							  true, NULL);
1663 				qdisc_class_hash_remove(&q->clhash,
1664 							&cl->common);
1665 				if (cl->parent)
1666 					cl->parent->children--;
1667 				if (last_child)
1668 					htb_parent_to_leaf(sch, cl, NULL);
1669 				htb_destroy_class(sch, cl);
1670 			}
1671 		}
1672 	} while (changed);
1673 	WARN_ON(nonempty);
1674 
1675 	qdisc_class_hash_destroy(&q->clhash);
1676 	__qdisc_reset_queue(&q->direct_queue);
1677 
1678 	if (q->offload) {
1679 		offload_opt = (struct tc_htb_qopt_offload) {
1680 			.command = TC_HTB_DESTROY,
1681 		};
1682 		htb_offload(dev, &offload_opt);
1683 	}
1684 
1685 	if (!q->direct_qdiscs)
1686 		return;
1687 	for (i = 0; i < q->num_direct_qdiscs && q->direct_qdiscs[i]; i++)
1688 		qdisc_put(q->direct_qdiscs[i]);
1689 	kfree(q->direct_qdiscs);
1690 }
1691 
htb_delete(struct Qdisc * sch,unsigned long arg,struct netlink_ext_ack * extack)1692 static int htb_delete(struct Qdisc *sch, unsigned long arg,
1693 		      struct netlink_ext_ack *extack)
1694 {
1695 	struct htb_sched *q = qdisc_priv(sch);
1696 	struct htb_class *cl = (struct htb_class *)arg;
1697 	struct Qdisc *new_q = NULL;
1698 	int last_child = 0;
1699 	int err;
1700 
1701 	/* TODO: why don't allow to delete subtree ? references ? does
1702 	 * tc subsys guarantee us that in htb_destroy it holds no class
1703 	 * refs so that we can remove children safely there ?
1704 	 */
1705 	if (cl->children || qdisc_class_in_use(&cl->common)) {
1706 		NL_SET_ERR_MSG(extack, "HTB class in use");
1707 		return -EBUSY;
1708 	}
1709 
1710 	if (!cl->level && htb_parent_last_child(cl))
1711 		last_child = 1;
1712 
1713 	if (q->offload) {
1714 		err = htb_destroy_class_offload(sch, cl, last_child, false,
1715 						extack);
1716 		if (err)
1717 			return err;
1718 	}
1719 
1720 	if (last_child) {
1721 		struct netdev_queue *dev_queue = sch->dev_queue;
1722 
1723 		if (q->offload)
1724 			dev_queue = htb_offload_get_queue(cl);
1725 
1726 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1727 					  cl->parent->common.classid,
1728 					  NULL);
1729 		if (q->offload)
1730 			htb_parent_to_leaf_offload(sch, dev_queue, new_q);
1731 	}
1732 
1733 	sch_tree_lock(sch);
1734 
1735 	if (!cl->level)
1736 		qdisc_purge_queue(cl->leaf.q);
1737 
1738 	/* delete from hash and active; remainder in destroy_class */
1739 	qdisc_class_hash_remove(&q->clhash, &cl->common);
1740 	if (cl->parent)
1741 		cl->parent->children--;
1742 
1743 	htb_deactivate(q, cl);
1744 
1745 	if (cl->cmode != HTB_CAN_SEND)
1746 		htb_safe_rb_erase(&cl->pq_node,
1747 				  &q->hlevel[cl->level].wait_pq);
1748 
1749 	if (last_child)
1750 		htb_parent_to_leaf(sch, cl, new_q);
1751 
1752 	sch_tree_unlock(sch);
1753 
1754 	htb_destroy_class(sch, cl);
1755 	return 0;
1756 }
1757 
htb_change_class(struct Qdisc * sch,u32 classid,u32 parentid,struct nlattr ** tca,unsigned long * arg,struct netlink_ext_ack * extack)1758 static int htb_change_class(struct Qdisc *sch, u32 classid,
1759 			    u32 parentid, struct nlattr **tca,
1760 			    unsigned long *arg, struct netlink_ext_ack *extack)
1761 {
1762 	int err = -EINVAL;
1763 	struct htb_sched *q = qdisc_priv(sch);
1764 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
1765 	struct tc_htb_qopt_offload offload_opt;
1766 	struct nlattr *opt = tca[TCA_OPTIONS];
1767 	struct nlattr *tb[TCA_HTB_MAX + 1];
1768 	struct Qdisc *parent_qdisc = NULL;
1769 	struct netdev_queue *dev_queue;
1770 	struct tc_htb_opt *hopt;
1771 	u64 rate64, ceil64;
1772 	int warn = 0;
1773 
1774 	/* extract all subattrs from opt attr */
1775 	if (!opt)
1776 		goto failure;
1777 
1778 	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
1779 					  extack);
1780 	if (err < 0)
1781 		goto failure;
1782 
1783 	err = -EINVAL;
1784 	if (tb[TCA_HTB_PARMS] == NULL)
1785 		goto failure;
1786 
1787 	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1788 
1789 	hopt = nla_data(tb[TCA_HTB_PARMS]);
1790 	if (!hopt->rate.rate || !hopt->ceil.rate)
1791 		goto failure;
1792 
1793 	if (q->offload) {
1794 		/* Options not supported by the offload. */
1795 		if (hopt->rate.overhead || hopt->ceil.overhead) {
1796 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter");
1797 			goto failure;
1798 		}
1799 		if (hopt->rate.mpu || hopt->ceil.mpu) {
1800 			NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter");
1801 			goto failure;
1802 		}
1803 	}
1804 
1805 	/* Keeping backward compatible with rate_table based iproute2 tc */
1806 	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1807 		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
1808 					      NULL));
1809 
1810 	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1811 		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
1812 					      NULL));
1813 
1814 	rate64 = nla_get_u64_default(tb[TCA_HTB_RATE64], 0);
1815 	ceil64 = nla_get_u64_default(tb[TCA_HTB_CEIL64], 0);
1816 
1817 	if (!cl) {		/* new class */
1818 		struct net_device *dev = qdisc_dev(sch);
1819 		struct Qdisc *new_q, *old_q;
1820 		int prio;
1821 		struct {
1822 			struct nlattr		nla;
1823 			struct gnet_estimator	opt;
1824 		} est = {
1825 			.nla = {
1826 				.nla_len	= nla_attr_size(sizeof(est.opt)),
1827 				.nla_type	= TCA_RATE,
1828 			},
1829 			.opt = {
1830 				/* 4s interval, 16s averaging constant */
1831 				.interval	= 2,
1832 				.ewma_log	= 2,
1833 			},
1834 		};
1835 
1836 		/* check for valid classid */
1837 		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1838 		    htb_find(classid, sch))
1839 			goto failure;
1840 
1841 		/* check maximal depth */
1842 		if (parent && parent->parent && parent->parent->level < 2) {
1843 			NL_SET_ERR_MSG_MOD(extack, "tree is too deep");
1844 			goto failure;
1845 		}
1846 		err = -ENOBUFS;
1847 		cl = kzalloc_obj(*cl);
1848 		if (!cl)
1849 			goto failure;
1850 
1851 		gnet_stats_basic_sync_init(&cl->bstats);
1852 		gnet_stats_basic_sync_init(&cl->bstats_bias);
1853 
1854 		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
1855 		if (err) {
1856 			kfree(cl);
1857 			goto failure;
1858 		}
1859 		if (htb_rate_est || tca[TCA_RATE]) {
1860 			err = gen_new_estimator(&cl->bstats, NULL,
1861 						&cl->rate_est,
1862 						NULL,
1863 						true,
1864 						tca[TCA_RATE] ? : &est.nla);
1865 			if (err)
1866 				goto err_block_put;
1867 		}
1868 
1869 		cl->children = 0;
1870 		RB_CLEAR_NODE(&cl->pq_node);
1871 
1872 		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1873 			RB_CLEAR_NODE(&cl->node[prio]);
1874 
1875 		cl->common.classid = classid;
1876 
1877 		/* Make sure nothing interrupts us in between of two
1878 		 * ndo_setup_tc calls.
1879 		 */
1880 		ASSERT_RTNL();
1881 
1882 		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1883 		 * so that can't be used inside of sch_tree_lock
1884 		 * -- thanks to Karlis Peisenieks
1885 		 */
1886 		if (!q->offload) {
1887 			dev_queue = sch->dev_queue;
1888 		} else if (!(parent && !parent->level)) {
1889 			/* Assign a dev_queue to this classid. */
1890 			offload_opt = (struct tc_htb_qopt_offload) {
1891 				.command = TC_HTB_LEAF_ALLOC_QUEUE,
1892 				.classid = cl->common.classid,
1893 				.parent_classid = parent ?
1894 					TC_H_MIN(parent->common.classid) :
1895 					TC_HTB_CLASSID_ROOT,
1896 				.rate = max_t(u64, hopt->rate.rate, rate64),
1897 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1898 				.prio = hopt->prio,
1899 				.quantum = hopt->quantum,
1900 				.extack = extack,
1901 			};
1902 			err = htb_offload(dev, &offload_opt);
1903 			if (err) {
1904 				NL_SET_ERR_MSG_WEAK(extack,
1905 						    "Failed to offload TC_HTB_LEAF_ALLOC_QUEUE");
1906 				goto err_kill_estimator;
1907 			}
1908 			dev_queue = netdev_get_tx_queue(dev, offload_opt.qid);
1909 		} else { /* First child. */
1910 			dev_queue = htb_offload_get_queue(parent);
1911 			old_q = htb_graft_helper(dev_queue, NULL);
1912 			WARN_ON(old_q != parent->leaf.q);
1913 			offload_opt = (struct tc_htb_qopt_offload) {
1914 				.command = TC_HTB_LEAF_TO_INNER,
1915 				.classid = cl->common.classid,
1916 				.parent_classid =
1917 					TC_H_MIN(parent->common.classid),
1918 				.rate = max_t(u64, hopt->rate.rate, rate64),
1919 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
1920 				.prio = hopt->prio,
1921 				.quantum = hopt->quantum,
1922 				.extack = extack,
1923 			};
1924 			err = htb_offload(dev, &offload_opt);
1925 			if (err) {
1926 				NL_SET_ERR_MSG_WEAK(extack,
1927 						    "Failed to offload TC_HTB_LEAF_TO_INNER");
1928 				htb_graft_helper(dev_queue, old_q);
1929 				goto err_kill_estimator;
1930 			}
1931 			_bstats_update(&parent->bstats_bias,
1932 				       u64_stats_read(&old_q->bstats.bytes),
1933 				       u64_stats_read(&old_q->bstats.packets));
1934 			qdisc_put(old_q);
1935 		}
1936 		new_q = qdisc_create_dflt(dev_queue, &pfifo_qdisc_ops,
1937 					  classid, NULL);
1938 		if (q->offload) {
1939 			/* One ref for cl->leaf.q, the other for dev_queue->qdisc. */
1940 			if (new_q)
1941 				qdisc_refcount_inc(new_q);
1942 			old_q = htb_graft_helper(dev_queue, new_q);
1943 			/* No qdisc_put needed. */
1944 			WARN_ON(!(old_q->flags & TCQ_F_BUILTIN));
1945 		}
1946 		sch_tree_lock(sch);
1947 		if (parent && !parent->level) {
1948 			/* turn parent into inner node */
1949 			qdisc_purge_queue(parent->leaf.q);
1950 			parent_qdisc = parent->leaf.q;
1951 			htb_deactivate(q, parent);
1952 
1953 			/* remove from evt list because of level change */
1954 			if (parent->cmode != HTB_CAN_SEND) {
1955 				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1956 				parent->cmode = HTB_CAN_SEND;
1957 			}
1958 			parent->level = (parent->parent ? parent->parent->level
1959 					 : TC_HTB_MAXDEPTH) - 1;
1960 			memset(&parent->inner, 0, sizeof(parent->inner));
1961 		}
1962 
1963 		/* leaf (we) needs elementary qdisc */
1964 		cl->leaf.q = new_q ? new_q : &noop_qdisc;
1965 		if (q->offload)
1966 			cl->leaf.offload_queue = dev_queue;
1967 
1968 		cl->parent = parent;
1969 
1970 		/* set class to be in HTB_CAN_SEND state */
1971 		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1972 		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
1973 		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
1974 		cl->t_c = ktime_get_ns();
1975 		cl->cmode = HTB_CAN_SEND;
1976 
1977 		/* attach to the hash list and parent's family */
1978 		qdisc_class_hash_insert(&q->clhash, &cl->common);
1979 		if (parent)
1980 			parent->children++;
1981 		if (cl->leaf.q != &noop_qdisc)
1982 			qdisc_hash_add(cl->leaf.q, true);
1983 	} else {
1984 		if (tca[TCA_RATE]) {
1985 			err = gen_replace_estimator(&cl->bstats, NULL,
1986 						    &cl->rate_est,
1987 						    NULL,
1988 						    true,
1989 						    tca[TCA_RATE]);
1990 			if (err)
1991 				return err;
1992 		}
1993 
1994 		if (q->offload) {
1995 			struct net_device *dev = qdisc_dev(sch);
1996 
1997 			offload_opt = (struct tc_htb_qopt_offload) {
1998 				.command = TC_HTB_NODE_MODIFY,
1999 				.classid = cl->common.classid,
2000 				.rate = max_t(u64, hopt->rate.rate, rate64),
2001 				.ceil = max_t(u64, hopt->ceil.rate, ceil64),
2002 				.prio = hopt->prio,
2003 				.quantum = hopt->quantum,
2004 				.extack = extack,
2005 			};
2006 			err = htb_offload(dev, &offload_opt);
2007 			if (err)
2008 				/* Estimator was replaced, and rollback may fail
2009 				 * as well, so we don't try to recover it, and
2010 				 * the estimator won't work property with the
2011 				 * offload anyway, because bstats are updated
2012 				 * only when the stats are queried.
2013 				 */
2014 				return err;
2015 		}
2016 
2017 		sch_tree_lock(sch);
2018 	}
2019 
2020 	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
2021 	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
2022 
2023 	/* it used to be a nasty bug here, we have to check that node
2024 	 * is really leaf before changing cl->leaf !
2025 	 */
2026 	if (!cl->level) {
2027 		u64 quantum = cl->rate.rate_bytes_ps;
2028 
2029 		do_div(quantum, q->rate2quantum);
2030 		cl->quantum = min_t(u64, quantum, INT_MAX);
2031 
2032 		if (!hopt->quantum && cl->quantum < 1000) {
2033 			warn = -1;
2034 			cl->quantum = 1000;
2035 		}
2036 		if (!hopt->quantum && cl->quantum > 200000) {
2037 			warn = 1;
2038 			cl->quantum = 200000;
2039 		}
2040 		if (hopt->quantum)
2041 			cl->quantum = hopt->quantum;
2042 		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
2043 			cl->prio = TC_HTB_NUMPRIO - 1;
2044 	}
2045 
2046 	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
2047 	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
2048 
2049 	sch_tree_unlock(sch);
2050 	qdisc_put(parent_qdisc);
2051 
2052 	if (warn)
2053 		NL_SET_ERR_MSG_FMT_MOD(extack,
2054 				       "quantum of class %X is %s. Consider r2q change.",
2055 				       cl->common.classid, (warn == -1 ? "small" : "big"));
2056 
2057 	qdisc_class_hash_grow(sch, &q->clhash);
2058 
2059 	*arg = (unsigned long)cl;
2060 	return 0;
2061 
2062 err_kill_estimator:
2063 	gen_kill_estimator(&cl->rate_est);
2064 err_block_put:
2065 	tcf_block_put(cl->block);
2066 	kfree(cl);
2067 failure:
2068 	return err;
2069 }
2070 
htb_tcf_block(struct Qdisc * sch,unsigned long arg,struct netlink_ext_ack * extack)2071 static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
2072 				       struct netlink_ext_ack *extack)
2073 {
2074 	struct htb_sched *q = qdisc_priv(sch);
2075 	struct htb_class *cl = (struct htb_class *)arg;
2076 
2077 	return cl ? cl->block : q->block;
2078 }
2079 
htb_bind_filter(struct Qdisc * sch,unsigned long parent,u32 classid)2080 static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
2081 				     u32 classid)
2082 {
2083 	struct htb_class *cl = htb_find(classid, sch);
2084 
2085 	/*if (cl && !cl->level) return 0;
2086 	 * The line above used to be there to prevent attaching filters to
2087 	 * leaves. But at least tc_index filter uses this just to get class
2088 	 * for other reasons so that we have to allow for it.
2089 	 * ----
2090 	 * 19.6.2002 As Werner explained it is ok - bind filter is just
2091 	 * another way to "lock" the class - unlike "get" this lock can
2092 	 * be broken by class during destroy IIUC.
2093 	 */
2094 	if (cl)
2095 		qdisc_class_get(&cl->common);
2096 	return (unsigned long)cl;
2097 }
2098 
htb_unbind_filter(struct Qdisc * sch,unsigned long arg)2099 static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
2100 {
2101 	struct htb_class *cl = (struct htb_class *)arg;
2102 
2103 	qdisc_class_put(&cl->common);
2104 }
2105 
htb_walk(struct Qdisc * sch,struct qdisc_walker * arg)2106 static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2107 {
2108 	struct htb_sched *q = qdisc_priv(sch);
2109 	struct htb_class *cl;
2110 	unsigned int i;
2111 
2112 	if (arg->stop)
2113 		return;
2114 
2115 	for (i = 0; i < q->clhash.hashsize; i++) {
2116 		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
2117 			if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
2118 				return;
2119 		}
2120 	}
2121 }
2122 
2123 static const struct Qdisc_class_ops htb_class_ops = {
2124 	.select_queue	=	htb_select_queue,
2125 	.graft		=	htb_graft,
2126 	.leaf		=	htb_leaf,
2127 	.qlen_notify	=	htb_qlen_notify,
2128 	.find		=	htb_search,
2129 	.change		=	htb_change_class,
2130 	.delete		=	htb_delete,
2131 	.walk		=	htb_walk,
2132 	.tcf_block	=	htb_tcf_block,
2133 	.bind_tcf	=	htb_bind_filter,
2134 	.unbind_tcf	=	htb_unbind_filter,
2135 	.dump		=	htb_dump_class,
2136 	.dump_stats	=	htb_dump_class_stats,
2137 };
2138 
2139 static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
2140 	.cl_ops		=	&htb_class_ops,
2141 	.id		=	"htb",
2142 	.priv_size	=	sizeof(struct htb_sched),
2143 	.enqueue	=	htb_enqueue,
2144 	.dequeue	=	htb_dequeue,
2145 	.peek		=	qdisc_peek_dequeued,
2146 	.init		=	htb_init,
2147 	.attach		=	htb_attach,
2148 	.reset		=	htb_reset,
2149 	.destroy	=	htb_destroy,
2150 	.dump		=	htb_dump,
2151 	.owner		=	THIS_MODULE,
2152 };
2153 MODULE_ALIAS_NET_SCH("htb");
2154 
htb_module_init(void)2155 static int __init htb_module_init(void)
2156 {
2157 	return register_qdisc(&htb_qdisc_ops);
2158 }
htb_module_exit(void)2159 static void __exit htb_module_exit(void)
2160 {
2161 	unregister_qdisc(&htb_qdisc_ops);
2162 }
2163 
2164 module_init(htb_module_init)
2165 module_exit(htb_module_exit)
2166 MODULE_LICENSE("GPL");
2167 MODULE_DESCRIPTION("Hierarchical Token Bucket scheduler");
2168