xref: /linux/net/sched/sch_red.c (revision fd639726bf15fca8ee1a00dce8e0096d0ad9bd18)
1 /*
2  * net/sched/sch_red.c	Random Early Detection queue.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914:	computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16 
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/pkt_cls.h>
23 #include <net/inet_ecn.h>
24 #include <net/red.h>
25 
26 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length; it should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	In practice this limit will never be reached
	if RED works correctly.
 */
39 
40 struct red_sched_data {
41 	u32			limit;		/* HARD maximal queue length */
42 	unsigned char		flags;
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 };
50 
51 static inline int red_use_ecn(struct red_sched_data *q)
52 {
53 	return q->flags & TC_RED_ECN;
54 }
55 
56 static inline int red_use_harddrop(struct red_sched_data *q)
57 {
58 	return q->flags & TC_RED_HARDDROP;
59 }
60 
61 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
62 		       struct sk_buff **to_free)
63 {
64 	struct red_sched_data *q = qdisc_priv(sch);
65 	struct Qdisc *child = q->qdisc;
66 	int ret;
67 
68 	q->vars.qavg = red_calc_qavg(&q->parms,
69 				     &q->vars,
70 				     child->qstats.backlog);
71 
72 	if (red_is_idling(&q->vars))
73 		red_end_of_idle_period(&q->vars);
74 
75 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
76 	case RED_DONT_MARK:
77 		break;
78 
79 	case RED_PROB_MARK:
80 		qdisc_qstats_overlimit(sch);
81 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
82 			q->stats.prob_drop++;
83 			goto congestion_drop;
84 		}
85 
86 		q->stats.prob_mark++;
87 		break;
88 
89 	case RED_HARD_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
92 		    !INET_ECN_set_ce(skb)) {
93 			q->stats.forced_drop++;
94 			goto congestion_drop;
95 		}
96 
97 		q->stats.forced_mark++;
98 		break;
99 	}
100 
101 	ret = qdisc_enqueue(skb, child, to_free);
102 	if (likely(ret == NET_XMIT_SUCCESS)) {
103 		qdisc_qstats_backlog_inc(sch, skb);
104 		sch->q.qlen++;
105 	} else if (net_xmit_drop_count(ret)) {
106 		q->stats.pdrop++;
107 		qdisc_qstats_drop(sch);
108 	}
109 	return ret;
110 
111 congestion_drop:
112 	qdisc_drop(skb, sch, to_free);
113 	return NET_XMIT_CN;
114 }
115 
116 static struct sk_buff *red_dequeue(struct Qdisc *sch)
117 {
118 	struct sk_buff *skb;
119 	struct red_sched_data *q = qdisc_priv(sch);
120 	struct Qdisc *child = q->qdisc;
121 
122 	skb = child->dequeue(child);
123 	if (skb) {
124 		qdisc_bstats_update(sch, skb);
125 		qdisc_qstats_backlog_dec(sch, skb);
126 		sch->q.qlen--;
127 	} else {
128 		if (!red_is_idling(&q->vars))
129 			red_start_of_idle_period(&q->vars);
130 	}
131 	return skb;
132 }
133 
134 static struct sk_buff *red_peek(struct Qdisc *sch)
135 {
136 	struct red_sched_data *q = qdisc_priv(sch);
137 	struct Qdisc *child = q->qdisc;
138 
139 	return child->ops->peek(child);
140 }
141 
142 static void red_reset(struct Qdisc *sch)
143 {
144 	struct red_sched_data *q = qdisc_priv(sch);
145 
146 	qdisc_reset(q->qdisc);
147 	sch->qstats.backlog = 0;
148 	sch->q.qlen = 0;
149 	red_restart(&q->vars);
150 }
151 
152 static int red_offload(struct Qdisc *sch, bool enable)
153 {
154 	struct red_sched_data *q = qdisc_priv(sch);
155 	struct net_device *dev = qdisc_dev(sch);
156 	struct tc_red_qopt_offload opt = {
157 		.handle = sch->handle,
158 		.parent = sch->parent,
159 	};
160 
161 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 		return -EOPNOTSUPP;
163 
164 	if (enable) {
165 		opt.command = TC_RED_REPLACE;
166 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 		opt.set.probability = q->parms.max_P;
169 		opt.set.is_ecn = red_use_ecn(q);
170 	} else {
171 		opt.command = TC_RED_DESTROY;
172 	}
173 
174 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
175 }
176 
177 static void red_destroy(struct Qdisc *sch)
178 {
179 	struct red_sched_data *q = qdisc_priv(sch);
180 
181 	del_timer_sync(&q->adapt_timer);
182 	red_offload(sch, false);
183 	qdisc_destroy(q->qdisc);
184 }
185 
186 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
187 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
188 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
189 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
190 };
191 
192 static int red_change(struct Qdisc *sch, struct nlattr *opt)
193 {
194 	struct red_sched_data *q = qdisc_priv(sch);
195 	struct nlattr *tb[TCA_RED_MAX + 1];
196 	struct tc_red_qopt *ctl;
197 	struct Qdisc *child = NULL;
198 	int err;
199 	u32 max_P;
200 
201 	if (opt == NULL)
202 		return -EINVAL;
203 
204 	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
205 	if (err < 0)
206 		return err;
207 
208 	if (tb[TCA_RED_PARMS] == NULL ||
209 	    tb[TCA_RED_STAB] == NULL)
210 		return -EINVAL;
211 
212 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
213 
214 	ctl = nla_data(tb[TCA_RED_PARMS]);
215 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
216 		return -EINVAL;
217 
218 	if (ctl->limit > 0) {
219 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
220 		if (IS_ERR(child))
221 			return PTR_ERR(child);
222 	}
223 
224 	if (child != &noop_qdisc)
225 		qdisc_hash_add(child, true);
226 	sch_tree_lock(sch);
227 	q->flags = ctl->flags;
228 	q->limit = ctl->limit;
229 	if (child) {
230 		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
231 					  q->qdisc->qstats.backlog);
232 		qdisc_destroy(q->qdisc);
233 		q->qdisc = child;
234 	}
235 
236 	red_set_parms(&q->parms,
237 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
238 		      ctl->Plog, ctl->Scell_log,
239 		      nla_data(tb[TCA_RED_STAB]),
240 		      max_P);
241 	red_set_vars(&q->vars);
242 
243 	del_timer(&q->adapt_timer);
244 	if (ctl->flags & TC_RED_ADAPTATIVE)
245 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
246 
247 	if (!q->qdisc->q.qlen)
248 		red_start_of_idle_period(&q->vars);
249 
250 	sch_tree_unlock(sch);
251 	red_offload(sch, true);
252 	return 0;
253 }
254 
255 static inline void red_adaptative_timer(struct timer_list *t)
256 {
257 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
258 	struct Qdisc *sch = q->sch;
259 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
260 
261 	spin_lock(root_lock);
262 	red_adaptative_algo(&q->parms, &q->vars);
263 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
264 	spin_unlock(root_lock);
265 }
266 
267 static int red_init(struct Qdisc *sch, struct nlattr *opt)
268 {
269 	struct red_sched_data *q = qdisc_priv(sch);
270 
271 	q->qdisc = &noop_qdisc;
272 	q->sch = sch;
273 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
274 	return red_change(sch, opt);
275 }
276 
277 static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt)
278 {
279 	struct net_device *dev = qdisc_dev(sch);
280 	struct tc_red_qopt_offload hw_stats = {
281 		.command = TC_RED_STATS,
282 		.handle = sch->handle,
283 		.parent = sch->parent,
284 		{
285 			.stats.bstats = &sch->bstats,
286 			.stats.qstats = &sch->qstats,
287 		},
288 	};
289 	int err;
290 
291 	opt->flags &= ~TC_RED_OFFLOADED;
292 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
293 		return 0;
294 
295 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
296 					    &hw_stats);
297 	if (err == -EOPNOTSUPP)
298 		return 0;
299 
300 	if (!err)
301 		opt->flags |= TC_RED_OFFLOADED;
302 
303 	return err;
304 }
305 
306 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
307 {
308 	struct red_sched_data *q = qdisc_priv(sch);
309 	struct nlattr *opts = NULL;
310 	struct tc_red_qopt opt = {
311 		.limit		= q->limit,
312 		.flags		= q->flags,
313 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
314 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
315 		.Wlog		= q->parms.Wlog,
316 		.Plog		= q->parms.Plog,
317 		.Scell_log	= q->parms.Scell_log,
318 	};
319 	int err;
320 
321 	sch->qstats.backlog = q->qdisc->qstats.backlog;
322 	err = red_dump_offload(sch, &opt);
323 	if (err)
324 		goto nla_put_failure;
325 
326 	opts = nla_nest_start(skb, TCA_OPTIONS);
327 	if (opts == NULL)
328 		goto nla_put_failure;
329 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
330 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
331 		goto nla_put_failure;
332 	return nla_nest_end(skb, opts);
333 
334 nla_put_failure:
335 	nla_nest_cancel(skb, opts);
336 	return -EMSGSIZE;
337 }
338 
339 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
340 {
341 	struct red_sched_data *q = qdisc_priv(sch);
342 	struct net_device *dev = qdisc_dev(sch);
343 	struct tc_red_xstats st = {
344 		.early	= q->stats.prob_drop + q->stats.forced_drop,
345 		.pdrop	= q->stats.pdrop,
346 		.other	= q->stats.other,
347 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
348 	};
349 
350 	if (tc_can_offload(dev) &&  dev->netdev_ops->ndo_setup_tc) {
351 		struct red_stats hw_stats = {0};
352 		struct tc_red_qopt_offload hw_stats_request = {
353 			.command = TC_RED_XSTATS,
354 			.handle = sch->handle,
355 			.parent = sch->parent,
356 			{
357 				.xstats = &hw_stats,
358 			},
359 		};
360 		if (!dev->netdev_ops->ndo_setup_tc(dev,
361 						   TC_SETUP_QDISC_RED,
362 						   &hw_stats_request)) {
363 			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
364 			st.pdrop += hw_stats.pdrop;
365 			st.other += hw_stats.other;
366 			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
367 		}
368 	}
369 
370 	return gnet_stats_copy_app(d, &st, sizeof(st));
371 }
372 
373 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
374 			  struct sk_buff *skb, struct tcmsg *tcm)
375 {
376 	struct red_sched_data *q = qdisc_priv(sch);
377 
378 	tcm->tcm_handle |= TC_H_MIN(1);
379 	tcm->tcm_info = q->qdisc->handle;
380 	return 0;
381 }
382 
383 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
384 		     struct Qdisc **old)
385 {
386 	struct red_sched_data *q = qdisc_priv(sch);
387 
388 	if (new == NULL)
389 		new = &noop_qdisc;
390 
391 	*old = qdisc_replace(sch, new, &q->qdisc);
392 	return 0;
393 }
394 
395 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
396 {
397 	struct red_sched_data *q = qdisc_priv(sch);
398 	return q->qdisc;
399 }
400 
401 static unsigned long red_find(struct Qdisc *sch, u32 classid)
402 {
403 	return 1;
404 }
405 
406 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
407 {
408 	if (!walker->stop) {
409 		if (walker->count >= walker->skip)
410 			if (walker->fn(sch, 1, walker) < 0) {
411 				walker->stop = 1;
412 				return;
413 			}
414 		walker->count++;
415 	}
416 }
417 
418 static const struct Qdisc_class_ops red_class_ops = {
419 	.graft		=	red_graft,
420 	.leaf		=	red_leaf,
421 	.find		=	red_find,
422 	.walk		=	red_walk,
423 	.dump		=	red_dump_class,
424 };
425 
426 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
427 	.id		=	"red",
428 	.priv_size	=	sizeof(struct red_sched_data),
429 	.cl_ops		=	&red_class_ops,
430 	.enqueue	=	red_enqueue,
431 	.dequeue	=	red_dequeue,
432 	.peek		=	red_peek,
433 	.init		=	red_init,
434 	.reset		=	red_reset,
435 	.destroy	=	red_destroy,
436 	.change		=	red_change,
437 	.dump		=	red_dump,
438 	.dump_stats	=	red_dump_stats,
439 	.owner		=	THIS_MODULE,
440 };
441 
442 static int __init red_module_init(void)
443 {
444 	return register_qdisc(&red_qdisc_ops);
445 }
446 
447 static void __exit red_module_exit(void)
448 {
449 	unregister_qdisc(&red_qdisc_ops);
450 }
451 
452 module_init(red_module_init)
453 module_exit(red_module_exit)
454 
455 MODULE_LICENSE("GPL");
456