xref: /linux/net/sched/sch_red.c (revision b9b77222d4ff6b5bb8f5d87fca20de0910618bb9)
1 /*
2  * net/sched/sch_red.c	Random Early Detection queue.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914:	computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16 
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/pkt_cls.h>
23 #include <net/inet_ecn.h>
24 #include <net/red.h>
25 
26 
27 /*	Parameters, settable by user:
28 	-----------------------------
29 
30 	limit		- bytes (must be > qth_max + burst)
31 
32 	Hard limit on queue length, should be chosen >qth_max
33 	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
35 	arbitrarily high (well, less than ram size)
36 	Really, this limit will never be reached
37 	if RED works correctly.
38  */
39 
40 struct red_sched_data {
41 	u32			limit;		/* HARD maximal queue length */
42 	unsigned char		flags;
43 	struct timer_list	adapt_timer;
44 	struct Qdisc		*sch;
45 	struct red_parms	parms;
46 	struct red_vars		vars;
47 	struct red_stats	stats;
48 	struct Qdisc		*qdisc;
49 };
50 
51 static inline int red_use_ecn(struct red_sched_data *q)
52 {
53 	return q->flags & TC_RED_ECN;
54 }
55 
56 static inline int red_use_harddrop(struct red_sched_data *q)
57 {
58 	return q->flags & TC_RED_HARDDROP;
59 }
60 
61 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
62 		       struct sk_buff **to_free)
63 {
64 	struct red_sched_data *q = qdisc_priv(sch);
65 	struct Qdisc *child = q->qdisc;
66 	int ret;
67 
68 	q->vars.qavg = red_calc_qavg(&q->parms,
69 				     &q->vars,
70 				     child->qstats.backlog);
71 
72 	if (red_is_idling(&q->vars))
73 		red_end_of_idle_period(&q->vars);
74 
75 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
76 	case RED_DONT_MARK:
77 		break;
78 
79 	case RED_PROB_MARK:
80 		qdisc_qstats_overlimit(sch);
81 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
82 			q->stats.prob_drop++;
83 			goto congestion_drop;
84 		}
85 
86 		q->stats.prob_mark++;
87 		break;
88 
89 	case RED_HARD_MARK:
90 		qdisc_qstats_overlimit(sch);
91 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
92 		    !INET_ECN_set_ce(skb)) {
93 			q->stats.forced_drop++;
94 			goto congestion_drop;
95 		}
96 
97 		q->stats.forced_mark++;
98 		break;
99 	}
100 
101 	ret = qdisc_enqueue(skb, child, to_free);
102 	if (likely(ret == NET_XMIT_SUCCESS)) {
103 		qdisc_qstats_backlog_inc(sch, skb);
104 		sch->q.qlen++;
105 	} else if (net_xmit_drop_count(ret)) {
106 		q->stats.pdrop++;
107 		qdisc_qstats_drop(sch);
108 	}
109 	return ret;
110 
111 congestion_drop:
112 	qdisc_drop(skb, sch, to_free);
113 	return NET_XMIT_CN;
114 }
115 
116 static struct sk_buff *red_dequeue(struct Qdisc *sch)
117 {
118 	struct sk_buff *skb;
119 	struct red_sched_data *q = qdisc_priv(sch);
120 	struct Qdisc *child = q->qdisc;
121 
122 	skb = child->dequeue(child);
123 	if (skb) {
124 		qdisc_bstats_update(sch, skb);
125 		qdisc_qstats_backlog_dec(sch, skb);
126 		sch->q.qlen--;
127 	} else {
128 		if (!red_is_idling(&q->vars))
129 			red_start_of_idle_period(&q->vars);
130 	}
131 	return skb;
132 }
133 
134 static struct sk_buff *red_peek(struct Qdisc *sch)
135 {
136 	struct red_sched_data *q = qdisc_priv(sch);
137 	struct Qdisc *child = q->qdisc;
138 
139 	return child->ops->peek(child);
140 }
141 
142 static void red_reset(struct Qdisc *sch)
143 {
144 	struct red_sched_data *q = qdisc_priv(sch);
145 
146 	qdisc_reset(q->qdisc);
147 	sch->qstats.backlog = 0;
148 	sch->q.qlen = 0;
149 	red_restart(&q->vars);
150 }
151 
152 static int red_offload(struct Qdisc *sch, bool enable)
153 {
154 	struct red_sched_data *q = qdisc_priv(sch);
155 	struct net_device *dev = qdisc_dev(sch);
156 	struct tc_red_qopt_offload opt = {
157 		.handle = sch->handle,
158 		.parent = sch->parent,
159 	};
160 
161 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
162 		return -EOPNOTSUPP;
163 
164 	if (enable) {
165 		opt.command = TC_RED_REPLACE;
166 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
167 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
168 		opt.set.probability = q->parms.max_P;
169 		opt.set.is_ecn = red_use_ecn(q);
170 		opt.set.qstats = &sch->qstats;
171 	} else {
172 		opt.command = TC_RED_DESTROY;
173 	}
174 
175 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
176 }
177 
178 static void red_destroy(struct Qdisc *sch)
179 {
180 	struct red_sched_data *q = qdisc_priv(sch);
181 
182 	del_timer_sync(&q->adapt_timer);
183 	red_offload(sch, false);
184 	qdisc_destroy(q->qdisc);
185 }
186 
187 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
188 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
189 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
190 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
191 };
192 
193 static int red_change(struct Qdisc *sch, struct nlattr *opt,
194 		      struct netlink_ext_ack *extack)
195 {
196 	struct red_sched_data *q = qdisc_priv(sch);
197 	struct nlattr *tb[TCA_RED_MAX + 1];
198 	struct tc_red_qopt *ctl;
199 	struct Qdisc *child = NULL;
200 	int err;
201 	u32 max_P;
202 
203 	if (opt == NULL)
204 		return -EINVAL;
205 
206 	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
207 	if (err < 0)
208 		return err;
209 
210 	if (tb[TCA_RED_PARMS] == NULL ||
211 	    tb[TCA_RED_STAB] == NULL)
212 		return -EINVAL;
213 
214 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
215 
216 	ctl = nla_data(tb[TCA_RED_PARMS]);
217 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
218 		return -EINVAL;
219 
220 	if (ctl->limit > 0) {
221 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
222 					 extack);
223 		if (IS_ERR(child))
224 			return PTR_ERR(child);
225 
226 		/* child is fifo, no need to check for noop_qdisc */
227 		qdisc_hash_add(child, true);
228 	}
229 
230 	sch_tree_lock(sch);
231 	q->flags = ctl->flags;
232 	q->limit = ctl->limit;
233 	if (child) {
234 		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
235 					  q->qdisc->qstats.backlog);
236 		qdisc_destroy(q->qdisc);
237 		q->qdisc = child;
238 	}
239 
240 	red_set_parms(&q->parms,
241 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
242 		      ctl->Plog, ctl->Scell_log,
243 		      nla_data(tb[TCA_RED_STAB]),
244 		      max_P);
245 	red_set_vars(&q->vars);
246 
247 	del_timer(&q->adapt_timer);
248 	if (ctl->flags & TC_RED_ADAPTATIVE)
249 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
250 
251 	if (!q->qdisc->q.qlen)
252 		red_start_of_idle_period(&q->vars);
253 
254 	sch_tree_unlock(sch);
255 	red_offload(sch, true);
256 	return 0;
257 }
258 
259 static inline void red_adaptative_timer(struct timer_list *t)
260 {
261 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
262 	struct Qdisc *sch = q->sch;
263 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
264 
265 	spin_lock(root_lock);
266 	red_adaptative_algo(&q->parms, &q->vars);
267 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
268 	spin_unlock(root_lock);
269 }
270 
271 static int red_init(struct Qdisc *sch, struct nlattr *opt,
272 		    struct netlink_ext_ack *extack)
273 {
274 	struct red_sched_data *q = qdisc_priv(sch);
275 
276 	q->qdisc = &noop_qdisc;
277 	q->sch = sch;
278 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
279 	return red_change(sch, opt, extack);
280 }
281 
282 static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
283 {
284 	struct net_device *dev = qdisc_dev(sch);
285 	struct tc_red_qopt_offload hw_stats = {
286 		.command = TC_RED_STATS,
287 		.handle = sch->handle,
288 		.parent = sch->parent,
289 		{
290 			.stats.bstats = &sch->bstats,
291 			.stats.qstats = &sch->qstats,
292 		},
293 	};
294 	int err;
295 
296 	sch->flags &= ~TCQ_F_OFFLOADED;
297 
298 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
299 		return 0;
300 
301 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
302 					    &hw_stats);
303 	if (err == -EOPNOTSUPP)
304 		return 0;
305 
306 	if (!err)
307 		sch->flags |= TCQ_F_OFFLOADED;
308 
309 	return err;
310 }
311 
312 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
313 {
314 	struct red_sched_data *q = qdisc_priv(sch);
315 	struct nlattr *opts = NULL;
316 	struct tc_red_qopt opt = {
317 		.limit		= q->limit,
318 		.flags		= q->flags,
319 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
320 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
321 		.Wlog		= q->parms.Wlog,
322 		.Plog		= q->parms.Plog,
323 		.Scell_log	= q->parms.Scell_log,
324 	};
325 	int err;
326 
327 	err = red_dump_offload_stats(sch, &opt);
328 	if (err)
329 		goto nla_put_failure;
330 
331 	opts = nla_nest_start(skb, TCA_OPTIONS);
332 	if (opts == NULL)
333 		goto nla_put_failure;
334 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
335 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
336 		goto nla_put_failure;
337 	return nla_nest_end(skb, opts);
338 
339 nla_put_failure:
340 	nla_nest_cancel(skb, opts);
341 	return -EMSGSIZE;
342 }
343 
344 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
345 {
346 	struct red_sched_data *q = qdisc_priv(sch);
347 	struct net_device *dev = qdisc_dev(sch);
348 	struct tc_red_xstats st = {0};
349 
350 	if (sch->flags & TCQ_F_OFFLOADED) {
351 		struct tc_red_qopt_offload hw_stats_request = {
352 			.command = TC_RED_XSTATS,
353 			.handle = sch->handle,
354 			.parent = sch->parent,
355 			{
356 				.xstats = &q->stats,
357 			},
358 		};
359 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
360 					      &hw_stats_request);
361 	}
362 	st.early = q->stats.prob_drop + q->stats.forced_drop;
363 	st.pdrop = q->stats.pdrop;
364 	st.other = q->stats.other;
365 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
366 
367 	return gnet_stats_copy_app(d, &st, sizeof(st));
368 }
369 
370 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
371 			  struct sk_buff *skb, struct tcmsg *tcm)
372 {
373 	struct red_sched_data *q = qdisc_priv(sch);
374 
375 	tcm->tcm_handle |= TC_H_MIN(1);
376 	tcm->tcm_info = q->qdisc->handle;
377 	return 0;
378 }
379 
380 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
381 		     struct Qdisc **old, struct netlink_ext_ack *extack)
382 {
383 	struct red_sched_data *q = qdisc_priv(sch);
384 
385 	if (new == NULL)
386 		new = &noop_qdisc;
387 
388 	*old = qdisc_replace(sch, new, &q->qdisc);
389 	return 0;
390 }
391 
392 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
393 {
394 	struct red_sched_data *q = qdisc_priv(sch);
395 	return q->qdisc;
396 }
397 
398 static unsigned long red_find(struct Qdisc *sch, u32 classid)
399 {
400 	return 1;
401 }
402 
403 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
404 {
405 	if (!walker->stop) {
406 		if (walker->count >= walker->skip)
407 			if (walker->fn(sch, 1, walker) < 0) {
408 				walker->stop = 1;
409 				return;
410 			}
411 		walker->count++;
412 	}
413 }
414 
415 static const struct Qdisc_class_ops red_class_ops = {
416 	.graft		=	red_graft,
417 	.leaf		=	red_leaf,
418 	.find		=	red_find,
419 	.walk		=	red_walk,
420 	.dump		=	red_dump_class,
421 };
422 
423 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
424 	.id		=	"red",
425 	.priv_size	=	sizeof(struct red_sched_data),
426 	.cl_ops		=	&red_class_ops,
427 	.enqueue	=	red_enqueue,
428 	.dequeue	=	red_dequeue,
429 	.peek		=	red_peek,
430 	.init		=	red_init,
431 	.reset		=	red_reset,
432 	.destroy	=	red_destroy,
433 	.change		=	red_change,
434 	.dump		=	red_dump,
435 	.dump_stats	=	red_dump_stats,
436 	.owner		=	THIS_MODULE,
437 };
438 
439 static int __init red_module_init(void)
440 {
441 	return register_qdisc(&red_qdisc_ops);
442 }
443 
444 static void __exit red_module_exit(void)
445 {
446 	unregister_qdisc(&red_qdisc_ops);
447 }
448 
449 module_init(red_module_init)
450 module_exit(red_module_exit)
451 
452 MODULE_LICENSE("GPL");
453