xref: /linux/net/sched/sch_red.c (revision 6ebe6dbd6886af07b102aca42e44edbee94a22d9)
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */

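/* A quick sketch of the math, which actually lives in include/net/red.h
 * (see red_calc_qavg() and red_action()); this is a paraphrase, not the
 * authoritative definition. The average backlog is an EWMA kept upscaled
 * by 2^Wlog so it can be maintained in fixed point:
 *
 *	qavg <- qavg + (backlog - qavg >> Wlog)
 *
 * i.e. avg <- (1 - 2^-Wlog) * avg + 2^-Wlog * backlog in real terms.
 * Marking is then probabilistic between qth_min and qth_max (with the
 * probability ramping up toward max_P) and unconditional above qth_max.
 */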
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN, TC_RED_HARDDROP, ... */
	struct timer_list	adapt_timer;	/* periodic max_P adaptation */
	struct Qdisc		*sch;		/* back-pointer for the timer */
	struct red_parms	parms;		/* user-supplied RED parameters */
	struct red_vars		vars;		/* run-time state (qavg etc.) */
	struct red_stats	stats;		/* mark/drop counters */
	struct Qdisc		*qdisc;		/* child queue holding the packets */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

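/* Enqueue: recompute the average over the child's backlog, then let
 * red_action() pick one of three outcomes: pass the packet through,
 * probabilistically mark/drop it, or forcibly mark/drop it. With ECN
 * enabled, an ECT packet is CE-marked instead of dropped, unless
 * harddrop is set and we are past qth_max.
 */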
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

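/* When the child queue runs dry we record the start of an idle period;
 * red.h then uses the stored timestamp (roughly, the Scell_log/stab
 * lookup in red_calc_qavg_from_idle_time()) to decay qavg for the time
 * the link sat idle, so an idle queue does not keep a stale, inflated
 * average.
 */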
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

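/* Internally qth_min/qth_max are kept upscaled by 2^Wlog (red_set_parms()
 * shifts them up), so they are shifted back down here to hand the driver
 * plain byte thresholds. TCQ_F_OFFLOADED is only left set while the
 * driver has actually accepted the offload.
 */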
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};
	int err;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
	} else {
		opt.command = TC_RED_DESTROY;
	}

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);

	if (!err && enable)
		sch->flags |= TCQ_F_OFFLOADED;
	else
		sch->flags &= ~TCQ_F_OFFLOADED;

	return err;
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

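/* red_change() parses the netlink blob that userspace tc sends for both
 * "qdisc add" and "qdisc change". An illustrative (not exhaustive)
 * invocation exercising these attributes:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *		avpkt 1000 burst 55 probability 0.02 ecn adaptive
 *
 * tc derives Wlog/Plog/Scell_log and the STAB table from those values
 * before filling in struct tc_red_qopt.
 */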
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is a freshly created bfifo, never NULL or
		 * &noop_qdisc, so it can be hashed unconditionally here.
		 */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

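/* Adaptive RED: every 500ms (HZ/2) red_adaptative_algo() in red.h nudges
 * max_P up or down so that the observed average queue settles between
 * qth_min and qth_max, following Floyd/Gummadi/Shenker's "Adaptive RED".
 */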
static void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	if (!(sch->flags & TCQ_F_OFFLOADED))
		return 0;

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					     &hw_stats);
}

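/* The dump undoes the internal Wlog upscaling so userspace reads back the
 * same byte-valued qth_min/qth_max it configured. max_P goes out in a
 * separate TCA_RED_MAX_P attribute, since struct tc_red_qopt is existing
 * ABI and cannot grow.
 */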
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct red_stats hw_stats = {0};
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &hw_stats,
			},
		};
		if (!dev->netdev_ops->ndo_setup_tc(dev,
						   TC_SETUP_QDISC_RED,
						   &hw_stats_request)) {
			st.early += hw_stats.prob_drop + hw_stats.forced_drop;
			st.pdrop += hw_stats.pdrop;
			st.other += hw_stats.other;
			st.marked += hw_stats.prob_mark + hw_stats.forced_mark;
		}
	}

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

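/* RED exposes exactly one pseudo-class (minor 1) wrapping the child
 * qdisc: red_find() unconditionally returns 1 and red_walk() visits that
 * single class. This is what lets userspace graft a different child
 * under RED via red_graft().
 */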
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");