/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

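/*	Illustrative example (editor's note, not part of the original source):
 *	a typical configuration, with values similar to the tc-red(8) man page
 *	example (device name and handles are placeholders):
 *
 *		tc qdisc add dev eth0 parent 1:1 handle 10: red \
 *			limit 400000 min 30000 max 90000 avpkt 1000 \
 *			burst 55 ecn adaptive bandwidth 10Mbit
 *
 *	Here limit (400000 bytes) comfortably exceeds max (90000 bytes) plus
 *	the burst allowance (55 packets of ~1000 bytes), so the hard limit is
 *	only hit if RED misbehaves, as the comment above requires.
 */
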
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN / TC_RED_HARDDROP / TC_RED_ADAPTATIVE */
	struct timer_list	adapt_timer;	/* periodic max_P adaptation timer */
	struct Qdisc		*sch;		/* back-pointer for the timer callback */
	struct red_parms	parms;		/* RED parameters (thresholds, Wlog, ...) */
	struct red_vars		vars;		/* RED runtime state (qavg, idle start) */
	struct red_stats	stats;		/* drop/mark counters for dump_stats */
	struct Qdisc		*qdisc;		/* child (bfifo) qdisc holding the packets */
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

/* Update the average queue length from the child's backlog, let the RED
 * algorithm decide whether to pass, ECN-mark or drop the packet, and then
 * hand it to the child qdisc.
 */
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* The freshly created bfifo is never &noop_qdisc, so hash it
		 * unconditionally here; doing it inside this block also avoids
		 * passing a NULL child to qdisc_hash_add() when ctl->limit is 0.
		 */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	return 0;
}

/* Runs every 500 ms while TC_RED_ADAPTATIVE is set, adapting max_P under the
 * root qdisc lock.
 */
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};

	sch->qstats.backlog = q->qdisc->qstats.backlog;
	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct tc_red_xstats st = {
		.early	= q->stats.prob_drop + q->stats.forced_drop,
		.pdrop	= q->stats.pdrop,
		.other	= q->stats.other,
		.marked	= q->stats.prob_mark + q->stats.forced_mark,
	};

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");