xref: /linux/net/sched/sch_red.c (revision 9e8ba5f3ec35cba4fd8a8bebda548c4db2651e40)
1 /*
2  * net/sched/sch_red.c	Random Early Detection queue.
3  *
4  *		This program is free software; you can redistribute it and/or
5  *		modify it under the terms of the GNU General Public License
6  *		as published by the Free Software Foundation; either version
7  *		2 of the License, or (at your option) any later version.
8  *
9  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914:	computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16 
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24 
25 
26 /*	Parameters, settable by user:
27 	-----------------------------
28 
29 	limit		- bytes (must be > qth_max + burst)
30 
31 	Hard limit on queue length, should be chosen >qth_max
32 	to allow packet bursts. This parameter does not
33 	affect the algorithms behaviour and can be chosen
34 	arbitrarily high (well, less than ram size)
35 	Really, this limit will never be reached
36 	if RED works correctly.
37  */
38 
39 struct red_sched_data {
40 	u32			limit;		/* HARD maximal queue length */
41 	unsigned char		flags;
42 	struct timer_list	adapt_timer;
43 	struct red_parms	parms;
44 	struct red_stats	stats;
45 	struct Qdisc		*qdisc;
46 };
47 
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50 	return q->flags & TC_RED_ECN;
51 }
52 
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55 	return q->flags & TC_RED_HARDDROP;
56 }
57 
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
59 {
60 	struct red_sched_data *q = qdisc_priv(sch);
61 	struct Qdisc *child = q->qdisc;
62 	int ret;
63 
64 	q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65 
66 	if (red_is_idling(&q->parms))
67 		red_end_of_idle_period(&q->parms);
68 
69 	switch (red_action(&q->parms, q->parms.qavg)) {
70 	case RED_DONT_MARK:
71 		break;
72 
73 	case RED_PROB_MARK:
74 		sch->qstats.overlimits++;
75 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76 			q->stats.prob_drop++;
77 			goto congestion_drop;
78 		}
79 
80 		q->stats.prob_mark++;
81 		break;
82 
83 	case RED_HARD_MARK:
84 		sch->qstats.overlimits++;
85 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
86 		    !INET_ECN_set_ce(skb)) {
87 			q->stats.forced_drop++;
88 			goto congestion_drop;
89 		}
90 
91 		q->stats.forced_mark++;
92 		break;
93 	}
94 
95 	ret = qdisc_enqueue(skb, child);
96 	if (likely(ret == NET_XMIT_SUCCESS)) {
97 		sch->q.qlen++;
98 	} else if (net_xmit_drop_count(ret)) {
99 		q->stats.pdrop++;
100 		sch->qstats.drops++;
101 	}
102 	return ret;
103 
104 congestion_drop:
105 	qdisc_drop(skb, sch);
106 	return NET_XMIT_CN;
107 }
108 
109 static struct sk_buff *red_dequeue(struct Qdisc *sch)
110 {
111 	struct sk_buff *skb;
112 	struct red_sched_data *q = qdisc_priv(sch);
113 	struct Qdisc *child = q->qdisc;
114 
115 	skb = child->dequeue(child);
116 	if (skb) {
117 		qdisc_bstats_update(sch, skb);
118 		sch->q.qlen--;
119 	} else {
120 		if (!red_is_idling(&q->parms))
121 			red_start_of_idle_period(&q->parms);
122 	}
123 	return skb;
124 }
125 
126 static struct sk_buff *red_peek(struct Qdisc *sch)
127 {
128 	struct red_sched_data *q = qdisc_priv(sch);
129 	struct Qdisc *child = q->qdisc;
130 
131 	return child->ops->peek(child);
132 }
133 
134 static unsigned int red_drop(struct Qdisc *sch)
135 {
136 	struct red_sched_data *q = qdisc_priv(sch);
137 	struct Qdisc *child = q->qdisc;
138 	unsigned int len;
139 
140 	if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
141 		q->stats.other++;
142 		sch->qstats.drops++;
143 		sch->q.qlen--;
144 		return len;
145 	}
146 
147 	if (!red_is_idling(&q->parms))
148 		red_start_of_idle_period(&q->parms);
149 
150 	return 0;
151 }
152 
153 static void red_reset(struct Qdisc *sch)
154 {
155 	struct red_sched_data *q = qdisc_priv(sch);
156 
157 	qdisc_reset(q->qdisc);
158 	sch->q.qlen = 0;
159 	red_restart(&q->parms);
160 }
161 
162 static void red_destroy(struct Qdisc *sch)
163 {
164 	struct red_sched_data *q = qdisc_priv(sch);
165 
166 	del_timer_sync(&q->adapt_timer);
167 	qdisc_destroy(q->qdisc);
168 }
169 
170 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
171 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
172 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
173 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
174 };
175 
176 static int red_change(struct Qdisc *sch, struct nlattr *opt)
177 {
178 	struct red_sched_data *q = qdisc_priv(sch);
179 	struct nlattr *tb[TCA_RED_MAX + 1];
180 	struct tc_red_qopt *ctl;
181 	struct Qdisc *child = NULL;
182 	int err;
183 	u32 max_P;
184 
185 	if (opt == NULL)
186 		return -EINVAL;
187 
188 	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
189 	if (err < 0)
190 		return err;
191 
192 	if (tb[TCA_RED_PARMS] == NULL ||
193 	    tb[TCA_RED_STAB] == NULL)
194 		return -EINVAL;
195 
196 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
197 
198 	ctl = nla_data(tb[TCA_RED_PARMS]);
199 
200 	if (ctl->limit > 0) {
201 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
202 		if (IS_ERR(child))
203 			return PTR_ERR(child);
204 	}
205 
206 	sch_tree_lock(sch);
207 	q->flags = ctl->flags;
208 	q->limit = ctl->limit;
209 	if (child) {
210 		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
211 		qdisc_destroy(q->qdisc);
212 		q->qdisc = child;
213 	}
214 
215 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
216 		      ctl->Plog, ctl->Scell_log,
217 		      nla_data(tb[TCA_RED_STAB]),
218 		      max_P);
219 
220 	del_timer(&q->adapt_timer);
221 	if (ctl->flags & TC_RED_ADAPTATIVE)
222 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
223 
224 	if (!q->qdisc->q.qlen)
225 		red_start_of_idle_period(&q->parms);
226 
227 	sch_tree_unlock(sch);
228 	return 0;
229 }
230 
231 static inline void red_adaptative_timer(unsigned long arg)
232 {
233 	struct Qdisc *sch = (struct Qdisc *)arg;
234 	struct red_sched_data *q = qdisc_priv(sch);
235 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
236 
237 	spin_lock(root_lock);
238 	red_adaptative_algo(&q->parms);
239 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
240 	spin_unlock(root_lock);
241 }
242 
243 static int red_init(struct Qdisc *sch, struct nlattr *opt)
244 {
245 	struct red_sched_data *q = qdisc_priv(sch);
246 
247 	q->qdisc = &noop_qdisc;
248 	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
249 	return red_change(sch, opt);
250 }
251 
252 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
253 {
254 	struct red_sched_data *q = qdisc_priv(sch);
255 	struct nlattr *opts = NULL;
256 	struct tc_red_qopt opt = {
257 		.limit		= q->limit,
258 		.flags		= q->flags,
259 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
260 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
261 		.Wlog		= q->parms.Wlog,
262 		.Plog		= q->parms.Plog,
263 		.Scell_log	= q->parms.Scell_log,
264 	};
265 
266 	sch->qstats.backlog = q->qdisc->qstats.backlog;
267 	opts = nla_nest_start(skb, TCA_OPTIONS);
268 	if (opts == NULL)
269 		goto nla_put_failure;
270 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
271 	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
272 	return nla_nest_end(skb, opts);
273 
274 nla_put_failure:
275 	nla_nest_cancel(skb, opts);
276 	return -EMSGSIZE;
277 }
278 
279 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
280 {
281 	struct red_sched_data *q = qdisc_priv(sch);
282 	struct tc_red_xstats st = {
283 		.early	= q->stats.prob_drop + q->stats.forced_drop,
284 		.pdrop	= q->stats.pdrop,
285 		.other	= q->stats.other,
286 		.marked	= q->stats.prob_mark + q->stats.forced_mark,
287 	};
288 
289 	return gnet_stats_copy_app(d, &st, sizeof(st));
290 }
291 
292 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
293 			  struct sk_buff *skb, struct tcmsg *tcm)
294 {
295 	struct red_sched_data *q = qdisc_priv(sch);
296 
297 	tcm->tcm_handle |= TC_H_MIN(1);
298 	tcm->tcm_info = q->qdisc->handle;
299 	return 0;
300 }
301 
302 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
303 		     struct Qdisc **old)
304 {
305 	struct red_sched_data *q = qdisc_priv(sch);
306 
307 	if (new == NULL)
308 		new = &noop_qdisc;
309 
310 	sch_tree_lock(sch);
311 	*old = q->qdisc;
312 	q->qdisc = new;
313 	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
314 	qdisc_reset(*old);
315 	sch_tree_unlock(sch);
316 	return 0;
317 }
318 
319 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
320 {
321 	struct red_sched_data *q = qdisc_priv(sch);
322 	return q->qdisc;
323 }
324 
325 static unsigned long red_get(struct Qdisc *sch, u32 classid)
326 {
327 	return 1;
328 }
329 
330 static void red_put(struct Qdisc *sch, unsigned long arg)
331 {
332 }
333 
334 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
335 {
336 	if (!walker->stop) {
337 		if (walker->count >= walker->skip)
338 			if (walker->fn(sch, 1, walker) < 0) {
339 				walker->stop = 1;
340 				return;
341 			}
342 		walker->count++;
343 	}
344 }
345 
346 static const struct Qdisc_class_ops red_class_ops = {
347 	.graft		=	red_graft,
348 	.leaf		=	red_leaf,
349 	.get		=	red_get,
350 	.put		=	red_put,
351 	.walk		=	red_walk,
352 	.dump		=	red_dump_class,
353 };
354 
355 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
356 	.id		=	"red",
357 	.priv_size	=	sizeof(struct red_sched_data),
358 	.cl_ops		=	&red_class_ops,
359 	.enqueue	=	red_enqueue,
360 	.dequeue	=	red_dequeue,
361 	.peek		=	red_peek,
362 	.drop		=	red_drop,
363 	.init		=	red_init,
364 	.reset		=	red_reset,
365 	.destroy	=	red_destroy,
366 	.change		=	red_change,
367 	.dump		=	red_dump,
368 	.dump_stats	=	red_dump_stats,
369 	.owner		=	THIS_MODULE,
370 };
371 
372 static int __init red_module_init(void)
373 {
374 	return register_qdisc(&red_qdisc_ops);
375 }
376 
377 static void __exit red_module_exit(void)
378 {
379 	unregister_qdisc(&red_qdisc_ops);
380 }
381 
382 module_init(red_module_init)
383 module_exit(red_module_exit)
384 
385 MODULE_LICENSE("GPL");
386