xref: /linux/net/sched/sch_red.c (revision d6869352cb3c3cf3450637a52349e2e87c1354aa)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length; should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
35 
/* Private per-qdisc state of a RED instance, obtained through qdisc_priv(). */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_* bits copied from tc_red_qopt.flags */
	struct timer_list	adapt_timer;	/* fires red_adaptative_timer() */
	struct Qdisc		*sch;		/* owning qdisc; read by the timer callback */
	struct red_parms	parms;		/* parameters installed by red_set_parms() */
	struct red_vars		vars;		/* running state (qavg, idle period) */
	struct red_stats	stats;		/* drop/mark counters exported by red_dump_stats() */
	struct Qdisc		*qdisc;		/* child qdisc packets are enqueued to */
};
46 
47 static inline int red_use_ecn(struct red_sched_data *q)
48 {
49 	return q->flags & TC_RED_ECN;
50 }
51 
52 static inline int red_use_harddrop(struct red_sched_data *q)
53 {
54 	return q->flags & TC_RED_HARDDROP;
55 }
56 
57 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
58 		       struct sk_buff **to_free)
59 {
60 	struct red_sched_data *q = qdisc_priv(sch);
61 	struct Qdisc *child = q->qdisc;
62 	int ret;
63 
64 	q->vars.qavg = red_calc_qavg(&q->parms,
65 				     &q->vars,
66 				     child->qstats.backlog);
67 
68 	if (red_is_idling(&q->vars))
69 		red_end_of_idle_period(&q->vars);
70 
71 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
72 	case RED_DONT_MARK:
73 		break;
74 
75 	case RED_PROB_MARK:
76 		qdisc_qstats_overlimit(sch);
77 		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
78 			q->stats.prob_drop++;
79 			goto congestion_drop;
80 		}
81 
82 		q->stats.prob_mark++;
83 		break;
84 
85 	case RED_HARD_MARK:
86 		qdisc_qstats_overlimit(sch);
87 		if (red_use_harddrop(q) || !red_use_ecn(q) ||
88 		    !INET_ECN_set_ce(skb)) {
89 			q->stats.forced_drop++;
90 			goto congestion_drop;
91 		}
92 
93 		q->stats.forced_mark++;
94 		break;
95 	}
96 
97 	ret = qdisc_enqueue(skb, child, to_free);
98 	if (likely(ret == NET_XMIT_SUCCESS)) {
99 		qdisc_qstats_backlog_inc(sch, skb);
100 		sch->q.qlen++;
101 	} else if (net_xmit_drop_count(ret)) {
102 		q->stats.pdrop++;
103 		qdisc_qstats_drop(sch);
104 	}
105 	return ret;
106 
107 congestion_drop:
108 	qdisc_drop(skb, sch, to_free);
109 	return NET_XMIT_CN;
110 }
111 
112 static struct sk_buff *red_dequeue(struct Qdisc *sch)
113 {
114 	struct sk_buff *skb;
115 	struct red_sched_data *q = qdisc_priv(sch);
116 	struct Qdisc *child = q->qdisc;
117 
118 	skb = child->dequeue(child);
119 	if (skb) {
120 		qdisc_bstats_update(sch, skb);
121 		qdisc_qstats_backlog_dec(sch, skb);
122 		sch->q.qlen--;
123 	} else {
124 		if (!red_is_idling(&q->vars))
125 			red_start_of_idle_period(&q->vars);
126 	}
127 	return skb;
128 }
129 
130 static struct sk_buff *red_peek(struct Qdisc *sch)
131 {
132 	struct red_sched_data *q = qdisc_priv(sch);
133 	struct Qdisc *child = q->qdisc;
134 
135 	return child->ops->peek(child);
136 }
137 
138 static void red_reset(struct Qdisc *sch)
139 {
140 	struct red_sched_data *q = qdisc_priv(sch);
141 
142 	qdisc_reset(q->qdisc);
143 	sch->qstats.backlog = 0;
144 	sch->q.qlen = 0;
145 	red_restart(&q->vars);
146 }
147 
148 static int red_offload(struct Qdisc *sch, bool enable)
149 {
150 	struct red_sched_data *q = qdisc_priv(sch);
151 	struct net_device *dev = qdisc_dev(sch);
152 	struct tc_red_qopt_offload opt = {
153 		.handle = sch->handle,
154 		.parent = sch->parent,
155 	};
156 
157 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
158 		return -EOPNOTSUPP;
159 
160 	if (enable) {
161 		opt.command = TC_RED_REPLACE;
162 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
163 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
164 		opt.set.probability = q->parms.max_P;
165 		opt.set.limit = q->limit;
166 		opt.set.is_ecn = red_use_ecn(q);
167 		opt.set.is_harddrop = red_use_harddrop(q);
168 		opt.set.qstats = &sch->qstats;
169 	} else {
170 		opt.command = TC_RED_DESTROY;
171 	}
172 
173 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
174 }
175 
176 static void red_destroy(struct Qdisc *sch)
177 {
178 	struct red_sched_data *q = qdisc_priv(sch);
179 
180 	del_timer_sync(&q->adapt_timer);
181 	red_offload(sch, false);
182 	qdisc_put(q->qdisc);
183 }
184 
/* Netlink attribute policy used by red_change() to parse TCA_RED_* options. */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};
190 
191 static int red_change(struct Qdisc *sch, struct nlattr *opt,
192 		      struct netlink_ext_ack *extack)
193 {
194 	struct Qdisc *old_child = NULL, *child = NULL;
195 	struct red_sched_data *q = qdisc_priv(sch);
196 	struct nlattr *tb[TCA_RED_MAX + 1];
197 	struct tc_red_qopt *ctl;
198 	int err;
199 	u32 max_P;
200 
201 	if (opt == NULL)
202 		return -EINVAL;
203 
204 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
205 					  NULL);
206 	if (err < 0)
207 		return err;
208 
209 	if (tb[TCA_RED_PARMS] == NULL ||
210 	    tb[TCA_RED_STAB] == NULL)
211 		return -EINVAL;
212 
213 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
214 
215 	ctl = nla_data(tb[TCA_RED_PARMS]);
216 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
217 		return -EINVAL;
218 
219 	if (ctl->limit > 0) {
220 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
221 					 extack);
222 		if (IS_ERR(child))
223 			return PTR_ERR(child);
224 
225 		/* child is fifo, no need to check for noop_qdisc */
226 		qdisc_hash_add(child, true);
227 	}
228 
229 	sch_tree_lock(sch);
230 	q->flags = ctl->flags;
231 	q->limit = ctl->limit;
232 	if (child) {
233 		qdisc_tree_flush_backlog(q->qdisc);
234 		old_child = q->qdisc;
235 		q->qdisc = child;
236 	}
237 
238 	red_set_parms(&q->parms,
239 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
240 		      ctl->Plog, ctl->Scell_log,
241 		      nla_data(tb[TCA_RED_STAB]),
242 		      max_P);
243 	red_set_vars(&q->vars);
244 
245 	del_timer(&q->adapt_timer);
246 	if (ctl->flags & TC_RED_ADAPTATIVE)
247 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
248 
249 	if (!q->qdisc->q.qlen)
250 		red_start_of_idle_period(&q->vars);
251 
252 	sch_tree_unlock(sch);
253 
254 	red_offload(sch, true);
255 
256 	if (old_child)
257 		qdisc_put(old_child);
258 	return 0;
259 }
260 
261 static inline void red_adaptative_timer(struct timer_list *t)
262 {
263 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
264 	struct Qdisc *sch = q->sch;
265 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
266 
267 	spin_lock(root_lock);
268 	red_adaptative_algo(&q->parms, &q->vars);
269 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
270 	spin_unlock(root_lock);
271 }
272 
273 static int red_init(struct Qdisc *sch, struct nlattr *opt,
274 		    struct netlink_ext_ack *extack)
275 {
276 	struct red_sched_data *q = qdisc_priv(sch);
277 
278 	q->qdisc = &noop_qdisc;
279 	q->sch = sch;
280 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
281 	return red_change(sch, opt, extack);
282 }
283 
284 static int red_dump_offload_stats(struct Qdisc *sch)
285 {
286 	struct tc_red_qopt_offload hw_stats = {
287 		.command = TC_RED_STATS,
288 		.handle = sch->handle,
289 		.parent = sch->parent,
290 		{
291 			.stats.bstats = &sch->bstats,
292 			.stats.qstats = &sch->qstats,
293 		},
294 	};
295 
296 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
297 }
298 
299 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
300 {
301 	struct red_sched_data *q = qdisc_priv(sch);
302 	struct nlattr *opts = NULL;
303 	struct tc_red_qopt opt = {
304 		.limit		= q->limit,
305 		.flags		= q->flags,
306 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
307 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
308 		.Wlog		= q->parms.Wlog,
309 		.Plog		= q->parms.Plog,
310 		.Scell_log	= q->parms.Scell_log,
311 	};
312 	int err;
313 
314 	err = red_dump_offload_stats(sch);
315 	if (err)
316 		goto nla_put_failure;
317 
318 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
319 	if (opts == NULL)
320 		goto nla_put_failure;
321 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
322 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
323 		goto nla_put_failure;
324 	return nla_nest_end(skb, opts);
325 
326 nla_put_failure:
327 	nla_nest_cancel(skb, opts);
328 	return -EMSGSIZE;
329 }
330 
331 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
332 {
333 	struct red_sched_data *q = qdisc_priv(sch);
334 	struct net_device *dev = qdisc_dev(sch);
335 	struct tc_red_xstats st = {0};
336 
337 	if (sch->flags & TCQ_F_OFFLOADED) {
338 		struct tc_red_qopt_offload hw_stats_request = {
339 			.command = TC_RED_XSTATS,
340 			.handle = sch->handle,
341 			.parent = sch->parent,
342 			{
343 				.xstats = &q->stats,
344 			},
345 		};
346 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
347 					      &hw_stats_request);
348 	}
349 	st.early = q->stats.prob_drop + q->stats.forced_drop;
350 	st.pdrop = q->stats.pdrop;
351 	st.other = q->stats.other;
352 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
353 
354 	return gnet_stats_copy_app(d, &st, sizeof(st));
355 }
356 
357 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
358 			  struct sk_buff *skb, struct tcmsg *tcm)
359 {
360 	struct red_sched_data *q = qdisc_priv(sch);
361 
362 	tcm->tcm_handle |= TC_H_MIN(1);
363 	tcm->tcm_info = q->qdisc->handle;
364 	return 0;
365 }
366 
367 static void red_graft_offload(struct Qdisc *sch,
368 			      struct Qdisc *new, struct Qdisc *old,
369 			      struct netlink_ext_ack *extack)
370 {
371 	struct tc_red_qopt_offload graft_offload = {
372 		.handle		= sch->handle,
373 		.parent		= sch->parent,
374 		.child_handle	= new->handle,
375 		.command	= TC_RED_GRAFT,
376 	};
377 
378 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
379 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
380 }
381 
382 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
383 		     struct Qdisc **old, struct netlink_ext_ack *extack)
384 {
385 	struct red_sched_data *q = qdisc_priv(sch);
386 
387 	if (new == NULL)
388 		new = &noop_qdisc;
389 
390 	*old = qdisc_replace(sch, new, &q->qdisc);
391 
392 	red_graft_offload(sch, new, *old, extack);
393 	return 0;
394 }
395 
396 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
397 {
398 	struct red_sched_data *q = qdisc_priv(sch);
399 	return q->qdisc;
400 }
401 
402 static unsigned long red_find(struct Qdisc *sch, u32 classid)
403 {
404 	return 1;
405 }
406 
407 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
408 {
409 	if (!walker->stop) {
410 		if (walker->count >= walker->skip)
411 			if (walker->fn(sch, 1, walker) < 0) {
412 				walker->stop = 1;
413 				return;
414 			}
415 		walker->count++;
416 	}
417 }
418 
/* Class operations for RED, referenced from red_qdisc_ops.cl_ops. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
426 
/* Qdisc operations table for "red"; registered in red_module_init(). */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
442 
443 static int __init red_module_init(void)
444 {
445 	return register_qdisc(&red_qdisc_ops);
446 }
447 
448 static void __exit red_module_exit(void)
449 {
450 	unregister_qdisc(&red_qdisc_ops);
451 }
452 
/* Hook the init/exit routines into the module loader and declare the license. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
457