xref: /linux/net/sched/sch_red.c (revision 65c93628599dff4cd7cfb70130d1f6a2203731ea)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c	Random Early Detection queue.
4  *
5  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914:	computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12 
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21 
22 
23 /*	Parameters, settable by user:
24 	-----------------------------
25 
26 	limit		- bytes (must be > qth_max + burst)
27 
28 	Hard limit on queue length, should be chosen >qth_max
29 	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
31 	arbitrarily high (well, less than ram size)
32 	Really, this limit will never be reached
33 	if RED works correctly.
34  */
35 
/* Private per-instance state of a RED qdisc, reached through qdisc_priv(). */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	/* TC_RED_* behaviour bits (tested for ECN, HARDDROP and NODROP). */
	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	/*
	 * Armed by red_change() when TC_RED_ADAPTATIVE is requested; the
	 * callback re-arms it every HZ/2.
	 */
	struct timer_list	adapt_timer;
	/* Back-pointer to the owning qdisc, read by the timer callback. */
	struct Qdisc		*sch;
	struct red_parms	parms;		/* configuration set by red_set_parms() */
	struct red_vars		vars;		/* running state: qavg, idle tracking */
	struct red_stats	stats;		/* drop / mark counters */
	/* Child qdisc that holds the packets; &noop_qdisc right after init. */
	struct Qdisc		*qdisc;
};
50 
/*
 * Flag bits this qdisc accepts: used as the validation mask of the
 * TCA_RED_FLAGS bitfield attribute and as the selector when dumping.
 */
static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
52 
53 static inline int red_use_ecn(struct red_sched_data *q)
54 {
55 	return q->flags & TC_RED_ECN;
56 }
57 
58 static inline int red_use_harddrop(struct red_sched_data *q)
59 {
60 	return q->flags & TC_RED_HARDDROP;
61 }
62 
63 static int red_use_nodrop(struct red_sched_data *q)
64 {
65 	return q->flags & TC_RED_NODROP;
66 }
67 
68 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
69 		       struct sk_buff **to_free)
70 {
71 	struct red_sched_data *q = qdisc_priv(sch);
72 	struct Qdisc *child = q->qdisc;
73 	int ret;
74 
75 	q->vars.qavg = red_calc_qavg(&q->parms,
76 				     &q->vars,
77 				     child->qstats.backlog);
78 
79 	if (red_is_idling(&q->vars))
80 		red_end_of_idle_period(&q->vars);
81 
82 	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
83 	case RED_DONT_MARK:
84 		break;
85 
86 	case RED_PROB_MARK:
87 		qdisc_qstats_overlimit(sch);
88 		if (!red_use_ecn(q)) {
89 			q->stats.prob_drop++;
90 			goto congestion_drop;
91 		}
92 
93 		if (INET_ECN_set_ce(skb)) {
94 			q->stats.prob_mark++;
95 		} else if (!red_use_nodrop(q)) {
96 			q->stats.prob_drop++;
97 			goto congestion_drop;
98 		}
99 
100 		/* Non-ECT packet in ECN nodrop mode: queue it. */
101 		break;
102 
103 	case RED_HARD_MARK:
104 		qdisc_qstats_overlimit(sch);
105 		if (red_use_harddrop(q) || !red_use_ecn(q)) {
106 			q->stats.forced_drop++;
107 			goto congestion_drop;
108 		}
109 
110 		if (INET_ECN_set_ce(skb)) {
111 			q->stats.forced_mark++;
112 		} else if (!red_use_nodrop(q)) {
113 			q->stats.forced_drop++;
114 			goto congestion_drop;
115 		}
116 
117 		/* Non-ECT packet in ECN nodrop mode: queue it. */
118 		break;
119 	}
120 
121 	ret = qdisc_enqueue(skb, child, to_free);
122 	if (likely(ret == NET_XMIT_SUCCESS)) {
123 		qdisc_qstats_backlog_inc(sch, skb);
124 		sch->q.qlen++;
125 	} else if (net_xmit_drop_count(ret)) {
126 		q->stats.pdrop++;
127 		qdisc_qstats_drop(sch);
128 	}
129 	return ret;
130 
131 congestion_drop:
132 	qdisc_drop(skb, sch, to_free);
133 	return NET_XMIT_CN;
134 }
135 
136 static struct sk_buff *red_dequeue(struct Qdisc *sch)
137 {
138 	struct sk_buff *skb;
139 	struct red_sched_data *q = qdisc_priv(sch);
140 	struct Qdisc *child = q->qdisc;
141 
142 	skb = child->dequeue(child);
143 	if (skb) {
144 		qdisc_bstats_update(sch, skb);
145 		qdisc_qstats_backlog_dec(sch, skb);
146 		sch->q.qlen--;
147 	} else {
148 		if (!red_is_idling(&q->vars))
149 			red_start_of_idle_period(&q->vars);
150 	}
151 	return skb;
152 }
153 
154 static struct sk_buff *red_peek(struct Qdisc *sch)
155 {
156 	struct red_sched_data *q = qdisc_priv(sch);
157 	struct Qdisc *child = q->qdisc;
158 
159 	return child->ops->peek(child);
160 }
161 
162 static void red_reset(struct Qdisc *sch)
163 {
164 	struct red_sched_data *q = qdisc_priv(sch);
165 
166 	qdisc_reset(q->qdisc);
167 	sch->qstats.backlog = 0;
168 	sch->q.qlen = 0;
169 	red_restart(&q->vars);
170 }
171 
172 static int red_offload(struct Qdisc *sch, bool enable)
173 {
174 	struct red_sched_data *q = qdisc_priv(sch);
175 	struct net_device *dev = qdisc_dev(sch);
176 	struct tc_red_qopt_offload opt = {
177 		.handle = sch->handle,
178 		.parent = sch->parent,
179 	};
180 
181 	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
182 		return -EOPNOTSUPP;
183 
184 	if (enable) {
185 		opt.command = TC_RED_REPLACE;
186 		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
187 		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
188 		opt.set.probability = q->parms.max_P;
189 		opt.set.limit = q->limit;
190 		opt.set.is_ecn = red_use_ecn(q);
191 		opt.set.is_harddrop = red_use_harddrop(q);
192 		opt.set.is_nodrop = red_use_nodrop(q);
193 		opt.set.qstats = &sch->qstats;
194 	} else {
195 		opt.command = TC_RED_DESTROY;
196 	}
197 
198 	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
199 }
200 
201 static void red_destroy(struct Qdisc *sch)
202 {
203 	struct red_sched_data *q = qdisc_priv(sch);
204 
205 	del_timer_sync(&q->adapt_timer);
206 	red_offload(sch, false);
207 	qdisc_put(q->qdisc);
208 }
209 
/*
 * Netlink policy for the attributes nested in TCA_OPTIONS.
 * Strict validation starts at TCA_RED_FLAGS; that attribute is a bitfield32
 * restricted to red_supported_flags.
 */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
			    .validation_data = &red_supported_flags },
};
218 
219 static int red_change(struct Qdisc *sch, struct nlattr *opt,
220 		      struct netlink_ext_ack *extack)
221 {
222 	struct Qdisc *old_child = NULL, *child = NULL;
223 	struct red_sched_data *q = qdisc_priv(sch);
224 	struct nlattr *tb[TCA_RED_MAX + 1];
225 	struct nla_bitfield32 flags_bf;
226 	struct tc_red_qopt *ctl;
227 	unsigned char userbits;
228 	unsigned char flags;
229 	int err;
230 	u32 max_P;
231 
232 	if (opt == NULL)
233 		return -EINVAL;
234 
235 	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
236 					  NULL);
237 	if (err < 0)
238 		return err;
239 
240 	if (tb[TCA_RED_PARMS] == NULL ||
241 	    tb[TCA_RED_STAB] == NULL)
242 		return -EINVAL;
243 
244 	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
245 
246 	ctl = nla_data(tb[TCA_RED_PARMS]);
247 	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
248 		return -EINVAL;
249 
250 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
251 			    tb[TCA_RED_FLAGS], red_supported_flags,
252 			    &flags_bf, &userbits, extack);
253 	if (err)
254 		return err;
255 
256 	if (ctl->limit > 0) {
257 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
258 					 extack);
259 		if (IS_ERR(child))
260 			return PTR_ERR(child);
261 
262 		/* child is fifo, no need to check for noop_qdisc */
263 		qdisc_hash_add(child, true);
264 	}
265 
266 	sch_tree_lock(sch);
267 
268 	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
269 	err = red_validate_flags(flags, extack);
270 	if (err)
271 		goto unlock_out;
272 
273 	q->flags = flags;
274 	q->userbits = userbits;
275 	q->limit = ctl->limit;
276 	if (child) {
277 		qdisc_tree_flush_backlog(q->qdisc);
278 		old_child = q->qdisc;
279 		q->qdisc = child;
280 	}
281 
282 	red_set_parms(&q->parms,
283 		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
284 		      ctl->Plog, ctl->Scell_log,
285 		      nla_data(tb[TCA_RED_STAB]),
286 		      max_P);
287 	red_set_vars(&q->vars);
288 
289 	del_timer(&q->adapt_timer);
290 	if (ctl->flags & TC_RED_ADAPTATIVE)
291 		mod_timer(&q->adapt_timer, jiffies + HZ/2);
292 
293 	if (!q->qdisc->q.qlen)
294 		red_start_of_idle_period(&q->vars);
295 
296 	sch_tree_unlock(sch);
297 
298 	red_offload(sch, true);
299 
300 	if (old_child)
301 		qdisc_put(old_child);
302 	return 0;
303 
304 unlock_out:
305 	sch_tree_unlock(sch);
306 	if (child)
307 		qdisc_put(child);
308 	return err;
309 }
310 
311 static inline void red_adaptative_timer(struct timer_list *t)
312 {
313 	struct red_sched_data *q = from_timer(q, t, adapt_timer);
314 	struct Qdisc *sch = q->sch;
315 	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
316 
317 	spin_lock(root_lock);
318 	red_adaptative_algo(&q->parms, &q->vars);
319 	mod_timer(&q->adapt_timer, jiffies + HZ/2);
320 	spin_unlock(root_lock);
321 }
322 
323 static int red_init(struct Qdisc *sch, struct nlattr *opt,
324 		    struct netlink_ext_ack *extack)
325 {
326 	struct red_sched_data *q = qdisc_priv(sch);
327 
328 	q->qdisc = &noop_qdisc;
329 	q->sch = sch;
330 	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
331 	return red_change(sch, opt, extack);
332 }
333 
334 static int red_dump_offload_stats(struct Qdisc *sch)
335 {
336 	struct tc_red_qopt_offload hw_stats = {
337 		.command = TC_RED_STATS,
338 		.handle = sch->handle,
339 		.parent = sch->parent,
340 		{
341 			.stats.bstats = &sch->bstats,
342 			.stats.qstats = &sch->qstats,
343 		},
344 	};
345 
346 	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
347 }
348 
349 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
350 {
351 	struct red_sched_data *q = qdisc_priv(sch);
352 	struct nla_bitfield32 flags_bf = {
353 		.selector = red_supported_flags,
354 		.value = q->flags,
355 	};
356 	struct nlattr *opts = NULL;
357 	struct tc_red_qopt opt = {
358 		.limit		= q->limit,
359 		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
360 				  q->userbits,
361 		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
362 		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
363 		.Wlog		= q->parms.Wlog,
364 		.Plog		= q->parms.Plog,
365 		.Scell_log	= q->parms.Scell_log,
366 	};
367 	int err;
368 
369 	err = red_dump_offload_stats(sch);
370 	if (err)
371 		goto nla_put_failure;
372 
373 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
374 	if (opts == NULL)
375 		goto nla_put_failure;
376 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
377 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
378 	    nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf))
379 		goto nla_put_failure;
380 	return nla_nest_end(skb, opts);
381 
382 nla_put_failure:
383 	nla_nest_cancel(skb, opts);
384 	return -EMSGSIZE;
385 }
386 
387 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
388 {
389 	struct red_sched_data *q = qdisc_priv(sch);
390 	struct net_device *dev = qdisc_dev(sch);
391 	struct tc_red_xstats st = {0};
392 
393 	if (sch->flags & TCQ_F_OFFLOADED) {
394 		struct tc_red_qopt_offload hw_stats_request = {
395 			.command = TC_RED_XSTATS,
396 			.handle = sch->handle,
397 			.parent = sch->parent,
398 			{
399 				.xstats = &q->stats,
400 			},
401 		};
402 		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
403 					      &hw_stats_request);
404 	}
405 	st.early = q->stats.prob_drop + q->stats.forced_drop;
406 	st.pdrop = q->stats.pdrop;
407 	st.other = q->stats.other;
408 	st.marked = q->stats.prob_mark + q->stats.forced_mark;
409 
410 	return gnet_stats_copy_app(d, &st, sizeof(st));
411 }
412 
413 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
414 			  struct sk_buff *skb, struct tcmsg *tcm)
415 {
416 	struct red_sched_data *q = qdisc_priv(sch);
417 
418 	tcm->tcm_handle |= TC_H_MIN(1);
419 	tcm->tcm_info = q->qdisc->handle;
420 	return 0;
421 }
422 
423 static void red_graft_offload(struct Qdisc *sch,
424 			      struct Qdisc *new, struct Qdisc *old,
425 			      struct netlink_ext_ack *extack)
426 {
427 	struct tc_red_qopt_offload graft_offload = {
428 		.handle		= sch->handle,
429 		.parent		= sch->parent,
430 		.child_handle	= new->handle,
431 		.command	= TC_RED_GRAFT,
432 	};
433 
434 	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
435 				   TC_SETUP_QDISC_RED, &graft_offload, extack);
436 }
437 
438 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
439 		     struct Qdisc **old, struct netlink_ext_ack *extack)
440 {
441 	struct red_sched_data *q = qdisc_priv(sch);
442 
443 	if (new == NULL)
444 		new = &noop_qdisc;
445 
446 	*old = qdisc_replace(sch, new, &q->qdisc);
447 
448 	red_graft_offload(sch, new, *old, extack);
449 	return 0;
450 }
451 
452 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
453 {
454 	struct red_sched_data *q = qdisc_priv(sch);
455 	return q->qdisc;
456 }
457 
458 static unsigned long red_find(struct Qdisc *sch, u32 classid)
459 {
460 	return 1;
461 }
462 
463 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
464 {
465 	if (!walker->stop) {
466 		if (walker->count >= walker->skip)
467 			if (walker->fn(sch, 1, walker) < 0) {
468 				walker->stop = 1;
469 				return;
470 			}
471 		walker->count++;
472 	}
473 }
474 
/* Class operations exposing the child qdisc as the single class of RED. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
482 
/* Qdisc operations table registered under the "red" identifier. */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
498 
499 static int __init red_module_init(void)
500 {
501 	return register_qdisc(&red_qdisc_ops);
502 }
503 
504 static void __exit red_module_exit(void)
505 {
506 	unregister_qdisc(&red_qdisc_ops);
507 }
508 
/* Hook the init/exit handlers into the module loader and declare the license. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
513