// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
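
/* An illustrative userspace invocation, in the style of the tc-red(8)
 * example (values are examples, not tuning advice).  tc derives Wlog,
 * Plog, Scell_log and the STAB lookup table from these numbers and
 * passes them down via TCA_RED_PARMS and TCA_RED_STAB:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 \
 *		max 90000 avpkt 1000 burst 55 ecn adaptive \
 *		bandwidth 10Mbit
 */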

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

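/* Historic flags (TC_RED_ECN, TC_RED_HARDDROP, TC_RED_ADAPTATIVE) arrive
 * in tc_red_qopt.flags; newer ones such as TC_RED_NODROP can only come in
 * through the TCA_RED_FLAGS bitfield32 attribute.  Any other bits set in
 * tc_red_qopt.flags are preserved in ->userbits and echoed back on dump.
 */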
#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

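/* Enqueue: refresh the average queue size (EWMA over the child's byte
 * backlog), then let red_action() compare it against the thresholds.
 * Below qth_min the packet is queued as-is; between qth_min and qth_max
 * it is marked/dropped with a probability that grows with the average;
 * above qth_max it is marked/dropped unconditionally.  With ECN enabled,
 * ECT packets are CE-marked instead of dropped; qevent blocks attached
 * at the "mark" and "early_drop" points may consume the skb.
 */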
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.prob_mark,
				   q->stats.prob_mark + 1);
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = SKB_DROP_REASON_QDISC_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.forced_mark,
				   q->stats.forced_mark + 1);
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		WRITE_ONCE(q->stats.pdrop,
			   q->stats.pdrop + 1);
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}

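/* An empty child starts an "idle period": red_calc_qavg() can then decay
 * the average while the link is quiet instead of freezing it (this is the
 * 990814 qave fix noted in the changelog above); red_enqueue() ends it.
 */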
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

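/* Mirror the configuration into hardware, or tear it down.  Thresholds
 * are stored scaled by 2^Wlog (red_set_parms() shifts them left), so they
 * are shifted back to plain bytes for the driver: with, say, qth_min of
 * 30000 bytes and Wlog of 9 (illustrative numbers), parms.qth_min holds
 * 30000 << 9 and the driver again sees 30000.
 */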
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	timer_delete_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

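/* Common body of ->init() and ->change(): validate TCA_RED_PARMS and
 * TCA_RED_STAB, merge old and new flags, optionally build a bfifo child
 * sized to the new byte limit, then apply everything under the qdisc
 * tree lock, re-arm the adaptive timer and push the result to hardware.
 */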
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_purge_queue(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	timer_delete(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

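/* Periodic adaptation of max_P (see red_adaptative_algo() in
 * include/net/red.h), re-armed every 500ms while TC_RED_ADAPTATIVE is
 * set.  The root qdisc lock keeps it from racing the datapath.
 */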
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = timer_container_of(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}

static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

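/* For an offloaded qdisc, have the driver refresh q->stats first; the
 * READ_ONCE() accesses below pair with the WRITE_ONCE() updates in
 * red_enqueue().
 */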
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = READ_ONCE(q->stats.prob_drop) +
		   READ_ONCE(q->stats.forced_drop);

	st.pdrop = READ_ONCE(q->stats.pdrop);

	st.marked = READ_ONCE(q->stats.prob_mark) +
		    READ_ONCE(q->stats.forced_mark);

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		tc_qdisc_stats_dump(sch, 1, walker);
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");