1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * net/sched/sch_red.c Random Early Detection queue.
4 *
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Changes:
8 * J Hadi Salim 980914: computation fixes
9 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10 * J Hadi Salim 980816: ECN support
11 */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21
22
23 /* Parameters, settable by user:
24 -----------------------------
25
26 limit - bytes (must be > qth_max + burst)
27
28 Hard limit on queue length, should be chosen >qth_max
29 to allow packet bursts. This parameter does not
30 affect the algorithms behaviour and can be chosen
31 arbitrarily high (well, less than ram size)
32 Really, this limit will never be reached
33 if RED works correctly.
34 */
35
/* Per-qdisc private state for RED. */
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;		/* TC_RED_* behavior flags (ECN, harddrop, nodrop) */
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;	/* periodic adaptative-RED recomputation */
	struct Qdisc		*sch;		/* back-pointer for the timer callback */
	struct red_parms	parms;		/* configured RED parameters */
	struct red_vars		vars;		/* runtime RED state (qavg, idle period) */
	struct red_stats	stats;		/* drop/mark counters */
	struct Qdisc		*qdisc;		/* child qdisc actually holding packets */
	struct tcf_qevent	qe_early_drop;	/* qevent block run on early drops */
	struct tcf_qevent	qe_mark;	/* qevent block run on ECN marks */
};
52
53 #define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
54
/* Non-zero when the qdisc should ECN-mark instead of dropping. */
static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}
59
/* Non-zero when packets above qth_max are dropped even with ECN enabled. */
static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}
64
red_use_nodrop(struct red_sched_data * q)65 static int red_use_nodrop(struct red_sched_data *q)
66 {
67 return q->flags & TC_RED_NODROP;
68 }
69
/* Enqueue path: update the average queue length estimate, take the RED
 * decision (pass / probabilistic mark / hard mark) and either queue the
 * skb into the child qdisc, ECN-mark it, or drop it early.  Drops and
 * marks also run their respective qevent blocks, which may consume the
 * skb entirely.
 */
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum qdisc_drop_reason reason = QDISC_DROP_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	/* EWMA of the child's backlog (bytes) drives all RED decisions. */
	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.prob_mark,
				   q->stats.prob_mark + 1);
			/* The mark qevent may steal the skb. */
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = QDISC_DROP_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.forced_mark,
				   q->stats.forced_mark + 1);
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	/* Cache the length: the child may free the skb on failure. */
	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		WRITE_ONCE(q->stats.pdrop,
			   q->stats.pdrop + 1);
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	/* Give the early_drop qevent block a chance to consume the skb. */
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}
158
red_dequeue(struct Qdisc * sch)159 static struct sk_buff *red_dequeue(struct Qdisc *sch)
160 {
161 struct sk_buff *skb;
162 struct red_sched_data *q = qdisc_priv(sch);
163 struct Qdisc *child = q->qdisc;
164
165 skb = qdisc_dequeue_peeked(child);
166 if (skb) {
167 qdisc_bstats_update(sch, skb);
168 qdisc_qstats_backlog_dec(sch, skb);
169 sch->q.qlen--;
170 } else {
171 if (!red_is_idling(&q->vars))
172 red_start_of_idle_period(&q->vars);
173 }
174 return skb;
175 }
176
red_peek(struct Qdisc * sch)177 static struct sk_buff *red_peek(struct Qdisc *sch)
178 {
179 struct red_sched_data *q = qdisc_priv(sch);
180 struct Qdisc *child = q->qdisc;
181
182 return child->ops->peek(child);
183 }
184
/* Flush the child qdisc and reset the RED averaging state. */
static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}
192
/* Install (@enable true) or remove (@enable false) the RED configuration
 * in hardware via ndo_setup_tc.  Returns -EOPNOTSUPP when the device
 * cannot offload.
 */
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		/* Thresholds are stored scaled by Wlog; hand hardware the
		 * unscaled byte values.
		 */
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}
221
/* Tear down the qdisc: release qevent blocks, stop the adaptative timer,
 * remove any hardware offload, then drop the child qdisc reference.
 */
static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	timer_delete_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}
232
/* Netlink attribute policy for RED configuration; attributes from
 * TCA_RED_FLAGS onward are validated strictly.
 */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
242
/* Common configuration path shared by init and change: validate the
 * netlink attributes, optionally build a replacement bfifo child, then
 * apply parameters, flags and child swap atomically under the tree lock.
 */
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	/* Both the classic parameters and the scell lookup table are
	 * mandatory.
	 */
	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	/* Merge historic qopt flags with the TCA_RED_FLAGS bitfield. */
	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	/* Allocate the replacement child before taking the tree lock;
	 * ctl->limit == 0 keeps the existing child.
	 */
	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	/* Keep flags outside the bitfield selector untouched. */
	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_purge_queue(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	timer_delete(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	/* Push the new configuration to hardware (best effort). */
	red_offload(sch, true);

	/* Release the displaced child only after dropping the tree lock. */
	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}
328
/* Periodic (HZ/2) timer: re-run the adaptative RED algorithm under the
 * root qdisc lock and re-arm itself.
 */
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = timer_container_of(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	/* RCU protects the root qdisc lookup against concurrent regraft. */
	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}
343
/* Qdisc init: parse options, apply the initial configuration via
 * __red_change(), then bind the early-drop and mark qevent blocks.
 */
static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	/* Safe defaults so destroy works even if configuration fails. */
	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}
377
/* Qdisc change: re-parse options, reject attempts to alter the bound
 * qevent blocks, then re-apply parameters via __red_change().
 */
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}
402
/* Ask the offloading driver to fold its byte/queue statistics into the
 * software counters before they are dumped.
 */
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}
417
/* Dump the current configuration (parameters, max_P, flags, qevent
 * blocks) as nested netlink attributes.
 */
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		/* Report historic flags plus preserved user bits in the
		 * legacy qopt field; full flags go in TCA_RED_FLAGS below.
		 */
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}
454
/* Dump RED xstats.  For an offloaded qdisc, first pull the hardware
 * counters into q->stats; counters are read with READ_ONCE because the
 * enqueue path updates them locklessly with WRITE_ONCE.
 */
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = READ_ONCE(q->stats.prob_drop) +
		   READ_ONCE(q->stats.forced_drop);

	st.pdrop = READ_ONCE(q->stats.pdrop);

	st.marked = READ_ONCE(q->stats.prob_mark) +
		    READ_ONCE(q->stats.forced_mark);

	return gnet_stats_copy_app(d, &st, sizeof(st));
}
483
/* Dump the single pseudo-class (minor 1) that wraps the child qdisc. */
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}
493
/* Notify the offloading driver that a new child qdisc was grafted. */
static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}
508
/* Replace the child qdisc; NULL grafts the noop qdisc.  The displaced
 * child is returned to the caller via @old.
 */
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}
522
/* Return the child qdisc attached to the single class. */
static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}
528
/* There is exactly one class; any classid resolves to it. */
static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}
533
red_walk(struct Qdisc * sch,struct qdisc_walker * walker)534 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
535 {
536 if (!walker->stop) {
537 tc_qdisc_stats_dump(sch, 1, walker);
538 }
539 }
540
/* Class operations: RED exposes one pseudo-class wrapping its child. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};
548
/* Qdisc operations table registered under the "red" identifier. */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");
565
/* Module entry point: register the RED qdisc. */
static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}
570
/* Module exit point: unregister the RED qdisc. */
static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}
575
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");
