// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>


/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */
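
/* Example configuration from user space (illustrative only; the device
 * name and parameter values are made up — see tc-red(8) for the
 * authoritative syntax). min/max/limit are in bytes, avpkt is the
 * expected average packet size used to derive burst defaults:
 *
 *	tc qdisc add dev eth0 root red \
 *		limit 400000 min 30000 max 90000 avpkt 1000 \
 *		burst 55 ecn adaptive bandwidth 10Mbit
 */
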
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
	struct tcf_qevent	qe_early_drop;
	struct tcf_qevent	qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}
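
/* Enqueue path: the average queue size (qavg) is an EWMA of the child
 * qdisc's backlog maintained by red_calc_qavg(); it is kept scaled by
 * 2^Wlog, so the update is effectively avg <- avg + W * (backlog - avg)
 * with W = 2^-Wlog. red_action() compares qavg against qth_min/qth_max
 * and returns one of three outcomes: RED_DONT_MARK (queue normally),
 * RED_PROB_MARK (probabilistic early mark/drop between the thresholds)
 * or RED_HARD_MARK (average above qth_max, force mark/drop). With ECN
 * enabled, ECT packets are CE-marked instead of dropped; in nodrop mode
 * even non-ECT packets are queued rather than early-dropped.
 */
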
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	unsigned int len;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.prob_mark,
				   q->stats.prob_mark + 1);
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.prob_drop,
				   q->stats.prob_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		reason = SKB_DROP_REASON_QDISC_OVERLIMIT;
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->stats.forced_mark,
				   q->stats.forced_mark + 1);
			skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
			if (!skb)
				return NET_XMIT_CN | ret;
		} else if (!red_use_nodrop(q)) {
			WRITE_ONCE(q->stats.forced_drop,
				   q->stats.forced_drop + 1);
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	len = qdisc_pkt_len(skb);
	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->qstats.backlog += len;
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		WRITE_ONCE(q->stats.pdrop,
			   q->stats.pdrop + 1);
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
	if (!skb)
		return NET_XMIT_CN | ret;

	qdisc_drop_reason(skb, sch, to_free, reason);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	red_restart(&q->vars);
}

static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcf_qevent_destroy(&q->qe_mark, sch);
	tcf_qevent_destroy(&q->qe_early_drop, sch);
	timer_delete_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
	[TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
	[TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};
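
/* Flag handling: historic flags (ECN, HARDDROP, ADAPTATIVE) arrive in
 * tc_red_qopt.flags, newer ones (NODROP) via the TCA_RED_FLAGS
 * bitfield32, whose selector says which bits the request actually sets.
 * red_get_flags() splits out unknown "userbits", and the merge below
 * keeps any supported bit that the selector leaves untouched:
 *
 *	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
 */
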
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
			struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;
	u8 *stab;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = nla_get_u32_default(tb[TCA_RED_MAX_P], 0);

	ctl = nla_data(tb[TCA_RED_PARMS]);
	stab = nla_data(tb[TCA_RED_STAB]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
			      ctl->Scell_log, stab))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_purge_queue(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      stab,
		      max_P);
	red_set_vars(&q->vars);

	timer_delete(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

static void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = timer_container_of(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock;

	rcu_read_lock();
	root_lock = qdisc_lock(qdisc_root_sleeping(sch));
	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
	rcu_read_unlock();
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

	if (!opt)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = __red_change(sch, tb, extack);
	if (err)
		return err;

	err = tcf_qevent_init(&q->qe_early_drop, sch,
			      FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
			      tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	return tcf_qevent_init(&q->qe_mark, sch,
			       FLOW_BLOCK_BINDER_TYPE_RED_MARK,
			       tb[TCA_RED_MARK_BLOCK], extack);
}
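
/* Unlike red_init(), a runtime change may not rebind the qevent blocks:
 * tcf_qevent_validate_change() only verifies that any block index in the
 * request matches what is already bound before the common __red_change()
 * path applies the new parameters.
 */
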
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  extack);
	if (err < 0)
		return err;

	err = tcf_qevent_validate_change(&q->qe_early_drop,
					 tb[TCA_RED_EARLY_DROP_BLOCK], extack);
	if (err)
		return err;

	err = tcf_qevent_validate_change(&q->qe_mark,
					 tb[TCA_RED_MARK_BLOCK], extack);
	if (err)
		return err;

	return __red_change(sch, tb, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
			       q->flags, TC_RED_SUPPORTED_FLAGS) ||
	    tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
	    tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = READ_ONCE(q->stats.prob_drop) +
		   READ_ONCE(q->stats.forced_drop);

	st.pdrop = READ_ONCE(q->stats.pdrop);

	st.marked = READ_ONCE(q->stats.prob_mark) +
		    READ_ONCE(q->stats.forced_mark);

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		tc_qdisc_stats_dump(sch, 1, walker);
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("red");

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Random Early Detection qdisc");