/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 *		991129:	- Bug fix with grio mode
 *			- a better single AvgQ mode with Grio (WRED)
 *			- A finer grained VQ dequeue based on a suggestion
 *			  from Ren Liu
 *			- More error checks
 *
 * For all the glorious comments look at include/net/red.h
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/red.h>

#define GRED_DEF_PRIO	(MAX_DPs / 2)
#define GRED_VQ_MASK	(MAX_DPs - 1)

struct gred_sched_data;
struct gred_sched;

struct gred_sched_data
{
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop parameters */
	u32		bytesin;	/* bytes seen on virtualQ so far*/
	u32		packetsin;	/* packets seen on virtualQ so far*/
	u32		backlog;	/* bytes on the virtualQ */
	u8		prio;		/* the prio of this vq */

	struct red_parms parms;
	struct red_stats stats;
};

enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};

struct gred_sched
{
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		red_flags;
	u32		DPs;
	u32		def;
	struct red_parms wred_set;
};

static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}

static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2) but shouldn't be needed too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = 0; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n] != q &&
			    table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}

static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}

static inline u16 tc_index_to_dp(struct sk_buff *skb)
{
	return skb->tc_index & GRED_VQ_MASK;
}

static inline void gred_load_wred_set(struct gred_sched *table,
				      struct gred_sched_data *q)
{
	q->parms.qavg = table->wred_set.qavg;
	q->parms.qidlestart = table->wred_set.qidlestart;
}

static inline void gred_store_wred_set(struct gred_sched *table,
				       struct gred_sched_data *q)
{
	table->wred_set.qavg = q->parms.qavg;
}

static inline int gred_use_ecn(struct gred_sched *t)
{
	return t->red_flags & TC_RED_ECN;
}

static inline int gred_use_harddrop(struct gred_sched *t)
{
	return t->red_flags & TC_RED_HARDDROP;
}
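
/*
 * Enqueue path: skb->tc_index selects the virtual queue (DP). Packets
 * that map to an unconfigured DP fall back to the default DP; if no
 * default VQ exists either, they bypass RED entirely and are only
 * bounded by the device tx_queue_len. The average queue length is then
 * updated (per VQ, or shared via wred_set in WRED mode) and the usual
 * RED action decides between queueing, ECN marking and dropping.
 */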

static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		dp = t->def;

		if ((q = t->tab[dp]) == NULL) {
			/* Pass through packets not assigned to a DP
			 * if no default DP has been configured. This
			 * allows for DP flows to be left untouched.
			 */
			if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len)
				return qdisc_enqueue_tail(skb, sch);
			else
				goto drop;
		}

		/* fix tc_index? --could be controversial but needed for
		   requeueing */
		skb->tc_index = (skb->tc_index & ~GRED_VQ_MASK) | dp;
	}

	/* sum up the qavg of all prios lower than ours to get the new qavg */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		int i;

		for (i = 0; i < t->DPs; i++) {
			if (t->tab[i] && t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->parms))
				qavg += t->tab[i]->parms.qavg;
		}

	}

	q->packetsin++;
	q->bytesin += skb->len;

	if (gred_wred_mode(t))
		gred_load_wred_set(t, q);

	q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	if (gred_wred_mode(t))
		gred_store_wred_set(t, q);

	switch (red_action(&q->parms, q->parms.qavg + qavg)) {
		case RED_DONT_MARK:
			break;

		case RED_PROB_MARK:
			sch->qstats.overlimits++;
			if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
				q->stats.prob_drop++;
				goto congestion_drop;
			}

			q->stats.prob_mark++;
			break;

		case RED_HARD_MARK:
			sch->qstats.overlimits++;
			if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
			    !INET_ECN_set_ce(skb)) {
				q->stats.forced_drop++;
				goto congestion_drop;
			}
			q->stats.forced_mark++;
			break;
	}

	if (q->backlog + skb->len <= q->limit) {
		q->backlog += skb->len;
		return qdisc_enqueue_tail(skb, sch);
	}

	q->stats.pdrop++;
drop:
	return qdisc_drop(skb, sch);

congestion_drop:
	qdisc_drop(skb, sch);
	return NET_XMIT_CN;
}

static int gred_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct gred_sched *t = qdisc_priv(sch);
	struct gred_sched_data *q;
	u16 dp = tc_index_to_dp(skb);

	if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
		if (net_ratelimit())
			printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
			       "for requeue, screwing up backlog.\n",
			       tc_index_to_dp(skb));
	} else {
		if (red_is_idling(&q->parms))
			red_end_of_idle_period(&q->parms);
		q->backlog += skb->len;
	}

	return qdisc_requeue(skb, sch);
}
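
/*
 * Dequeue and drop only do per-VQ accounting: the skb's tc_index is
 * mapped back to its DP so the VQ backlog can be decremented, and an
 * idle period is started once a VQ (or, in WRED mode, the shared
 * average) runs empty so that red_calc_qavg() can decay qavg on the
 * next enqueue.
 */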

static struct sk_buff *gred_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_head(sch);

	if (skb) {
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			if (net_ratelimit())
				printk(KERN_WARNING "GRED: Unable to relocate "
				       "VQ 0x%x after dequeue, screwing up "
				       "backlog.\n", tc_index_to_dp(skb));
		} else {
			q->backlog -= skb->len;

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->parms);
		}

		return skb;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return NULL;
}

static unsigned int gred_drop(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct gred_sched *t = qdisc_priv(sch);

	skb = qdisc_dequeue_tail(sch);
	if (skb) {
		unsigned int len = skb->len;
		struct gred_sched_data *q;
		u16 dp = tc_index_to_dp(skb);

		if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
			if (net_ratelimit())
				printk(KERN_WARNING "GRED: Unable to relocate "
				       "VQ 0x%x while dropping, screwing up "
				       "backlog.\n", tc_index_to_dp(skb));
		} else {
			q->backlog -= len;
			q->stats.other++;

			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->parms);
		}

		qdisc_drop(skb, sch);
		return len;
	}

	if (gred_wred_mode(t) && !red_is_idling(&t->wred_set))
		red_start_of_idle_period(&t->wred_set);

	return 0;
}

static void gred_reset(struct Qdisc *sch)
{
	int i;
	struct gred_sched *t = qdisc_priv(sch);

	qdisc_reset_queue(sch);

	for (i = 0; i < t->DPs; i++) {
		struct gred_sched_data *q = t->tab[i];

		if (!q)
			continue;

		red_restart(&q->parms);
		q->backlog = 0;
	}
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}
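
/*
 * Configuration is split in two: a tc_gred_sopt (TCA_GRED_DPS) sets up
 * the table itself (number of DPs, default DP, grio and RED flags),
 * while each virtual queue is configured individually with a
 * tc_gred_qopt plus a 256 byte Stab (TCA_GRED_PARMS/TCA_GRED_STAB).
 *
 * Illustrative only -- approximate iproute2 syntax, interface name and
 * values are made up:
 *
 *	tc qdisc add dev eth0 root gred setup DPs 4 default 0 grio
 *	tc qdisc change dev eth0 root gred DP 0 prio 2 limit 60KB \
 *		min 15KB max 45KB burst 20 avpkt 1000 \
 *		bandwidth 10Mbit probability 0.02
 */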

static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	int i;

	if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt))
		return -EINVAL;

	sopt = RTA_DATA(dps);

	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
		return -EINVAL;

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;
	table->red_flags = sopt->flags;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			printk(KERN_WARNING "GRED: Warning: Destroying "
			       "shadowed VQ 0x%x\n", i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	return 0;
}

static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio, u8 *stab)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q;

	if (table->tab[dp] == NULL) {
		table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
		if (table->tab[dp] == NULL)
			return -ENOMEM;
		memset(table->tab[dp], 0, sizeof(*q));
	}

	q = table->tab[dp];
	q->DP = dp;
	q->prio = prio;
	q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->parms);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab);

	return 0;
}

static int gred_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct rtattr *tb[TCA_GRED_MAX];
	int err = -EINVAL, prio = GRED_DEF_PRIO;
	u8 *stab;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL)
		return gred_change_table_def(sch, opt);

	if (tb[TCA_GRED_PARMS-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
	    tb[TCA_GRED_STAB-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
		return -EINVAL;

	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
	stab = RTA_DATA(tb[TCA_GRED_STAB-1]);

	if (ctl->DP >= table->DPs)
		goto errout;

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio, "
			       "setting default to %d\n", ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
	if (err < 0)
		goto errout_locked;

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	err = 0;

errout_locked:
	sch_tree_unlock(sch);
errout:
	return err;
}
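
/*
 * gred_init only accepts the table parameters (TCA_GRED_DPS); the
 * individual VQs are added later via gred_change. gred_dump always
 * emits MAX_DPs tc_gred_qopt entries and flags unconfigured slots by
 * reporting a DP value >= MAX_DPs.
 */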

static int gred_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct rtattr *tb[TCA_GRED_MAX];

	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1])
		return -EINVAL;

	return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]);
}

static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct rtattr *parms, *opts = NULL;
	int i;
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
		.flags	= table->red_flags,
	};

	opts = RTA_NEST(skb, TCA_OPTIONS);
	RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
	parms = RTA_NEST(skb, TCA_GRED_PARMS);

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with proper message.
			   This is how we indicate to tc that there is no VQ
			   at this DP */

			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= q->backlog;
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.other	= q->stats.other;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table)) {
			q->parms.qidlestart =
				table->tab[table->def]->parms.qidlestart;
			q->parms.qavg = table->tab[table->def]->parms.qavg;
		}

		opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);

append_opt:
		RTA_APPEND(skb, sizeof(opt), &opt);
	}

	RTA_NEST_END(skb, parms);

	return RTA_NEST_END(skb, opts);

rtattr_failure:
	return RTA_NEST_CANCEL(skb, opts);
}

static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++) {
		if (table->tab[i])
			gred_destroy_vq(table->tab[i]);
	}
}

static struct Qdisc_ops gred_qdisc_ops = {
	.id		= "gred",
	.priv_size	= sizeof(struct gred_sched),
	.enqueue	= gred_enqueue,
	.dequeue	= gred_dequeue,
	.requeue	= gred_requeue,
	.drop		= gred_drop,
	.init		= gred_init,
	.reset		= gred_reset,
	.destroy	= gred_destroy,
	.change		= gred_change,
	.dump		= gred_dump,
	.owner		= THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");