/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification that can be handled in
	 layering other disciplines.  It does not need to do bandwidth
	 control either since that can be handled by using token
	 bucket or other rate control.

	 Correlated Loss Generator models

	 Added generation of correlated loss according to the
	 "Gilbert-Elliot" model, a 4-state markov model.
58 59 References: 60 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG 61 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general 62 and intuitive loss model for packet networks and its implementation 63 in the Netem module in the Linux kernel", available in [1] 64 65 Authors: Stefano Salsano <stefano.salsano at uniroma2.it 66 Fabio Ludovici <fabio.ludovici at yahoo.it> 67 */ 68 69 struct netem_sched_data { 70 struct Qdisc *qdisc; 71 struct qdisc_watchdog watchdog; 72 73 psched_tdiff_t latency; 74 psched_tdiff_t jitter; 75 76 u32 loss; 77 u32 limit; 78 u32 counter; 79 u32 gap; 80 u32 duplicate; 81 u32 reorder; 82 u32 corrupt; 83 u32 rate; 84 s32 packet_overhead; 85 u32 cell_size; 86 u32 cell_size_reciprocal; 87 s32 cell_overhead; 88 89 struct crndstate { 90 u32 last; 91 u32 rho; 92 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 93 94 struct disttable { 95 u32 size; 96 s16 table[0]; 97 } *delay_dist; 98 99 enum { 100 CLG_RANDOM, 101 CLG_4_STATES, 102 CLG_GILB_ELL, 103 } loss_model; 104 105 /* Correlated Loss Generation models */ 106 struct clgstate { 107 /* state of the Markov chain */ 108 u8 state; 109 110 /* 4-states and Gilbert-Elliot models */ 111 u32 a1; /* p13 for 4-states or p for GE */ 112 u32 a2; /* p31 for 4-states or r for GE */ 113 u32 a3; /* p32 for 4-states or h for GE */ 114 u32 a4; /* p14 for 4-states or 1-k for GE */ 115 u32 a5; /* p23 used only in 4-states */ 116 } clg; 117 118 }; 119 120 /* Time stamp put into socket buffer control block */ 121 struct netem_skb_cb { 122 psched_time_t time_to_send; 123 }; 124 125 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 126 { 127 BUILD_BUG_ON(sizeof(skb->cb) < 128 sizeof(struct qdisc_skb_cb) + sizeof(struct netem_skb_cb)); 129 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 130 } 131 132 /* init_crandom - initialize correlated random number generator 133 * Use entropy source for initial seed. 
134 */ 135 static void init_crandom(struct crndstate *state, unsigned long rho) 136 { 137 state->rho = rho; 138 state->last = net_random(); 139 } 140 141 /* get_crandom - correlated random number generator 142 * Next number depends on last value. 143 * rho is scaled to avoid floating point. 144 */ 145 static u32 get_crandom(struct crndstate *state) 146 { 147 u64 value, rho; 148 unsigned long answer; 149 150 if (state->rho == 0) /* no correlation */ 151 return net_random(); 152 153 value = net_random(); 154 rho = (u64)state->rho + 1; 155 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 156 state->last = answer; 157 return answer; 158 } 159 160 /* loss_4state - 4-state model loss generator 161 * Generates losses according to the 4-state Markov chain adopted in 162 * the GI (General and Intuitive) loss model. 163 */ 164 static bool loss_4state(struct netem_sched_data *q) 165 { 166 struct clgstate *clg = &q->clg; 167 u32 rnd = net_random(); 168 169 /* 170 * Makes a comparison between rnd and the transition 171 * probabilities outgoing from the current state, then decides the 172 * next state and if the next packet has to be transmitted or lost. 
173 * The four states correspond to: 174 * 1 => successfully transmitted packets within a gap period 175 * 4 => isolated losses within a gap period 176 * 3 => lost packets within a burst period 177 * 2 => successfully transmitted packets within a burst period 178 */ 179 switch (clg->state) { 180 case 1: 181 if (rnd < clg->a4) { 182 clg->state = 4; 183 return true; 184 } else if (clg->a4 < rnd && rnd < clg->a1) { 185 clg->state = 3; 186 return true; 187 } else if (clg->a1 < rnd) 188 clg->state = 1; 189 190 break; 191 case 2: 192 if (rnd < clg->a5) { 193 clg->state = 3; 194 return true; 195 } else 196 clg->state = 2; 197 198 break; 199 case 3: 200 if (rnd < clg->a3) 201 clg->state = 2; 202 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { 203 clg->state = 1; 204 return true; 205 } else if (clg->a2 + clg->a3 < rnd) { 206 clg->state = 3; 207 return true; 208 } 209 break; 210 case 4: 211 clg->state = 1; 212 break; 213 } 214 215 return false; 216 } 217 218 /* loss_gilb_ell - Gilbert-Elliot model loss generator 219 * Generates losses according to the Gilbert-Elliot loss model or 220 * its special cases (Gilbert or Simple Gilbert) 221 * 222 * Makes a comparison between random number and the transition 223 * probabilities outgoing from the current state, then decides the 224 * next state. A second random number is extracted and the comparison 225 * with the loss probability of the current state decides if the next 226 * packet will be transmitted or lost. 
227 */ 228 static bool loss_gilb_ell(struct netem_sched_data *q) 229 { 230 struct clgstate *clg = &q->clg; 231 232 switch (clg->state) { 233 case 1: 234 if (net_random() < clg->a1) 235 clg->state = 2; 236 if (net_random() < clg->a4) 237 return true; 238 case 2: 239 if (net_random() < clg->a2) 240 clg->state = 1; 241 if (clg->a3 > net_random()) 242 return true; 243 } 244 245 return false; 246 } 247 248 static bool loss_event(struct netem_sched_data *q) 249 { 250 switch (q->loss_model) { 251 case CLG_RANDOM: 252 /* Random packet drop 0 => none, ~0 => all */ 253 return q->loss && q->loss >= get_crandom(&q->loss_cor); 254 255 case CLG_4_STATES: 256 /* 4state loss model algorithm (used also for GI model) 257 * Extracts a value from the markov 4 state loss generator, 258 * if it is 1 drops a packet and if needed writes the event in 259 * the kernel logs 260 */ 261 return loss_4state(q); 262 263 case CLG_GILB_ELL: 264 /* Gilbert-Elliot loss model algorithm 265 * Extracts a value from the Gilbert-Elliot loss generator, 266 * if it is 1 drops a packet and if needed writes the event in 267 * the kernel logs 268 */ 269 return loss_gilb_ell(q); 270 } 271 272 return false; /* not reached */ 273 } 274 275 276 /* tabledist - return a pseudo-randomly distributed value with mean mu and 277 * std deviation sigma. Uses table lookup to approximate the desired 278 * distribution, and a uniformly-distributed pseudo-random source. 
279 */ 280 static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, 281 struct crndstate *state, 282 const struct disttable *dist) 283 { 284 psched_tdiff_t x; 285 long t; 286 u32 rnd; 287 288 if (sigma == 0) 289 return mu; 290 291 rnd = get_crandom(state); 292 293 /* default uniform distribution */ 294 if (dist == NULL) 295 return (rnd % (2*sigma)) - sigma + mu; 296 297 t = dist->table[rnd % dist->size]; 298 x = (sigma % NETEM_DIST_SCALE) * t; 299 if (x >= 0) 300 x += NETEM_DIST_SCALE/2; 301 else 302 x -= NETEM_DIST_SCALE/2; 303 304 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 305 } 306 307 static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) 308 { 309 u64 ticks; 310 311 len += q->packet_overhead; 312 313 if (q->cell_size) { 314 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); 315 316 if (len > cells * q->cell_size) /* extra cell needed for remainder */ 317 cells++; 318 len = cells * (q->cell_size + q->cell_overhead); 319 } 320 321 ticks = (u64)len * NSEC_PER_SEC; 322 323 do_div(ticks, q->rate); 324 return PSCHED_NS2TICKS(ticks); 325 } 326 327 /* 328 * Insert one skb into qdisc. 329 * Note: parent depends on return value to account for queue length. 330 * NET_XMIT_DROP: queue length didn't change. 331 * NET_XMIT_SUCCESS: one skb was queued. 332 */ 333 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) 334 { 335 struct netem_sched_data *q = qdisc_priv(sch); 336 /* We don't fill cb now as skb_unshare() may invalidate it */ 337 struct netem_skb_cb *cb; 338 struct sk_buff *skb2; 339 int ret; 340 int count = 1; 341 342 /* Random duplication */ 343 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 344 ++count; 345 346 /* Drop packet? 
*/ 347 if (loss_event(q)) 348 --count; 349 350 if (count == 0) { 351 sch->qstats.drops++; 352 kfree_skb(skb); 353 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 354 } 355 356 skb_orphan(skb); 357 358 /* 359 * If we need to duplicate packet, then re-insert at top of the 360 * qdisc tree, since parent queuer expects that only one 361 * skb will be queued. 362 */ 363 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { 364 struct Qdisc *rootq = qdisc_root(sch); 365 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ 366 q->duplicate = 0; 367 368 qdisc_enqueue_root(skb2, rootq); 369 q->duplicate = dupsave; 370 } 371 372 /* 373 * Randomized packet corruption. 374 * Make copy if needed since we are modifying 375 * If packet is going to be hardware checksummed, then 376 * do it now in software before we mangle it. 377 */ 378 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { 379 if (!(skb = skb_unshare(skb, GFP_ATOMIC)) || 380 (skb->ip_summed == CHECKSUM_PARTIAL && 381 skb_checksum_help(skb))) { 382 sch->qstats.drops++; 383 return NET_XMIT_DROP; 384 } 385 386 skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8); 387 } 388 389 cb = netem_skb_cb(skb); 390 if (q->gap == 0 || /* not doing reordering */ 391 q->counter < q->gap || /* inside last reordering gap */ 392 q->reorder < get_crandom(&q->reorder_cor)) { 393 psched_time_t now; 394 psched_tdiff_t delay; 395 396 delay = tabledist(q->latency, q->jitter, 397 &q->delay_cor, q->delay_dist); 398 399 now = psched_get_time(); 400 401 if (q->rate) { 402 struct sk_buff_head *list = &q->qdisc->q; 403 404 delay += packet_len_2_sched_time(skb->len, q); 405 406 if (!skb_queue_empty(list)) { 407 /* 408 * Last packet in queue is reference point (now). 409 * First packet in queue is already in flight, 410 * calculate this time bonus and substract 411 * from delay. 
412 */ 413 delay -= now - netem_skb_cb(skb_peek(list))->time_to_send; 414 now = netem_skb_cb(skb_peek_tail(list))->time_to_send; 415 } 416 } 417 418 cb->time_to_send = now + delay; 419 ++q->counter; 420 ret = qdisc_enqueue(skb, q->qdisc); 421 } else { 422 /* 423 * Do re-ordering by putting one out of N packets at the front 424 * of the queue. 425 */ 426 cb->time_to_send = psched_get_time(); 427 q->counter = 0; 428 429 __skb_queue_head(&q->qdisc->q, skb); 430 q->qdisc->qstats.backlog += qdisc_pkt_len(skb); 431 q->qdisc->qstats.requeues++; 432 ret = NET_XMIT_SUCCESS; 433 } 434 435 if (ret != NET_XMIT_SUCCESS) { 436 if (net_xmit_drop_count(ret)) { 437 sch->qstats.drops++; 438 return ret; 439 } 440 } 441 442 sch->q.qlen++; 443 return NET_XMIT_SUCCESS; 444 } 445 446 static unsigned int netem_drop(struct Qdisc *sch) 447 { 448 struct netem_sched_data *q = qdisc_priv(sch); 449 unsigned int len = 0; 450 451 if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { 452 sch->q.qlen--; 453 sch->qstats.drops++; 454 } 455 return len; 456 } 457 458 static struct sk_buff *netem_dequeue(struct Qdisc *sch) 459 { 460 struct netem_sched_data *q = qdisc_priv(sch); 461 struct sk_buff *skb; 462 463 if (qdisc_is_throttled(sch)) 464 return NULL; 465 466 skb = q->qdisc->ops->peek(q->qdisc); 467 if (skb) { 468 const struct netem_skb_cb *cb = netem_skb_cb(skb); 469 psched_time_t now = psched_get_time(); 470 471 /* if more time remaining? */ 472 if (cb->time_to_send <= now) { 473 skb = qdisc_dequeue_peeked(q->qdisc); 474 if (unlikely(!skb)) 475 return NULL; 476 477 #ifdef CONFIG_NET_CLS_ACT 478 /* 479 * If it's at ingress let's pretend the delay is 480 * from the network (tstamp will be updated). 
481 */ 482 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 483 skb->tstamp.tv64 = 0; 484 #endif 485 486 sch->q.qlen--; 487 qdisc_unthrottled(sch); 488 qdisc_bstats_update(sch, skb); 489 return skb; 490 } 491 492 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); 493 } 494 495 return NULL; 496 } 497 498 static void netem_reset(struct Qdisc *sch) 499 { 500 struct netem_sched_data *q = qdisc_priv(sch); 501 502 qdisc_reset(q->qdisc); 503 sch->q.qlen = 0; 504 qdisc_watchdog_cancel(&q->watchdog); 505 } 506 507 static void dist_free(struct disttable *d) 508 { 509 if (d) { 510 if (is_vmalloc_addr(d)) 511 vfree(d); 512 else 513 kfree(d); 514 } 515 } 516 517 /* 518 * Distribution data is a variable size payload containing 519 * signed 16 bit values. 520 */ 521 static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 522 { 523 struct netem_sched_data *q = qdisc_priv(sch); 524 size_t n = nla_len(attr)/sizeof(__s16); 525 const __s16 *data = nla_data(attr); 526 spinlock_t *root_lock; 527 struct disttable *d; 528 int i; 529 size_t s; 530 531 if (n > NETEM_DIST_MAX) 532 return -EINVAL; 533 534 s = sizeof(struct disttable) + n * sizeof(s16); 535 d = kmalloc(s, GFP_KERNEL); 536 if (!d) 537 d = vmalloc(s); 538 if (!d) 539 return -ENOMEM; 540 541 d->size = n; 542 for (i = 0; i < n; i++) 543 d->table[i] = data[i]; 544 545 root_lock = qdisc_root_sleeping_lock(sch); 546 547 spin_lock_bh(root_lock); 548 dist_free(q->delay_dist); 549 q->delay_dist = d; 550 spin_unlock_bh(root_lock); 551 return 0; 552 } 553 554 static void get_correlation(struct Qdisc *sch, const struct nlattr *attr) 555 { 556 struct netem_sched_data *q = qdisc_priv(sch); 557 const struct tc_netem_corr *c = nla_data(attr); 558 559 init_crandom(&q->delay_cor, c->delay_corr); 560 init_crandom(&q->loss_cor, c->loss_corr); 561 init_crandom(&q->dup_cor, c->dup_corr); 562 } 563 564 static void get_reorder(struct Qdisc *sch, const struct nlattr *attr) 565 { 566 struct netem_sched_data *q = qdisc_priv(sch); 567 const 
struct tc_netem_reorder *r = nla_data(attr); 568 569 q->reorder = r->probability; 570 init_crandom(&q->reorder_cor, r->correlation); 571 } 572 573 static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) 574 { 575 struct netem_sched_data *q = qdisc_priv(sch); 576 const struct tc_netem_corrupt *r = nla_data(attr); 577 578 q->corrupt = r->probability; 579 init_crandom(&q->corrupt_cor, r->correlation); 580 } 581 582 static void get_rate(struct Qdisc *sch, const struct nlattr *attr) 583 { 584 struct netem_sched_data *q = qdisc_priv(sch); 585 const struct tc_netem_rate *r = nla_data(attr); 586 587 q->rate = r->rate; 588 q->packet_overhead = r->packet_overhead; 589 q->cell_size = r->cell_size; 590 if (q->cell_size) 591 q->cell_size_reciprocal = reciprocal_value(q->cell_size); 592 q->cell_overhead = r->cell_overhead; 593 } 594 595 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) 596 { 597 struct netem_sched_data *q = qdisc_priv(sch); 598 const struct nlattr *la; 599 int rem; 600 601 nla_for_each_nested(la, attr, rem) { 602 u16 type = nla_type(la); 603 604 switch(type) { 605 case NETEM_LOSS_GI: { 606 const struct tc_netem_gimodel *gi = nla_data(la); 607 608 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { 609 pr_info("netem: incorrect gi model size\n"); 610 return -EINVAL; 611 } 612 613 q->loss_model = CLG_4_STATES; 614 615 q->clg.state = 1; 616 q->clg.a1 = gi->p13; 617 q->clg.a2 = gi->p31; 618 q->clg.a3 = gi->p32; 619 q->clg.a4 = gi->p14; 620 q->clg.a5 = gi->p23; 621 break; 622 } 623 624 case NETEM_LOSS_GE: { 625 const struct tc_netem_gemodel *ge = nla_data(la); 626 627 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { 628 pr_info("netem: incorrect gi model size\n"); 629 return -EINVAL; 630 } 631 632 q->loss_model = CLG_GILB_ELL; 633 q->clg.state = 1; 634 q->clg.a1 = ge->p; 635 q->clg.a2 = ge->r; 636 q->clg.a3 = ge->h; 637 q->clg.a4 = ge->k1; 638 break; 639 } 640 641 default: 642 pr_info("netem: unknown loss type %u\n", type); 
643 return -EINVAL; 644 } 645 } 646 647 return 0; 648 } 649 650 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 651 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 652 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 653 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 654 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, 655 [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 656 }; 657 658 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 659 const struct nla_policy *policy, int len) 660 { 661 int nested_len = nla_len(nla) - NLA_ALIGN(len); 662 663 if (nested_len < 0) { 664 pr_info("netem: invalid attributes len %d\n", nested_len); 665 return -EINVAL; 666 } 667 668 if (nested_len >= nla_attr_size(0)) 669 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 670 nested_len, policy); 671 672 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 673 return 0; 674 } 675 676 /* Parse netlink message to set options */ 677 static int netem_change(struct Qdisc *sch, struct nlattr *opt) 678 { 679 struct netem_sched_data *q = qdisc_priv(sch); 680 struct nlattr *tb[TCA_NETEM_MAX + 1]; 681 struct tc_netem_qopt *qopt; 682 int ret; 683 684 if (opt == NULL) 685 return -EINVAL; 686 687 qopt = nla_data(opt); 688 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); 689 if (ret < 0) 690 return ret; 691 692 ret = fifo_set_limit(q->qdisc, qopt->limit); 693 if (ret) { 694 pr_info("netem: can't set fifo limit\n"); 695 return ret; 696 } 697 698 q->latency = qopt->latency; 699 q->jitter = qopt->jitter; 700 q->limit = qopt->limit; 701 q->gap = qopt->gap; 702 q->counter = 0; 703 q->loss = qopt->loss; 704 q->duplicate = qopt->duplicate; 705 706 /* for compatibility with earlier versions. 
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(sch, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			return ret;
	}

	if (tb[TCA_NETEM_REORDER])
		get_reorder(sch, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(sch, tb[TCA_NETEM_RATE]);

	/* loss model defaults to plain random loss unless overridden */
	q->loss_model = CLG_RANDOM;
	if (tb[TCA_NETEM_LOSS])
		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);

	return ret;
}

/*
 * Special case version of FIFO queue for use by netem.
 * It queues in order based on timestamps in skb's
 */
struct fifo_sched_data {
	u32 limit;		/* max number of queued packets */
	psched_time_t oldest;	/* largest time_to_send seen at the tail */
};

/* Enqueue nskb keeping the queue sorted by time_to_send. */
static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct sk_buff_head *list = &sch->q;
	psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
	struct sk_buff *skb;

	if (likely(skb_queue_len(list) < q->limit)) {
		/* Optimize for add at tail */
		if (likely(skb_queue_empty(list) || tnext >= q->oldest)) {
			q->oldest = tnext;
			return qdisc_enqueue_tail(nskb, sch);
		}

		/* walk backwards to find the insertion point */
		skb_queue_reverse_walk(list, skb) {
			const struct netem_skb_cb *cb = netem_skb_cb(skb);

			if (tnext >= cb->time_to_send)
				break;
		}

		__skb_queue_after(list, skb, nskb);

		sch->qstats.backlog += qdisc_pkt_len(nskb);

		return NET_XMIT_SUCCESS;
	}

	/* queue full */
	return qdisc_reshape_fail(nskb, sch);
}

static int tfifo_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct fifo_sched_data *q = qdisc_priv(sch);

	if (opt) {
		struct tc_fifo_qopt *ctl = nla_data(opt);
		if (nla_len(opt) < sizeof(*ctl))
			return -EINVAL;

		q->limit = ctl->limit;
	} else
		/* default: device tx queue length, at least one packet */
		q->limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);

	q->oldest = PSCHED_PASTPERFECT;
	return 0;
}

static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct fifo_sched_data *q = qdisc_priv(sch);
	struct tc_fifo_qopt opt = { .limit = q->limit };

	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
	.id		=	"tfifo",
	.priv_size	=	sizeof(struct fifo_sched_data),
	.enqueue	=	tfifo_enqueue,
	.dequeue	=	qdisc_dequeue_head,
	.peek		=	qdisc_peek_head,
	.drop		=	qdisc_queue_drop,
	.init		=	tfifo_init,
	.reset		=	qdisc_reset_queue,
	.change		=	tfifo_init,
	.dump		=	tfifo_dump,
};

static int netem_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	qdisc_watchdog_init(&q->watchdog, sch);

	q->loss_model = CLG_RANDOM;
	/* the inner tfifo qdisc holds the delayed packets */
	q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
				     TC_H_MAKE(sch->handle, 1));
	if (!q->qdisc) {
		pr_notice("netem: qdisc create tfifo qdisc failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_info("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	qdisc_destroy(q->qdisc);
	dist_free(q->delay_dist);
}

/* Dump the active loss model as a nested TCA_NETEM_LOSS attribute. */
static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case
CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

/* Dump the qdisc configuration: fixed tc_netem_qopt followed by the
 * optional attributes, all inside TCA_OPTIONS.
 */
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	/* remember where TCA_OPTIONS starts so its length can be fixed up */
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	NLA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	NLA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);

	rate.rate = q->rate;
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

/* Replace the inner qdisc; netem has exactly one child. */
static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

/* Class handles are not refcounted; the single class is always id 1. */
static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

/* Walk the (single) class for dumps. */
static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=
	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


/* Register/unregister the "netem" qdisc with the packet scheduler core. */
static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");