1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * net/sched/sch_netem.c Network emulator 4 * 5 * Many of the algorithms and ideas for this came from 6 * NIST Net which is not copyrighted. 7 * 8 * Authors: Stephen Hemminger <shemminger@osdl.org> 9 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 10 */ 11 12 #include <linux/mm.h> 13 #include <linux/module.h> 14 #include <linux/slab.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/errno.h> 18 #include <linux/skbuff.h> 19 #include <linux/vmalloc.h> 20 #include <linux/prandom.h> 21 #include <linux/rtnetlink.h> 22 #include <linux/reciprocal_div.h> 23 #include <linux/rbtree.h> 24 25 #include <net/gso.h> 26 #include <net/netlink.h> 27 #include <net/pkt_sched.h> 28 #include <net/inet_ecn.h> 29 30 /* Network Emulation Queuing algorithm. 31 ==================================== 32 33 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 34 Network Emulation Tool 35 [2] Luigi Rizzo, DummyNet for FreeBSD 36 37 ---------------------------------------------------------------- 38 39 This started out as a simple way to delay outgoing packets to 40 test TCP but has grown to include most of the functionality 41 of a full blown network emulator like NISTnet. It can delay 42 packets and add random jitter (and correlation). The random 43 distribution can be loaded from a table as well to provide 44 normal, Pareto, or experimental curves. Packet loss, 45 duplication, and reordering can also be emulated. 46 47 This qdisc does not do classification that can be handled in 48 layering other disciplines. It does not need to do bandwidth 49 control either since that can be handled by using token 50 bucket or other rate control. 51 52 Correlated Loss Generator models 53 54 Added generation of correlated loss according to the 55 "Gilbert-Elliot" model, a 4-state markov model. 56 57 References: 58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG 59 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general 60 and intuitive loss model for packet networks and its implementation 61 in the Netem module in the Linux kernel", available in [1] 62 63 Authors: Stefano Salsano <stefano.salsano at uniroma2.it 64 Fabio Ludovici <fabio.ludovici at yahoo.it> 65 */ 66 67 struct disttable { 68 u32 size; 69 s16 table[] __counted_by(size); 70 }; 71 72 /* Loss models */ 73 enum { 74 CLG_RANDOM, 75 CLG_4_STATES, 76 CLG_GILB_ELL, 77 }; 78 79 /* States in GE model */ 80 enum { 81 GOOD_STATE = 1, 82 BAD_STATE, 83 }; 84 85 /* States in 4 state model */ 86 enum { 87 TX_IN_GAP_PERIOD = 1, 88 TX_IN_BURST_PERIOD, 89 LOST_IN_GAP_PERIOD, 90 LOST_IN_BURST_PERIOD, 91 }; 92 93 struct netem_sched_data { 94 /* Cacheline 0: tfifo state and per-packet enqueue/dequeue scalars. */ 95 struct rb_root t_root; 96 struct sk_buff *t_head; 97 struct sk_buff *t_tail; 98 u32 t_len; 99 u32 counter; 100 s64 latency; 101 s64 jitter; 102 u64 rate; 103 u32 gap; 104 u32 loss; 105 106 /* Cacheline 1: zero-check scalars and correlation states. */ 107 u32 duplicate; 108 u32 reorder; 109 u32 corrupt; 110 u32 ecn; 111 struct crndstate { 112 u32 last; 113 u32 rho; 114 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor; 115 u8 loss_model; 116 117 /* Cacheline 2: PRNG, distribution tables, slot dequeue state etc. */ 118 struct prng { 119 u64 seed; 120 struct rnd_state prng_state; 121 } prng; 122 struct disttable *delay_dist; 123 struct slotstate { 124 u64 slot_next; 125 s32 packets_left; 126 s32 bytes_left; 127 } slot; 128 struct disttable *slot_dist; 129 struct Qdisc *qdisc; 130 131 /* 132 * Warm: rate-shaping parameters (only read when rate != 0) and 133 * configuration-only fields. The fast path reads sch->limit, not 134 * q->limit. 135 */ 136 s32 packet_overhead; 137 u32 cell_size; 138 struct reciprocal_value cell_size_reciprocal; 139 s32 cell_overhead; 140 u32 limit; 141 142 /* Correlated Loss Generation models */ 143 struct clgstate { 144 /* 4-states and Gilbert-Elliot models */ 145 u32 a1; /* p13 for 4-states or p for GE */ 146 u32 a2; /* p31 for 4-states or r for GE */ 147 u32 a3; /* p32 for 4-states or h for GE */ 148 u32 a4; /* p14 for 4-states or 1-k for GE */ 149 u32 a5; /* p23 used only in 4-states */ 150 151 /* state of the Markov chain */ 152 u8 state; 153 } clg; 154 155 /* Impairment counters */ 156 u64 delayed; 157 u64 dropped; 158 u64 corrupted; 159 u64 duplicated; 160 u64 ecn_marked; 161 u64 reordered; 162 u64 allocation_errors; 163 164 /* Cold tail: slot reschedule config and the watchdog timer. */ 165 struct tc_netem_slot slot_config; 166 struct qdisc_watchdog watchdog; 167 }; 168 169 /* Time stamp put into socket buffer control block 170 * Only valid when skbs are in our internal t(ime)fifo queue. 171 * 172 * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp, 173 * and skb->next & skb->prev are scratch space for a qdisc, 174 * we save skb->tstamp value in skb->cb[] before destroying it. 175 */ 176 struct netem_skb_cb { 177 u64 time_to_send; 178 }; 179 180 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) 181 { 182 /* we assume we can use skb next/prev/tstamp as storage for rb_node */ 183 qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb)); 184 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data; 185 } 186 187 /* init_crandom - initialize correlated random number generator 188 * Use entropy source for initial seed. 189 */ 190 static void init_crandom(struct crndstate *state, unsigned long rho) 191 { 192 state->rho = rho; 193 state->last = get_random_u32(); 194 } 195 196 /* get_crandom - correlated random number generator 197 * Next number depends on last value. 198 * rho is scaled to avoid floating point. 199 */ 200 static u32 get_crandom(struct crndstate *state, struct prng *p) 201 { 202 u64 value, rho; 203 unsigned long answer; 204 struct rnd_state *s = &p->prng_state; 205 206 if (!state || state->rho == 0) /* no correlation */ 207 return prandom_u32_state(s); 208 209 value = prandom_u32_state(s); 210 rho = (u64)state->rho + 1; 211 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 212 state->last = answer; 213 return answer; 214 } 215 216 /* loss_4state - 4-state model loss generator 217 * Generates losses according to the 4-state Markov chain adopted in 218 * the GI (General and Intuitive) loss model. 219 */ 220 static bool loss_4state(struct netem_sched_data *q) 221 { 222 struct clgstate *clg = &q->clg; 223 u32 rnd = prandom_u32_state(&q->prng.prng_state); 224 225 /* 226 * Makes a comparison between rnd and the transition 227 * probabilities outgoing from the current state, then decides the 228 * next state and if the next packet has to be transmitted or lost. 229 * The four states correspond to: 230 * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period 231 * LOST_IN_GAP_PERIOD => isolated losses within a gap period 232 * LOST_IN_BURST_PERIOD => lost packets within a burst period 233 * TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period 234 */ 235 switch (clg->state) { 236 case TX_IN_GAP_PERIOD: 237 if (rnd < clg->a4) { 238 clg->state = LOST_IN_GAP_PERIOD; 239 return true; 240 } else if (rnd < clg->a1 + clg->a4) { 241 clg->state = LOST_IN_BURST_PERIOD; 242 return true; 243 } else { 244 clg->state = TX_IN_GAP_PERIOD; 245 } 246 247 break; 248 case TX_IN_BURST_PERIOD: 249 if (rnd < clg->a5) { 250 clg->state = LOST_IN_BURST_PERIOD; 251 return true; 252 } else { 253 clg->state = TX_IN_BURST_PERIOD; 254 } 255 256 break; 257 case LOST_IN_BURST_PERIOD: 258 if (rnd < clg->a3) 259 clg->state = TX_IN_BURST_PERIOD; 260 else if (rnd < clg->a2 + clg->a3) { 261 clg->state = TX_IN_GAP_PERIOD; 262 } else { 263 clg->state = LOST_IN_BURST_PERIOD; 264 return true; 265 } 266 break; 267 case LOST_IN_GAP_PERIOD: 268 clg->state = TX_IN_GAP_PERIOD; 269 break; 270 } 271 272 return false; 273 } 274 275 /* loss_gilb_ell - Gilbert-Elliot model loss generator 276 * Generates losses according to the Gilbert-Elliot loss model or 277 * its special cases (Gilbert or Simple Gilbert) 278 * 279 * Makes a comparison between random number and the transition 280 * probabilities outgoing from the current state, then decides the 281 * next state. A second random number is extracted and the comparison 282 * with the loss probability of the current state decides if the next 283 * packet will be transmitted or lost. 284 */ 285 static bool loss_gilb_ell(struct netem_sched_data *q) 286 { 287 struct clgstate *clg = &q->clg; 288 struct rnd_state *s = &q->prng.prng_state; 289 290 switch (clg->state) { 291 case GOOD_STATE: 292 if (prandom_u32_state(s) < clg->a1) 293 clg->state = BAD_STATE; 294 if (prandom_u32_state(s) < clg->a4) 295 return true; 296 break; 297 case BAD_STATE: 298 if (prandom_u32_state(s) < clg->a2) 299 clg->state = GOOD_STATE; 300 if (prandom_u32_state(s) > clg->a3) 301 return true; 302 } 303 304 return false; 305 } 306 307 static bool loss_event(struct netem_sched_data *q) 308 { 309 switch (q->loss_model) { 310 case CLG_RANDOM: 311 /* Random packet drop 0 => none, ~0 => all */ 312 return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng); 313 314 case CLG_4_STATES: 315 /* 4state loss model algorithm (used also for GI model) 316 * Extracts a value from the markov 4 state loss generator, 317 * if it is 1 drops a packet and if needed writes the event in 318 * the kernel logs 319 */ 320 return loss_4state(q); 321 322 case CLG_GILB_ELL: 323 /* Gilbert-Elliot loss model algorithm 324 * Extracts a value from the Gilbert-Elliot loss generator, 325 * if it is 1 drops a packet and if needed writes the event in 326 * the kernel logs 327 */ 328 return loss_gilb_ell(q); 329 } 330 331 return false; /* not reached */ 332 } 333 334 335 /* tabledist - return a pseudo-randomly distributed value with mean mu and 336 * std deviation sigma. Uses table lookup to approximate the desired 337 * distribution, and a uniformly-distributed pseudo-random source. 338 */ 339 static s64 tabledist(s64 mu, s32 sigma, 340 struct crndstate *state, 341 struct prng *prng, 342 const struct disttable *dist) 343 { 344 s64 x; 345 long t; 346 u32 rnd; 347 348 if (sigma == 0) 349 return mu; 350 351 rnd = get_crandom(state, prng); 352 353 /* default uniform distribution */ 354 if (dist == NULL) 355 return ((rnd % (2 * (u32)sigma)) + mu) - sigma; 356 357 t = dist->table[rnd % dist->size]; 358 x = (sigma % NETEM_DIST_SCALE) * t; 359 if (x >= 0) 360 x += NETEM_DIST_SCALE/2; 361 else 362 x -= NETEM_DIST_SCALE/2; 363 364 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 365 } 366 367 static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) 368 { 369 len += q->packet_overhead; 370 371 if (q->cell_size) { 372 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); 373 374 if (len > cells * q->cell_size) /* extra cell needed for remainder */ 375 cells++; 376 len = cells * (q->cell_size + q->cell_overhead); 377 } 378 379 return div64_u64(len * NSEC_PER_SEC, q->rate); 380 } 381 382 static void tfifo_reset(struct Qdisc *sch) 383 { 384 struct netem_sched_data *q = qdisc_priv(sch); 385 struct rb_node *p = rb_first(&q->t_root); 386 387 while (p) { 388 struct sk_buff *skb = rb_to_skb(p); 389 390 p = rb_next(p); 391 rb_erase(&skb->rbnode, &q->t_root); 392 rtnl_kfree_skbs(skb, skb); 393 } 394 395 rtnl_kfree_skbs(q->t_head, q->t_tail); 396 q->t_head = NULL; 397 q->t_tail = NULL; 398 q->t_len = 0; 399 } 400 401 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 402 { 403 struct netem_sched_data *q = qdisc_priv(sch); 404 u64 tnext = netem_skb_cb(nskb)->time_to_send; 405 406 if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { 407 if (q->t_tail) 408 q->t_tail->next = nskb; 409 else 410 q->t_head = nskb; 411 q->t_tail = nskb; 412 } else { 413 struct rb_node **p = &q->t_root.rb_node, *parent = NULL; 414 415 while (*p) { 416 struct sk_buff *skb; 417 418 parent = *p; 419 skb = rb_to_skb(parent); 420 if (tnext >= netem_skb_cb(skb)->time_to_send) 421 p = &parent->rb_right; 422 else 423 p = &parent->rb_left; 424 } 425 rb_link_node(&nskb->rbnode, parent, p); 426 rb_insert_color(&nskb->rbnode, &q->t_root); 427 } 428 q->t_len++; 429 qdisc_qlen_inc(sch); 430 } 431 432 /* netem can't properly corrupt a megapacket (like we get from GSO), so instead 433 * when we statistically choose to corrupt one, we instead segment it, returning 434 * the first packet to be corrupted, and re-enqueue the remaining frames 435 */ 436 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, 437 struct sk_buff **to_free) 438 { 439 struct sk_buff *segs; 440 netdev_features_t features = netif_skb_features(skb); 441 442 qdisc_skb_cb(skb)->pkt_segs = 1; 443 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 444 445 if (IS_ERR_OR_NULL(segs)) { 446 qdisc_drop(skb, sch, to_free); 447 return NULL; 448 } 449 consume_skb(skb); 450 return segs; 451 } 452 453 /* 454 * Insert one skb into qdisc. 455 * Note: parent depends on return value to account for queue length. 456 * NET_XMIT_DROP: queue length didn't change. 457 * NET_XMIT_SUCCESS: one skb was queued. 458 */ 459 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 460 struct sk_buff **to_free) 461 { 462 struct netem_sched_data *q = qdisc_priv(sch); 463 /* We don't fill cb now as skb_unshare() may invalidate it */ 464 struct netem_skb_cb *cb; 465 struct sk_buff *skb2 = NULL; 466 struct sk_buff *segs = NULL; 467 unsigned int prev_len = qdisc_pkt_len(skb); 468 int count = 1; 469 470 /* Do not fool qdisc_drop_all() */ 471 skb->prev = NULL; 472 473 /* Random duplication */ 474 if (q->duplicate && skb->tc_depth == 0 && 475 q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) { 476 ++count; 477 WRITE_ONCE(q->duplicated, q->duplicated + 1); 478 } 479 480 /* Drop packet? */ 481 if (loss_event(q)) { 482 if (q->ecn && INET_ECN_set_ce(skb)) { 483 WRITE_ONCE(q->ecn_marked, q->ecn_marked + 1); 484 } else { 485 WRITE_ONCE(q->dropped, q->dropped + 1); 486 --count; 487 } 488 } 489 490 if (count == 0) { 491 qdisc_qstats_drop(sch); 492 __qdisc_drop(skb, to_free); 493 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 494 } 495 496 /* If a delay is expected, orphan the skb. (orphaning usually takes 497 * place at TX completion time, so _before_ the link transit delay) 498 */ 499 if (q->latency || q->jitter || q->rate) 500 skb_orphan_partial(skb); 501 502 /* 503 * If we need to duplicate packet, then clone it before 504 * original is modified. 505 */ 506 if (count > 1) { 507 skb2 = skb_clone(skb, GFP_ATOMIC); 508 if (!skb2) 509 WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1); 510 } 511 512 /* 513 * Randomized packet corruption. 514 * Make copy if needed since we are modifying 515 * If packet is going to be hardware checksummed, then 516 * do it now in software before we mangle it. 517 */ 518 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) { 519 if (skb_is_gso(skb)) { 520 skb = netem_segment(skb, sch, to_free); 521 if (!skb) { 522 WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1); 523 goto finish_segs; 524 } 525 526 segs = skb->next; 527 skb_mark_not_on_list(skb); 528 qdisc_skb_cb(skb)->pkt_len = skb->len; 529 } 530 531 skb = skb_unshare(skb, GFP_ATOMIC); 532 if (unlikely(!skb)) { 533 WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1); 534 qdisc_qstats_drop(sch); 535 goto finish_segs; 536 } 537 if (skb_linearize(skb) || 538 (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))) { 539 WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1); 540 qdisc_drop(skb, sch, to_free); 541 skb = NULL; 542 goto finish_segs; 543 } 544 545 if (skb->len) { 546 u32 offset = get_random_u32_below(skb->len); 547 skb->data[offset] ^= 1 << get_random_u32_below(8); 548 WRITE_ONCE(q->corrupted, q->corrupted + 1); 549 } 550 } 551 552 if (unlikely(sch->q.qlen >= sch->limit)) { 553 /* re-link segs, so that qdisc_drop_all() frees them all */ 554 skb->next = segs; 555 qdisc_drop_all(skb, sch, to_free); 556 if (skb2) 557 __qdisc_drop(skb2, to_free); 558 return NET_XMIT_DROP; 559 } 560 561 /* 562 * If doing duplication then re-insert at top of the 563 * qdisc tree, since parent queuer expects that only one 564 * skb will be queued. 565 */ 566 if (skb2) { 567 struct Qdisc *rootq = qdisc_root_bh(sch); 568 569 skb2->tc_depth++; /* prevent duplicating a dup... */ 570 rootq->enqueue(skb2, rootq, to_free); 571 skb2 = NULL; 572 } 573 574 qdisc_qstats_backlog_inc(sch, skb); 575 576 cb = netem_skb_cb(skb); 577 if (q->gap == 0 || /* not doing reordering */ 578 q->counter < q->gap - 1 || /* inside last reordering gap */ 579 q->reorder < get_crandom(&q->reorder_cor, &q->prng)) { 580 u64 now; 581 s64 delay; 582 583 delay = tabledist(q->latency, q->jitter, 584 &q->delay_cor, &q->prng, q->delay_dist); 585 586 now = ktime_get_ns(); 587 588 if (q->rate) { 589 struct netem_skb_cb *last = NULL; 590 591 if (sch->q.tail) 592 last = netem_skb_cb(sch->q.tail); 593 if (q->t_root.rb_node) { 594 struct sk_buff *t_skb; 595 struct netem_skb_cb *t_last; 596 597 t_skb = skb_rb_last(&q->t_root); 598 t_last = netem_skb_cb(t_skb); 599 if (!last || 600 t_last->time_to_send > last->time_to_send) 601 last = t_last; 602 } 603 if (q->t_tail) { 604 struct netem_skb_cb *t_last = 605 netem_skb_cb(q->t_tail); 606 607 if (!last || 608 t_last->time_to_send > last->time_to_send) 609 last = t_last; 610 } 611 612 if (last) { 613 /* 614 * Last packet in queue is reference point (now), 615 * calculate this time bonus and subtract 616 * from delay. 617 */ 618 delay -= last->time_to_send - now; 619 delay = max_t(s64, 0, delay); 620 now = last->time_to_send; 621 } 622 623 delay += packet_time_ns(qdisc_pkt_len(skb), q); 624 } 625 626 cb->time_to_send = now + delay; 627 ++q->counter; 628 if (delay) 629 WRITE_ONCE(q->delayed, q->delayed + 1); 630 631 tfifo_enqueue(skb, sch); 632 } else { 633 /* 634 * Do re-ordering by putting one out of N packets at the front 635 * of the queue. 636 */ 637 WRITE_ONCE(q->reordered, q->reordered + 1); 638 cb->time_to_send = ktime_get_ns(); 639 q->counter = 0; 640 641 __qdisc_enqueue_head(skb, &sch->q); 642 sch->qstats.requeues++; 643 } 644 645 finish_segs: 646 if (skb2) 647 __qdisc_drop(skb2, to_free); 648 649 if (segs) { 650 unsigned int len, last_len; 651 int rc, nb; 652 653 len = skb ? skb->len : 0; 654 nb = skb ? 1 : 0; 655 656 while (segs) { 657 skb2 = segs->next; 658 skb_mark_not_on_list(segs); 659 qdisc_skb_cb(segs)->pkt_len = segs->len; 660 last_len = segs->len; 661 rc = qdisc_enqueue(segs, sch, to_free); 662 if (rc != NET_XMIT_SUCCESS) { 663 if (net_xmit_drop_count(rc)) 664 qdisc_qstats_drop(sch); 665 } else { 666 nb++; 667 len += last_len; 668 } 669 segs = skb2; 670 } 671 /* Parent qdiscs accounted for 1 skb of size @prev_len */ 672 qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); 673 } else if (!skb) { 674 return NET_XMIT_DROP; 675 } 676 return NET_XMIT_SUCCESS; 677 } 678 679 /* Delay the next round with a new future slot with a 680 * correct number of bytes and packets. 681 */ 682 683 static void get_slot_next(struct netem_sched_data *q, u64 now) 684 { 685 s64 next_delay; 686 687 if (!q->slot_dist) 688 next_delay = q->slot_config.min_delay + 689 mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay, 690 get_random_u32(), 32); 691 else 692 next_delay = tabledist(q->slot_config.dist_delay, 693 (s32)(q->slot_config.dist_jitter), 694 NULL, &q->prng, q->slot_dist); 695 696 q->slot.slot_next = now + next_delay; 697 q->slot.packets_left = q->slot_config.max_packets; 698 q->slot.bytes_left = q->slot_config.max_bytes; 699 } 700 701 static struct sk_buff *netem_peek(struct netem_sched_data *q) 702 { 703 struct sk_buff *skb = skb_rb_first(&q->t_root); 704 u64 t1, t2; 705 706 if (!skb) 707 return q->t_head; 708 if (!q->t_head) 709 return skb; 710 711 t1 = netem_skb_cb(skb)->time_to_send; 712 t2 = netem_skb_cb(q->t_head)->time_to_send; 713 if (t1 < t2) 714 return skb; 715 return q->t_head; 716 } 717 718 static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb) 719 { 720 if (skb == q->t_head) { 721 q->t_head = skb->next; 722 if (!q->t_head) 723 q->t_tail = NULL; 724 } else { 725 rb_erase(&skb->rbnode, &q->t_root); 726 } 727 } 728 729 static struct sk_buff *netem_dequeue(struct Qdisc *sch) 730 { 731 struct netem_sched_data *q = qdisc_priv(sch); 732 struct sk_buff *skb; 733 734 tfifo_dequeue: 735 skb = __qdisc_dequeue_head(&sch->q); 736 if (skb) { 737 deliver: 738 qdisc_qstats_backlog_dec(sch, skb); 739 qdisc_bstats_update(sch, skb); 740 return skb; 741 } 742 skb = netem_peek(q); 743 if (skb) { 744 u64 time_to_send; 745 u64 now = ktime_get_ns(); 746 747 /* if more time remaining? */ 748 time_to_send = netem_skb_cb(skb)->time_to_send; 749 if (q->slot.slot_next && q->slot.slot_next < time_to_send) 750 get_slot_next(q, now); 751 752 if (time_to_send <= now && q->slot.slot_next <= now) { 753 netem_erase_head(q, skb); 754 q->t_len--; 755 skb->next = NULL; 756 skb->prev = NULL; 757 /* skb->dev shares skb->rbnode area, 758 * we need to restore its value. 759 */ 760 skb->dev = qdisc_dev(sch); 761 762 if (q->slot.slot_next) { 763 q->slot.packets_left--; 764 q->slot.bytes_left -= qdisc_pkt_len(skb); 765 if (q->slot.packets_left <= 0 || 766 q->slot.bytes_left <= 0) 767 get_slot_next(q, now); 768 } 769 770 if (q->qdisc) { 771 unsigned int pkt_len = qdisc_pkt_len(skb); 772 struct sk_buff *to_free = NULL; 773 int err; 774 775 err = qdisc_enqueue(skb, q->qdisc, &to_free); 776 kfree_skb_list(to_free); 777 if (err != NET_XMIT_SUCCESS) { 778 if (net_xmit_drop_count(err)) 779 qdisc_qstats_drop(sch); 780 qstats_backlog_sub(sch, pkt_len); 781 qdisc_qlen_dec(sch); 782 qdisc_tree_reduce_backlog(sch, 1, pkt_len); 783 } 784 goto tfifo_dequeue; 785 } 786 qdisc_qlen_dec(sch); 787 goto deliver; 788 } 789 790 if (q->qdisc) { 791 skb = q->qdisc->ops->dequeue(q->qdisc); 792 if (skb) { 793 qdisc_qlen_dec(sch); 794 goto deliver; 795 } 796 } 797 798 qdisc_watchdog_schedule_ns(&q->watchdog, 799 max(time_to_send, 800 q->slot.slot_next)); 801 } 802 803 if (q->qdisc) { 804 skb = q->qdisc->ops->dequeue(q->qdisc); 805 if (skb) { 806 qdisc_qlen_dec(sch); 807 goto deliver; 808 } 809 } 810 return NULL; 811 } 812 813 static void netem_reset(struct Qdisc *sch) 814 { 815 struct netem_sched_data *q = qdisc_priv(sch); 816 817 qdisc_reset_queue(sch); 818 tfifo_reset(sch); 819 if (q->qdisc) 820 qdisc_reset(q->qdisc); 821 qdisc_watchdog_cancel(&q->watchdog); 822 } 823 824 static void dist_free(struct disttable *d) 825 { 826 kvfree(d); 827 } 828 829 /* 830 * Distribution data is a variable size payload containing 831 * signed 16 bit values. 832 */ 833 834 static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) 835 { 836 size_t n = nla_len(attr)/sizeof(__s16); 837 const __s16 *data = nla_data(attr); 838 struct disttable *d; 839 int i; 840 841 if (!n || n > NETEM_DIST_MAX) 842 return -EINVAL; 843 844 d = kvmalloc_flex(*d, table, n); 845 if (!d) 846 return -ENOMEM; 847 848 d->size = n; 849 for (i = 0; i < n; i++) 850 d->table[i] = data[i]; 851 852 *tbl = d; 853 return 0; 854 } 855 856 static int validate_time(const struct nlattr *attr, const char *name, 857 struct netlink_ext_ack *extack) 858 { 859 if (nla_get_s64(attr) < 0) { 860 NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); 861 return -EINVAL; 862 } 863 return 0; 864 } 865 866 static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) 867 { 868 const struct tc_netem_slot *c = nla_data(attr); 869 870 if (c->min_delay < 0 || c->max_delay < 0) { 871 NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); 872 return -EINVAL; 873 } 874 if (c->min_delay > c->max_delay) { 875 NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); 876 return -EINVAL; 877 } 878 if (c->dist_delay < 0 || c->dist_jitter < 0) { 879 NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); 880 return -EINVAL; 881 } 882 if (c->max_packets < 0 || c->max_bytes < 0) { 883 NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); 884 return -EINVAL; 885 } 886 return 0; 887 } 888 889 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) 890 { 891 const struct tc_netem_slot *c = nla_data(attr); 892 893 q->slot_config = *c; 894 if (q->slot_config.max_packets == 0) 895 q->slot_config.max_packets = INT_MAX; 896 if (q->slot_config.max_bytes == 0) 897 q->slot_config.max_bytes = INT_MAX; 898 899 /* capping dist_jitter to the range acceptable by tabledist() */ 900 q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); 901 902 q->slot.packets_left = q->slot_config.max_packets; 903 q->slot.bytes_left = q->slot_config.max_bytes; 904 if (q->slot_config.min_delay | q->slot_config.max_delay | 905 q->slot_config.dist_jitter) 906 q->slot.slot_next = ktime_get_ns(); 907 else 908 q->slot.slot_next = 0; 909 } 910 911 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) 912 { 913 const struct tc_netem_corr *c = nla_data(attr); 914 915 init_crandom(&q->delay_cor, c->delay_corr); 916 init_crandom(&q->loss_cor, c->loss_corr); 917 init_crandom(&q->dup_cor, c->dup_corr); 918 } 919 920 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr) 921 { 922 const struct tc_netem_reorder *r = nla_data(attr); 923 924 q->reorder = r->probability; 925 init_crandom(&q->reorder_cor, r->correlation); 926 } 927 928 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr) 929 { 930 const struct tc_netem_corrupt *r = nla_data(attr); 931 932 q->corrupt = r->probability; 933 init_crandom(&q->corrupt_cor, r->correlation); 934 } 935 936 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr) 937 { 938 const struct tc_netem_rate *r = nla_data(attr); 939 940 q->rate = r->rate; 941 q->packet_overhead = r->packet_overhead; 942 q->cell_size = r->cell_size; 943 q->cell_overhead = r->cell_overhead; 944 if (q->cell_size) 945 q->cell_size_reciprocal = reciprocal_value(q->cell_size); 946 else 947 q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; 948 } 949 950 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr, 951 struct netlink_ext_ack *extack) 952 { 953 const struct nlattr *la; 954 int rem; 955 956 nla_for_each_nested(la, attr, rem) { 957 u16 type = nla_type(la); 958 959 switch (type) { 960 case NETEM_LOSS_GI: { 961 const struct tc_netem_gimodel *gi = nla_data(la); 962 963 if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { 964 NL_SET_ERR_MSG_ATTR(extack, la, 965 "netem: incorrect gi model size"); 966 return -EINVAL; 967 } 968 969 q->loss_model = CLG_4_STATES; 970 971 q->clg.state = TX_IN_GAP_PERIOD; 972 q->clg.a1 = gi->p13; 973 q->clg.a2 = gi->p31; 974 q->clg.a3 = gi->p32; 975 q->clg.a4 = gi->p14; 976 q->clg.a5 = gi->p23; 977 break; 978 } 979 980 case NETEM_LOSS_GE: { 981 const struct tc_netem_gemodel *ge = nla_data(la); 982 983 if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { 984 NL_SET_ERR_MSG_ATTR(extack, la, 985 "netem: incorrect ge model size"); 986 return -EINVAL; 987 } 988 989 q->loss_model = CLG_GILB_ELL; 990 q->clg.state = GOOD_STATE; 991 q->clg.a1 = ge->p; 992 q->clg.a2 = ge->r; 993 q->clg.a3 = ge->h; 994 q->clg.a4 = ge->k1; 995 break; 996 } 997 998 default: 999 NL_SET_ERR_MSG_ATTR_FMT(extack, la, 1000 "netem: unknown loss type %u", type); 1001 return -EINVAL; 1002 } 1003 } 1004 1005 return 0; 1006 } 1007 1008 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 1009 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 1010 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 1011 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 1012 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, 1013 [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 1014 [TCA_NETEM_ECN] = { .type = NLA_U32 }, 1015 [TCA_NETEM_RATE64] = { .type = NLA_U64 }, 1016 [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, 1017 [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, 1018 [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, 1019 [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 }, 1020 }; 1021 1022 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 1023 const struct nla_policy *policy, int len, 1024 struct netlink_ext_ack *extack) 1025 { 1026 int nested_len = nla_len(nla) - NLA_ALIGN(len); 1027 1028 if (nested_len < 0) { 1029 NL_SET_ERR_MSG_FMT(extack, "netem: invalid attributes len %d < %d", 1030 nla_len(nla), NLA_ALIGN(len)); 1031 return -EINVAL; 1032 } 1033 1034 if (nested_len >= nla_attr_size(0)) 1035 return nla_parse_deprecated(tb, maxtype, 1036 nla_data(nla) + NLA_ALIGN(len), 1037 nested_len, policy, extack); 1038 1039 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 1040 return 0; 1041 } 1042 1043 /* Parse netlink message to set options */ 1044 static int netem_change(struct Qdisc *sch, struct nlattr *opt, 1045 struct netlink_ext_ack *extack) 1046 { 1047 struct netem_sched_data *q = qdisc_priv(sch); 1048 struct nlattr *tb[TCA_NETEM_MAX + 1]; 1049 struct disttable *delay_dist = NULL; 1050 struct disttable *slot_dist = NULL; 1051 struct tc_netem_qopt *qopt; 1052 struct clgstate old_clg; 1053 int old_loss_model = CLG_RANDOM; 1054 int ret; 1055 1056 qopt = nla_data(opt); 1057 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt), extack); 1058 if (ret < 0) 1059 return ret; 1060 1061 if (tb[TCA_NETEM_DELAY_DIST]) { 1062 ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); 1063 if (ret) 1064 goto table_free; 1065 } 1066 1067 if (tb[TCA_NETEM_SLOT_DIST]) { 1068 ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); 1069 if (ret) 1070 goto table_free; 1071 } 1072 1073 if (tb[TCA_NETEM_SLOT]) { 1074 ret = validate_slot(tb[TCA_NETEM_SLOT], extack); 1075 if (ret) 1076 goto table_free; 1077 } 1078 1079 if (tb[TCA_NETEM_LATENCY64]) { 1080 ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); 1081 if (ret) 1082 goto table_free; 1083 } 1084 1085 if (tb[TCA_NETEM_JITTER64]) { 1086 ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); 1087 if (ret) 1088 goto table_free; 1089 } 1090 1091 sch_tree_lock(sch); 1092 /* backup q->clg and q->loss_model */ 1093 old_clg = q->clg; 1094 old_loss_model = q->loss_model; 1095 1096 if (tb[TCA_NETEM_LOSS]) { 1097 ret = get_loss_clg(q, tb[TCA_NETEM_LOSS], extack); 1098 if (ret) { 1099 q->loss_model = old_loss_model; 1100 q->clg = old_clg; 1101 goto unlock; 1102 } 1103 } else { 1104 q->loss_model = CLG_RANDOM; 1105 } 1106 1107 if (delay_dist) 1108 swap(q->delay_dist, delay_dist); 1109 if (slot_dist) 1110 swap(q->slot_dist, slot_dist); 1111 sch->limit = qopt->limit; 1112 1113 q->latency = PSCHED_TICKS2NS(qopt->latency); 1114 q->jitter = PSCHED_TICKS2NS(qopt->jitter); 1115 q->limit = qopt->limit; 1116 q->gap = qopt->gap; 1117 q->counter = 0; 1118 q->loss = qopt->loss; 1119 q->duplicate = qopt->duplicate; 1120 1121 /* for compatibility with earlier versions. 1122 * if gap is set, need to assume 100% probability 1123 */ 1124 if (q->gap) 1125 q->reorder = ~0; 1126 1127 if (tb[TCA_NETEM_CORR]) 1128 get_correlation(q, tb[TCA_NETEM_CORR]); 1129 1130 if (tb[TCA_NETEM_REORDER]) 1131 get_reorder(q, tb[TCA_NETEM_REORDER]); 1132 1133 if (tb[TCA_NETEM_CORRUPT]) 1134 get_corrupt(q, tb[TCA_NETEM_CORRUPT]); 1135 1136 if (tb[TCA_NETEM_RATE]) 1137 get_rate(q, tb[TCA_NETEM_RATE]); 1138 1139 if (tb[TCA_NETEM_RATE64]) 1140 q->rate = max_t(u64, q->rate, 1141 nla_get_u64(tb[TCA_NETEM_RATE64])); 1142 1143 if (tb[TCA_NETEM_LATENCY64]) 1144 q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); 1145 1146 if (tb[TCA_NETEM_JITTER64]) 1147 q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); 1148 1149 if (tb[TCA_NETEM_ECN]) 1150 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); 1151 1152 if (tb[TCA_NETEM_SLOT]) 1153 get_slot(q, tb[TCA_NETEM_SLOT]); 1154 1155 /* capping jitter to the range acceptable by tabledist() */ 1156 q->jitter = min_t(s64, abs(q->jitter), INT_MAX); 1157 1158 if (tb[TCA_NETEM_PRNG_SEED]) { 1159 q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); 1160 prandom_seed_state(&q->prng.prng_state, q->prng.seed); 1161 } 1162 1163 unlock: 1164 sch_tree_unlock(sch); 1165 1166 table_free: 1167 dist_free(delay_dist); 1168 dist_free(slot_dist); 1169 return ret; 1170 } 1171 1172 static int netem_init(struct Qdisc *sch, struct nlattr *opt, 1173 struct netlink_ext_ack *extack) 1174 { 1175 struct netem_sched_data *q = qdisc_priv(sch); 1176 1177 qdisc_watchdog_init(&q->watchdog, sch); 1178 1179 if (!opt) 1180 return -EINVAL; 1181 1182 q->loss_model = CLG_RANDOM; 1183 q->prng.seed = get_random_u64(); 1184 prandom_seed_state(&q->prng.prng_state, q->prng.seed); 1185 1186 return netem_change(sch, opt, extack); 1187 } 1188 1189 static void netem_destroy(struct Qdisc *sch) 1190 { 1191 struct netem_sched_data *q = qdisc_priv(sch); 1192 1193 qdisc_watchdog_cancel(&q->watchdog); 1194 if (q->qdisc) 1195 qdisc_put(q->qdisc); 1196 dist_free(q->delay_dist); 1197 dist_free(q->slot_dist); 1198 } 1199 1200 static int dump_loss_model(const struct netem_sched_data *q, 1201 struct sk_buff *skb) 1202 { 1203 struct nlattr *nest; 1204 1205 nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); 1206 if (nest == NULL) 1207 goto nla_put_failure; 1208 1209 switch (q->loss_model) { 1210 case CLG_RANDOM: 1211 /* legacy loss model */ 1212 nla_nest_cancel(skb, nest); 1213 return 0; /* no data */ 1214 1215 case CLG_4_STATES: { 1216 struct tc_netem_gimodel gi = { 1217 .p13 = q->clg.a1, 1218 .p31 = q->clg.a2, 1219 .p32 = q->clg.a3, 1220 .p14 = q->clg.a4, 1221 .p23 = q->clg.a5, 1222 }; 1223 1224 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi)) 1225 goto nla_put_failure; 1226 break; 1227 } 1228 case CLG_GILB_ELL: { 1229 struct tc_netem_gemodel ge = { 1230 .p = q->clg.a1, 1231 .r = q->clg.a2, 1232 .h = q->clg.a3, 1233 .k1 = q->clg.a4, 1234 }; 1235 1236 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge)) 1237 goto nla_put_failure; 1238 break; 1239 } 1240 } 1241 1242 nla_nest_end(skb, nest); 1243 return 0; 1244 1245 nla_put_failure: 1246 nla_nest_cancel(skb, nest); 1247 return -1; 1248 } 1249 1250 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 1251 { 1252 const struct netem_sched_data *q = qdisc_priv(sch); 1253 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); 1254 struct tc_netem_qopt qopt; 1255 struct tc_netem_corr cor; 1256 struct tc_netem_reorder reorder; 1257 struct tc_netem_corrupt corrupt; 1258 struct tc_netem_rate rate; 1259 struct tc_netem_slot slot; 1260 1261 qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), 1262 UINT_MAX); 1263 qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), 1264 UINT_MAX); 1265 qopt.limit = q->limit; 1266 qopt.loss = q->loss; 1267 qopt.gap = q->gap; 1268 qopt.duplicate = q->duplicate; 1269 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 1270 goto nla_put_failure; 1271 1272 if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) 1273 goto nla_put_failure; 1274 1275 if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) 1276 goto nla_put_failure; 1277 1278 cor.delay_corr = q->delay_cor.rho; 1279 cor.loss_corr = q->loss_cor.rho; 1280 cor.dup_corr = q->dup_cor.rho; 1281 if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor)) 1282 goto nla_put_failure; 1283 1284 reorder.probability = q->reorder; 1285 reorder.correlation = q->reorder_cor.rho; 1286 if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder)) 1287 goto nla_put_failure; 1288 1289 corrupt.probability = q->corrupt; 1290 corrupt.correlation = q->corrupt_cor.rho; 1291 if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) 1292 goto nla_put_failure; 1293 1294 if (q->rate >= (1ULL << 32)) { 1295 if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate, 1296 TCA_NETEM_PAD)) 1297 goto nla_put_failure; 1298 rate.rate = ~0U; 1299 } else { 1300 rate.rate = q->rate; 1301 } 1302 rate.packet_overhead = q->packet_overhead; 1303 rate.cell_size = q->cell_size; 1304 rate.cell_overhead = q->cell_overhead; 1305 if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate)) 1306 goto nla_put_failure; 1307 1308 if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn)) 1309 goto nla_put_failure; 1310 1311 if (dump_loss_model(q, skb) != 0) 1312 goto nla_put_failure; 1313 1314 if (q->slot_config.min_delay | q->slot_config.max_delay | 1315 q->slot_config.dist_jitter) { 1316 slot = q->slot_config; 1317 if (slot.max_packets == INT_MAX) 1318 slot.max_packets = 0; 1319 if (slot.max_bytes == INT_MAX) 1320 slot.max_bytes = 0; 1321 if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) 1322 goto nla_put_failure; 1323 } 1324 1325 if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed, 1326 TCA_NETEM_PAD)) 1327 goto nla_put_failure; 1328 1329 return nla_nest_end(skb, nla); 1330 1331 nla_put_failure: 1332 nlmsg_trim(skb, nla); 1333 return -1; 1334 } 1335 1336 static int netem_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 1337 { 1338 struct netem_sched_data *q = qdisc_priv(sch); 1339 struct tc_netem_xstats st = { 1340 .delayed = READ_ONCE(q->delayed), 1341 .dropped = READ_ONCE(q->dropped), 1342 .corrupted = READ_ONCE(q->corrupted), 1343 .duplicated = READ_ONCE(q->duplicated), 1344 .reordered = READ_ONCE(q->reordered), 1345 .ecn_marked = READ_ONCE(q->ecn_marked), 1346 .allocation_errors = READ_ONCE(q->allocation_errors), 1347 }; 1348 1349 return gnet_stats_copy_app(d, &st, sizeof(st)); 1350 } 1351 1352 static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 1353 struct sk_buff *skb, struct tcmsg *tcm) 1354 { 1355 struct netem_sched_data *q = qdisc_priv(sch); 1356 1357 if (cl != 1 || !q->qdisc) /* only one class */ 1358 return -ENOENT; 1359 1360 tcm->tcm_handle |= TC_H_MIN(1); 1361 tcm->tcm_info = q->qdisc->handle; 1362 1363 return 0; 1364 } 1365 1366 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1367 struct Qdisc **old, struct netlink_ext_ack *extack) 1368 { 1369 struct netem_sched_data *q = qdisc_priv(sch); 1370 1371 *old = qdisc_replace(sch, new, &q->qdisc); 1372 return 0; 1373 } 1374 1375 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 1376 { 1377 struct netem_sched_data *q = qdisc_priv(sch); 1378 return q->qdisc; 1379 } 1380 1381 static unsigned long netem_find(struct Qdisc *sch, u32 classid) 1382 { 1383 return 1; 1384 } 1385 1386 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 1387 { 1388 if (!walker->stop) { 1389 if (!tc_qdisc_stats_dump(sch, 1, walker)) 1390 return; 1391 } 1392 } 1393 1394 static const struct Qdisc_class_ops netem_class_ops = { 1395 .graft = netem_graft, 1396 .leaf = netem_leaf, 1397 .find = netem_find, 1398 .walk = netem_walk, 1399 .dump = netem_dump_class, 1400 }; 1401 1402 static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 1403 .id = "netem", 1404 .cl_ops = &netem_class_ops, 1405 .priv_size = sizeof(struct netem_sched_data), 1406 .enqueue = netem_enqueue, 1407 .dequeue = netem_dequeue, 1408 .peek = qdisc_peek_dequeued, 1409 .init = netem_init, 1410 .reset = netem_reset, 1411 .destroy = netem_destroy, 1412 .change = netem_change, 1413 .dump = netem_dump, 1414 .dump_stats = netem_dump_stats, 1415 .owner = THIS_MODULE, 1416 }; 1417 MODULE_ALIAS_NET_SCH("netem"); 1418 1419 static int __init netem_module_init(void) 1420 { 1421 return register_qdisc(&netem_qdisc_ops); 1422 } 1423 static void __exit netem_module_exit(void) 1424 { 1425 unregister_qdisc(&netem_qdisc_ops); 1426 } 1427 module_init(netem_module_init) 1428 module_exit(netem_module_exit) 1429 MODULE_LICENSE("GPL"); 1430 MODULE_DESCRIPTION("Network characteristics emulator qdisc"); 1431