1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * net/sched/sch_netem.c Network emulator 4 * 5 * Many of the algorithms and ideas for this came from 6 * NIST Net which is not copyrighted. 7 * 8 * Authors: Stephen Hemminger <shemminger@osdl.org> 9 * Catalin(ux aka Dino) BOIE <catab at umbrella dot ro> 10 */ 11 12 #include <linux/mm.h> 13 #include <linux/module.h> 14 #include <linux/slab.h> 15 #include <linux/types.h> 16 #include <linux/kernel.h> 17 #include <linux/errno.h> 18 #include <linux/skbuff.h> 19 #include <linux/vmalloc.h> 20 #include <linux/prandom.h> 21 #include <linux/rtnetlink.h> 22 #include <linux/reciprocal_div.h> 23 #include <linux/rbtree.h> 24 25 #include <net/gso.h> 26 #include <net/netlink.h> 27 #include <net/pkt_sched.h> 28 #include <net/inet_ecn.h> 29 30 /* Network Emulation Queuing algorithm. 31 ==================================== 32 33 Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based 34 Network Emulation Tool 35 [2] Luigi Rizzo, DummyNet for FreeBSD 36 37 ---------------------------------------------------------------- 38 39 This started out as a simple way to delay outgoing packets to 40 test TCP but has grown to include most of the functionality 41 of a full blown network emulator like NISTnet. It can delay 42 packets and add random jitter (and correlation). The random 43 distribution can be loaded from a table as well to provide 44 normal, Pareto, or experimental curves. Packet loss, 45 duplication, and reordering can also be emulated. 46 47 This qdisc does not do classification that can be handled in 48 layering other disciplines. It does not need to do bandwidth 49 control either since that can be handled by using token 50 bucket or other rate control. 51 52 Correlated Loss Generator models 53 54 Added generation of correlated loss according to the 55 "Gilbert-Elliot" model, a 4-state markov model. 56 57 References: 58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG 59 [2] S. Salsano, F. Ludovici, A. 
Ordine, "Definition of a general
	 and intuitive loss model for packet networks and its implementation
	 in the Netem module in the Linux kernel", available in [1]

	 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		  Fabio Ludovici <fabio.ludovici at yahoo.it>
*/

/*
 * Distribution table filled in by get_dist_table(): @size signed 16-bit
 * samples that tabledist() indexes with a random number.
 */
struct disttable {
	u32 size;
	s16 table[] __counted_by(size);
};

/* Loss models */
enum {
	CLG_RANDOM,
	CLG_4_STATES,
	CLG_GILB_ELL,
};

/* States in GE model */
enum {
	GOOD_STATE = 1,
	BAD_STATE,
};

/* States in 4 state model */
enum {
	TX_IN_GAP_PERIOD = 1,
	TX_IN_BURST_PERIOD,
	LOST_IN_GAP_PERIOD,
	LOST_IN_BURST_PERIOD,
};

/*
 * Per-qdisc private state (obtained with qdisc_priv()).
 *
 * Probabilities (loss, duplicate, reorder, corrupt) are u32 values that are
 * compared against a 32-bit random number: 0 => never, ~0 => always.
 */
struct netem_sched_data {
	/* Cacheline 0: tfifo state and per-packet enqueue/dequeue scalars. */
	struct rb_root t_root;		/* skbs that arrived out of time order */
	struct sk_buff *t_head;		/* list of skbs appended in time order */
	struct sk_buff *t_tail;
	u32 t_len;			/* skbs held in t_root plus the list */
	u32 counter;			/* packets delayed since the last reordered one */
	s64 latency;			/* nanoseconds */
	s64 jitter;			/* nanoseconds, capped to INT_MAX by netem_change() */
	u64 rate;			/* divisor in packet_time_ns(); 0 disables rate shaping */
	u32 gap;
	u32 loss;

	/* Cacheline 1: zero-check scalars and correlation states. */
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u32 ecn;
	struct crndstate {
		u32 last;		/* previous output of get_crandom() */
		u32 rho;		/* correlation weight; 0 means uncorrelated */
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
	u8 loss_model;			/* one of the CLG_* values */

	/* Cacheline 2: PRNG, distribution tables, slot dequeue state etc. */
	struct prng {
		u64 seed;
		struct rnd_state prng_state;
	} prng;
	struct disttable *delay_dist;	/* NULL selects the uniform distribution */
	struct slotstate {
		u64 slot_next;		/* 0 when slotting is not configured */
		s32 packets_left;
		s32 bytes_left;
	} slot;
	struct disttable *slot_dist;
	struct Qdisc *qdisc;		/* optional child qdisc; may be NULL */

	/*
	 * Warm: rate-shaping parameters (only read when rate != 0) and
	 * configuration-only fields. The fast path reads sch->limit, not
	 * q->limit.
	 */
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;
	u32 limit;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */

		/* state of the Markov chain */
		u8 state;
	} clg;

	/* Impairment counters */
	u64 delayed;
	u64 dropped;
	u64 corrupted;
	u64 duplicated;
	u64 ecn_marked;
	u64 reordered;
	u64 allocation_errors;

	/* Cold tail: slot reschedule config and the watchdog timer. */
	struct tc_netem_slot slot_config;
	struct qdisc_watchdog watchdog;
};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64 time_to_send;	/* compared against ktime_get_ns() at dequeue */
};

/* Return netem's private area inside the qdisc control block of @skb. */
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 *
 * @state: generator to initialise
 * @rho:   correlation weight; stored in a u32 field
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = get_random_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
199 */ 200 static u32 get_crandom(struct crndstate *state, struct prng *p) 201 { 202 u64 value, rho; 203 unsigned long answer; 204 struct rnd_state *s = &p->prng_state; 205 206 if (!state || state->rho == 0) /* no correlation */ 207 return prandom_u32_state(s); 208 209 value = prandom_u32_state(s); 210 rho = (u64)state->rho + 1; 211 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32; 212 state->last = answer; 213 return answer; 214 } 215 216 /* loss_4state - 4-state model loss generator 217 * Generates losses according to the 4-state Markov chain adopted in 218 * the GI (General and Intuitive) loss model. 219 */ 220 static bool loss_4state(struct netem_sched_data *q) 221 { 222 struct clgstate *clg = &q->clg; 223 u32 rnd = prandom_u32_state(&q->prng.prng_state); 224 225 /* 226 * Makes a comparison between rnd and the transition 227 * probabilities outgoing from the current state, then decides the 228 * next state and if the next packet has to be transmitted or lost. 229 * The four states correspond to: 230 * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period 231 * LOST_IN_GAP_PERIOD => isolated losses within a gap period 232 * LOST_IN_BURST_PERIOD => lost packets within a burst period 233 * TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period 234 */ 235 switch (clg->state) { 236 case TX_IN_GAP_PERIOD: 237 if (rnd < clg->a4) { 238 clg->state = LOST_IN_GAP_PERIOD; 239 return true; 240 } else if (rnd < clg->a1 + clg->a4) { 241 clg->state = LOST_IN_BURST_PERIOD; 242 return true; 243 } else { 244 clg->state = TX_IN_GAP_PERIOD; 245 } 246 247 break; 248 case TX_IN_BURST_PERIOD: 249 if (rnd < clg->a5) { 250 clg->state = LOST_IN_BURST_PERIOD; 251 return true; 252 } else { 253 clg->state = TX_IN_BURST_PERIOD; 254 } 255 256 break; 257 case LOST_IN_BURST_PERIOD: 258 if (rnd < clg->a3) 259 clg->state = TX_IN_BURST_PERIOD; 260 else if (rnd < clg->a2 + clg->a3) { 261 clg->state = TX_IN_GAP_PERIOD; 262 } else { 263 
clg->state = LOST_IN_BURST_PERIOD; 264 return true; 265 } 266 break; 267 case LOST_IN_GAP_PERIOD: 268 clg->state = TX_IN_GAP_PERIOD; 269 break; 270 } 271 272 return false; 273 } 274 275 /* loss_gilb_ell - Gilbert-Elliot model loss generator 276 * Generates losses according to the Gilbert-Elliot loss model or 277 * its special cases (Gilbert or Simple Gilbert) 278 * 279 * Makes a comparison between random number and the transition 280 * probabilities outgoing from the current state, then decides the 281 * next state. A second random number is extracted and the comparison 282 * with the loss probability of the current state decides if the next 283 * packet will be transmitted or lost. 284 */ 285 static bool loss_gilb_ell(struct netem_sched_data *q) 286 { 287 struct clgstate *clg = &q->clg; 288 struct rnd_state *s = &q->prng.prng_state; 289 290 switch (clg->state) { 291 case GOOD_STATE: 292 if (prandom_u32_state(s) < clg->a1) 293 clg->state = BAD_STATE; 294 if (prandom_u32_state(s) < clg->a4) 295 return true; 296 break; 297 case BAD_STATE: 298 if (prandom_u32_state(s) < clg->a2) 299 clg->state = GOOD_STATE; 300 if (prandom_u32_state(s) > clg->a3) 301 return true; 302 } 303 304 return false; 305 } 306 307 static bool loss_event(struct netem_sched_data *q) 308 { 309 switch (q->loss_model) { 310 case CLG_RANDOM: 311 /* Random packet drop 0 => none, ~0 => all */ 312 return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng); 313 314 case CLG_4_STATES: 315 /* 4state loss model algorithm (used also for GI model) 316 * Extracts a value from the markov 4 state loss generator, 317 * if it is 1 drops a packet and if needed writes the event in 318 * the kernel logs 319 */ 320 return loss_4state(q); 321 322 case CLG_GILB_ELL: 323 /* Gilbert-Elliot loss model algorithm 324 * Extracts a value from the Gilbert-Elliot loss generator, 325 * if it is 1 drops a packet and if needed writes the event in 326 * the kernel logs 327 */ 328 return loss_gilb_ell(q); 329 } 330 331 return 
false; /* not reached */ 332 } 333 334 335 /* tabledist - return a pseudo-randomly distributed value with mean mu and 336 * std deviation sigma. Uses table lookup to approximate the desired 337 * distribution, and a uniformly-distributed pseudo-random source. 338 */ 339 static s64 tabledist(s64 mu, s32 sigma, 340 struct crndstate *state, 341 struct prng *prng, 342 const struct disttable *dist) 343 { 344 s64 x; 345 long t; 346 u32 rnd; 347 348 if (sigma == 0) 349 return mu; 350 351 rnd = get_crandom(state, prng); 352 353 /* default uniform distribution */ 354 if (dist == NULL) 355 return ((rnd % (2 * (u32)sigma)) + mu) - sigma; 356 357 t = dist->table[rnd % dist->size]; 358 x = (sigma % NETEM_DIST_SCALE) * t; 359 if (x >= 0) 360 x += NETEM_DIST_SCALE/2; 361 else 362 x -= NETEM_DIST_SCALE/2; 363 364 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; 365 } 366 367 static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) 368 { 369 len += q->packet_overhead; 370 371 if (q->cell_size) { 372 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal); 373 374 if (len > cells * q->cell_size) /* extra cell needed for remainder */ 375 cells++; 376 len = cells * (q->cell_size + q->cell_overhead); 377 } 378 379 return div64_u64(len * NSEC_PER_SEC, q->rate); 380 } 381 382 static void tfifo_reset(struct Qdisc *sch) 383 { 384 struct netem_sched_data *q = qdisc_priv(sch); 385 struct rb_node *p = rb_first(&q->t_root); 386 387 while (p) { 388 struct sk_buff *skb = rb_to_skb(p); 389 390 p = rb_next(p); 391 rb_erase(&skb->rbnode, &q->t_root); 392 rtnl_kfree_skbs(skb, skb); 393 } 394 395 rtnl_kfree_skbs(q->t_head, q->t_tail); 396 q->t_head = NULL; 397 q->t_tail = NULL; 398 q->t_len = 0; 399 } 400 401 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) 402 { 403 struct netem_sched_data *q = qdisc_priv(sch); 404 u64 tnext = netem_skb_cb(nskb)->time_to_send; 405 406 if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) { 407 if 
(q->t_tail) 408 q->t_tail->next = nskb; 409 else 410 q->t_head = nskb; 411 q->t_tail = nskb; 412 } else { 413 struct rb_node **p = &q->t_root.rb_node, *parent = NULL; 414 415 while (*p) { 416 struct sk_buff *skb; 417 418 parent = *p; 419 skb = rb_to_skb(parent); 420 if (tnext >= netem_skb_cb(skb)->time_to_send) 421 p = &parent->rb_right; 422 else 423 p = &parent->rb_left; 424 } 425 rb_link_node(&nskb->rbnode, parent, p); 426 rb_insert_color(&nskb->rbnode, &q->t_root); 427 } 428 q->t_len++; 429 qdisc_qlen_inc(sch); 430 } 431 432 /* netem can't properly corrupt a megapacket (like we get from GSO), so instead 433 * when we statistically choose to corrupt one, we instead segment it, returning 434 * the first packet to be corrupted, and re-enqueue the remaining frames 435 */ 436 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, 437 struct sk_buff **to_free) 438 { 439 struct sk_buff *segs; 440 netdev_features_t features = netif_skb_features(skb); 441 442 qdisc_skb_cb(skb)->pkt_segs = 1; 443 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); 444 445 if (IS_ERR_OR_NULL(segs)) { 446 qdisc_drop(skb, sch, to_free); 447 return NULL; 448 } 449 consume_skb(skb); 450 return segs; 451 } 452 453 /* 454 * Insert one skb into qdisc. 455 * Note: parent depends on return value to account for queue length. 456 * NET_XMIT_DROP: queue length didn't change. 457 * NET_XMIT_SUCCESS: one skb was queued. 
*/
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2 = NULL;
	struct sk_buff *segs = NULL;
	unsigned int prev_len = qdisc_pkt_len(skb);
	/* number of copies to queue: 1, +1 if duplicated, -1 if lost */
	int count = 1;

	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng)) {
		++count;
		WRITE_ONCE(q->duplicated, q->duplicated + 1);
	}

	/* Drop packet? */
	if (loss_event(q)) {
		/* with ECN enabled a CE mark replaces the drop when it can be set */
		if (q->ecn && INET_ECN_set_ce(skb)) {
			WRITE_ONCE(q->ecn_marked, q->ecn_marked + 1);
		} else {
			WRITE_ONCE(q->dropped, q->dropped + 1);
			--count;
		}
	}

	/* lost and not duplicated: drop it but report success to the parent */
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then clone it before
	 * original is modified.
	 */
	if (count > 1) {
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (!skb2)
			WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1);
	}

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
		if (skb_is_gso(skb)) {
			/* on failure netem_segment() has already dropped the skb */
			skb = netem_segment(skb, sch, to_free);
			if (!skb) {
				WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1);
				goto finish_segs;
			}

			/* corrupt the first segment; the rest are queued at finish_segs */
			segs = skb->next;
			skb_mark_not_on_list(skb);
			qdisc_skb_cb(skb)->pkt_len = skb->len;
		}

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1);
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb_linearize(skb) ||
		    (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))) {
			WRITE_ONCE(q->allocation_errors, q->allocation_errors + 1);
			qdisc_drop(skb, sch, to_free);
			skb = NULL;
			goto finish_segs;
		}

		/* flip one random bit of one random byte */
		if (skb->len) {
			u32 offset = get_random_u32_below(skb->len);
			skb->data[offset] ^= 1 << get_random_u32_below(8);
			WRITE_ONCE(q->corrupted, q->corrupted + 1);
		}
	}

	/* queue full: drop the skb, any pending segments and the duplicate */
	if (unlikely(sch->q.qlen >= sch->limit)) {
		/* re-link segs, so that qdisc_drop_all() frees them all */
		skb->next = segs;
		qdisc_drop_all(skb, sch, to_free);
		if (skb2)
			__qdisc_drop(skb2, to_free);
		return NET_XMIT_DROP;
	}

	/*
	 * If doing duplication then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (skb2) {
		struct Qdisc *rootq = qdisc_root_bh(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		/* ownership passed on; keep finish_segs from dropping it */
		skb2 = NULL;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, &q->prng, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			/*
			 * Pick the latest time_to_send among the tail of
			 * sch->q, the last rb-tree node and the list tail.
			 */
			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		if (delay)
			WRITE_ONCE(q->delayed, q->delayed + 1);

		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		WRITE_ONCE(q->reordered, q->reordered + 1);
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		__qdisc_enqueue_head(skb, &sch->q);
		sch->qstats.requeues++;
	}

finish_segs:
	/* still set only when we jumped here before the duplicate was re-injected */
	if (skb2)
		__qdisc_drop(skb2, to_free);

	if (segs) {
		unsigned int len, last_len;
		int rc, nb;

		/* start from what was queued for the first segment, if anything */
		len = skb ? skb->len : 0;
		nb = skb ? 1 : 0;

		while (segs) {
			/* skb2 is reused here as the "next segment" cursor */
			skb2 = segs->next;
			skb_mark_not_on_list(segs);
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		/* Parent qdiscs accounted for 1 skb of size @prev_len */
		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
	} else if (!skb) {
		return NET_XMIT_DROP;
	}
	return NET_XMIT_SUCCESS;
}

/* Delay the next round with a new future slot with a
 * correct number of bytes and packets.
*/

static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	s64 next_delay;

	/*
	 * No slot distribution table: min_delay plus the delay range scaled
	 * by a 32-bit random value. Otherwise draw from the table around
	 * dist_delay with dist_jitter and no correlation state.
	 */
	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				mul_u64_u32_shr(q->slot_config.max_delay - q->slot_config.min_delay,
						get_random_u32(), 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
				       NULL, &q->prng, q->slot_dist);

	q->slot.slot_next = now + next_delay;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}

/*
 * Return, without removing it, the tfifo skb with the smallest time_to_send:
 * the first rb-tree node or the list head. On a tie the list head wins.
 * Returns NULL when both are empty.
 */
static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

/*
 * Unlink @skb, as returned by netem_peek(), from whichever container holds
 * it: the linear list if it is the list head, the rb-tree otherwise.
 */
static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

/*
 * Hand out the next packet, or NULL if nothing is ready.
 *
 * Order of preference: the head of sch->q (reordered packets), then the
 * earliest tfifo packet once its time_to_send and the current slot have been
 * reached, then whatever the child qdisc (if any) yields. When the earliest
 * tfifo packet is not ready yet the watchdog is armed for it.
 */
static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
deliver:
		/* common exit for every path that returns a packet */
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		/* if more time remaining? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			netem_erase_head(q, skb);
			q->t_len--;
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

			/* charge the packet to the slot; open a new one when exhausted */
			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				/* read the length first: the skb belongs to the child after enqueue */
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS) {
					/* child refused it: undo our own accounting */
					if (net_xmit_drop_count(err))
						qdisc_qstats_drop(sch);
					qstats_backlog_sub(sch, pkt_len);
					qdisc_qlen_dec(sch);
					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
				}
				goto tfifo_dequeue;
			}
			qdisc_qlen_dec(sch);
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb) {
				qdisc_qlen_dec(sch);
				goto deliver;
			}
		}

		/* nothing ready: wake up at the later of packet time and slot start */
		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb) {
			qdisc_qlen_dec(sch);
			goto deliver;
		}
	}
	return NULL;
}

/* Drop everything queued here and in the child, and cancel the watchdog. */
static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

/* Release a distribution table; callers in this file also pass NULL. */
static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
833 */ 834 835 static int get_dist_table(struct disttable **tbl, const struct nlattr *attr) 836 { 837 size_t n = nla_len(attr)/sizeof(__s16); 838 const __s16 *data = nla_data(attr); 839 struct disttable *d; 840 int i; 841 842 if (!n || n > NETEM_DIST_MAX) 843 return -EINVAL; 844 845 d = kvmalloc_flex(*d, table, n); 846 if (!d) 847 return -ENOMEM; 848 849 d->size = n; 850 for (i = 0; i < n; i++) 851 d->table[i] = data[i]; 852 853 *tbl = d; 854 return 0; 855 } 856 857 static int validate_time(const struct nlattr *attr, const char *name, 858 struct netlink_ext_ack *extack) 859 { 860 if (nla_get_s64(attr) < 0) { 861 NL_SET_ERR_MSG_ATTR_FMT(extack, attr, "negative %s", name); 862 return -EINVAL; 863 } 864 return 0; 865 } 866 867 static int validate_slot(const struct nlattr *attr, struct netlink_ext_ack *extack) 868 { 869 const struct tc_netem_slot *c = nla_data(attr); 870 871 if (c->min_delay < 0 || c->max_delay < 0) { 872 NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot delay"); 873 return -EINVAL; 874 } 875 if (c->min_delay > c->max_delay) { 876 NL_SET_ERR_MSG_ATTR(extack, attr, "slot min delay greater than max delay"); 877 return -EINVAL; 878 } 879 if (c->dist_delay < 0 || c->dist_jitter < 0) { 880 NL_SET_ERR_MSG_ATTR(extack, attr, "negative dist delay"); 881 return -EINVAL; 882 } 883 if (c->max_packets < 0 || c->max_bytes < 0) { 884 NL_SET_ERR_MSG_ATTR(extack, attr, "negative slot limit"); 885 return -EINVAL; 886 } 887 return 0; 888 } 889 890 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) 891 { 892 const struct tc_netem_slot *c = nla_data(attr); 893 894 q->slot_config = *c; 895 if (q->slot_config.max_packets == 0) 896 q->slot_config.max_packets = INT_MAX; 897 if (q->slot_config.max_bytes == 0) 898 q->slot_config.max_bytes = INT_MAX; 899 900 /* capping dist_jitter to the range acceptable by tabledist() */ 901 q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter)); 902 903 q->slot.packets_left = 
q->slot_config.max_packets; 904 q->slot.bytes_left = q->slot_config.max_bytes; 905 if (q->slot_config.min_delay | q->slot_config.max_delay | 906 q->slot_config.dist_jitter) 907 q->slot.slot_next = ktime_get_ns(); 908 else 909 q->slot.slot_next = 0; 910 } 911 912 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) 913 { 914 const struct tc_netem_corr *c = nla_data(attr); 915 916 init_crandom(&q->delay_cor, c->delay_corr); 917 init_crandom(&q->loss_cor, c->loss_corr); 918 init_crandom(&q->dup_cor, c->dup_corr); 919 } 920 921 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr) 922 { 923 const struct tc_netem_reorder *r = nla_data(attr); 924 925 q->reorder = r->probability; 926 init_crandom(&q->reorder_cor, r->correlation); 927 } 928 929 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr) 930 { 931 const struct tc_netem_corrupt *r = nla_data(attr); 932 933 q->corrupt = r->probability; 934 init_crandom(&q->corrupt_cor, r->correlation); 935 } 936 937 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr) 938 { 939 const struct tc_netem_rate *r = nla_data(attr); 940 941 q->rate = r->rate; 942 q->packet_overhead = r->packet_overhead; 943 q->cell_size = r->cell_size; 944 q->cell_overhead = r->cell_overhead; 945 if (q->cell_size) 946 q->cell_size_reciprocal = reciprocal_value(q->cell_size); 947 else 948 q->cell_size_reciprocal = (struct reciprocal_value) { 0 }; 949 } 950 951 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr, 952 struct netlink_ext_ack *extack) 953 { 954 const struct nlattr *la; 955 int rem; 956 957 nla_for_each_nested(la, attr, rem) { 958 u16 type = nla_type(la); 959 960 switch (type) { 961 case NETEM_LOSS_GI: { 962 const struct tc_netem_gimodel *gi = nla_data(la); 963 964 if (nla_len(la) < sizeof(struct tc_netem_gimodel)) { 965 NL_SET_ERR_MSG_ATTR(extack, la, 966 "netem: incorrect gi model size"); 967 return -EINVAL; 968 } 
969 970 q->loss_model = CLG_4_STATES; 971 972 q->clg.state = TX_IN_GAP_PERIOD; 973 q->clg.a1 = gi->p13; 974 q->clg.a2 = gi->p31; 975 q->clg.a3 = gi->p32; 976 q->clg.a4 = gi->p14; 977 q->clg.a5 = gi->p23; 978 break; 979 } 980 981 case NETEM_LOSS_GE: { 982 const struct tc_netem_gemodel *ge = nla_data(la); 983 984 if (nla_len(la) < sizeof(struct tc_netem_gemodel)) { 985 NL_SET_ERR_MSG_ATTR(extack, la, 986 "netem: incorrect ge model size"); 987 return -EINVAL; 988 } 989 990 q->loss_model = CLG_GILB_ELL; 991 q->clg.state = GOOD_STATE; 992 q->clg.a1 = ge->p; 993 q->clg.a2 = ge->r; 994 q->clg.a3 = ge->h; 995 q->clg.a4 = ge->k1; 996 break; 997 } 998 999 default: 1000 NL_SET_ERR_MSG_ATTR_FMT(extack, la, 1001 "netem: unknown loss type %u", type); 1002 return -EINVAL; 1003 } 1004 } 1005 1006 return 0; 1007 } 1008 1009 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 1010 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 1011 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 1012 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 1013 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) }, 1014 [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, 1015 [TCA_NETEM_ECN] = { .type = NLA_U32 }, 1016 [TCA_NETEM_RATE64] = { .type = NLA_U64 }, 1017 [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, 1018 [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, 1019 [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, 1020 [TCA_NETEM_PRNG_SEED] = { .type = NLA_U64 }, 1021 }; 1022 1023 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 1024 const struct nla_policy *policy, int len, 1025 struct netlink_ext_ack *extack) 1026 { 1027 int nested_len = nla_len(nla) - NLA_ALIGN(len); 1028 1029 if (nested_len < 0) { 1030 NL_SET_ERR_MSG_FMT(extack, "netem: invalid attributes len %d < %d", 1031 nla_len(nla), NLA_ALIGN(len)); 1032 return -EINVAL; 1033 } 1034 1035 if (nested_len >= nla_attr_size(0)) 1036 return 
nla_parse_deprecated(tb, maxtype, 1037 nla_data(nla) + NLA_ALIGN(len), 1038 nested_len, policy, extack); 1039 1040 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 1041 return 0; 1042 } 1043 1044 static const struct Qdisc_class_ops netem_class_ops; 1045 1046 static int check_netem_in_tree(struct Qdisc *sch, bool duplicates, 1047 struct netlink_ext_ack *extack) 1048 { 1049 struct Qdisc *root, *q; 1050 unsigned int i; 1051 1052 root = qdisc_root_sleeping(sch); 1053 1054 if (sch != root && root->ops->cl_ops == &netem_class_ops) { 1055 if (duplicates || 1056 ((struct netem_sched_data *)qdisc_priv(root))->duplicate) 1057 goto err; 1058 } 1059 1060 if (!qdisc_dev(root)) 1061 return 0; 1062 1063 hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) { 1064 if (sch != q && q->ops->cl_ops == &netem_class_ops) { 1065 if (duplicates || 1066 ((struct netem_sched_data *)qdisc_priv(q))->duplicate) 1067 goto err; 1068 } 1069 } 1070 1071 return 0; 1072 1073 err: 1074 NL_SET_ERR_MSG(extack, 1075 "netem: cannot mix duplicating netems with other netems in tree"); 1076 return -EINVAL; 1077 } 1078 1079 /* Parse netlink message to set options */ 1080 static int netem_change(struct Qdisc *sch, struct nlattr *opt, 1081 struct netlink_ext_ack *extack) 1082 { 1083 struct netem_sched_data *q = qdisc_priv(sch); 1084 struct nlattr *tb[TCA_NETEM_MAX + 1]; 1085 struct disttable *delay_dist = NULL; 1086 struct disttable *slot_dist = NULL; 1087 struct tc_netem_qopt *qopt; 1088 struct clgstate old_clg; 1089 int old_loss_model = CLG_RANDOM; 1090 int ret; 1091 1092 qopt = nla_data(opt); 1093 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt), extack); 1094 if (ret < 0) 1095 return ret; 1096 1097 if (tb[TCA_NETEM_DELAY_DIST]) { 1098 ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]); 1099 if (ret) 1100 goto table_free; 1101 } 1102 1103 if (tb[TCA_NETEM_SLOT_DIST]) { 1104 ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]); 1105 if (ret) 1106 goto table_free; 
1107 } 1108 1109 if (tb[TCA_NETEM_SLOT]) { 1110 ret = validate_slot(tb[TCA_NETEM_SLOT], extack); 1111 if (ret) 1112 goto table_free; 1113 } 1114 1115 if (tb[TCA_NETEM_LATENCY64]) { 1116 ret = validate_time(tb[TCA_NETEM_LATENCY64], "latency", extack); 1117 if (ret) 1118 goto table_free; 1119 } 1120 1121 if (tb[TCA_NETEM_JITTER64]) { 1122 ret = validate_time(tb[TCA_NETEM_JITTER64], "jitter", extack); 1123 if (ret) 1124 goto table_free; 1125 } 1126 1127 sch_tree_lock(sch); 1128 /* backup q->clg and q->loss_model */ 1129 old_clg = q->clg; 1130 old_loss_model = q->loss_model; 1131 1132 if (tb[TCA_NETEM_LOSS]) { 1133 ret = get_loss_clg(q, tb[TCA_NETEM_LOSS], extack); 1134 if (ret) { 1135 q->loss_model = old_loss_model; 1136 q->clg = old_clg; 1137 goto unlock; 1138 } 1139 } else { 1140 q->loss_model = CLG_RANDOM; 1141 } 1142 1143 if (delay_dist) 1144 swap(q->delay_dist, delay_dist); 1145 if (slot_dist) 1146 swap(q->slot_dist, slot_dist); 1147 sch->limit = qopt->limit; 1148 1149 q->latency = PSCHED_TICKS2NS(qopt->latency); 1150 q->jitter = PSCHED_TICKS2NS(qopt->jitter); 1151 q->limit = qopt->limit; 1152 q->gap = qopt->gap; 1153 q->counter = 0; 1154 q->loss = qopt->loss; 1155 1156 ret = check_netem_in_tree(sch, qopt->duplicate, extack); 1157 if (ret) 1158 goto unlock; 1159 1160 q->duplicate = qopt->duplicate; 1161 1162 /* for compatibility with earlier versions. 
1163 * if gap is set, need to assume 100% probability 1164 */ 1165 if (q->gap) 1166 q->reorder = ~0; 1167 1168 if (tb[TCA_NETEM_CORR]) 1169 get_correlation(q, tb[TCA_NETEM_CORR]); 1170 1171 if (tb[TCA_NETEM_REORDER]) 1172 get_reorder(q, tb[TCA_NETEM_REORDER]); 1173 1174 if (tb[TCA_NETEM_CORRUPT]) 1175 get_corrupt(q, tb[TCA_NETEM_CORRUPT]); 1176 1177 if (tb[TCA_NETEM_RATE]) 1178 get_rate(q, tb[TCA_NETEM_RATE]); 1179 1180 if (tb[TCA_NETEM_RATE64]) 1181 q->rate = max_t(u64, q->rate, 1182 nla_get_u64(tb[TCA_NETEM_RATE64])); 1183 1184 if (tb[TCA_NETEM_LATENCY64]) 1185 q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); 1186 1187 if (tb[TCA_NETEM_JITTER64]) 1188 q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); 1189 1190 if (tb[TCA_NETEM_ECN]) 1191 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); 1192 1193 if (tb[TCA_NETEM_SLOT]) 1194 get_slot(q, tb[TCA_NETEM_SLOT]); 1195 1196 /* capping jitter to the range acceptable by tabledist() */ 1197 q->jitter = min_t(s64, abs(q->jitter), INT_MAX); 1198 1199 if (tb[TCA_NETEM_PRNG_SEED]) { 1200 q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]); 1201 prandom_seed_state(&q->prng.prng_state, q->prng.seed); 1202 } 1203 1204 unlock: 1205 sch_tree_unlock(sch); 1206 1207 table_free: 1208 dist_free(delay_dist); 1209 dist_free(slot_dist); 1210 return ret; 1211 } 1212 1213 static int netem_init(struct Qdisc *sch, struct nlattr *opt, 1214 struct netlink_ext_ack *extack) 1215 { 1216 struct netem_sched_data *q = qdisc_priv(sch); 1217 1218 qdisc_watchdog_init(&q->watchdog, sch); 1219 1220 if (!opt) 1221 return -EINVAL; 1222 1223 q->loss_model = CLG_RANDOM; 1224 q->prng.seed = get_random_u64(); 1225 prandom_seed_state(&q->prng.prng_state, q->prng.seed); 1226 1227 return netem_change(sch, opt, extack); 1228 } 1229 1230 static void netem_destroy(struct Qdisc *sch) 1231 { 1232 struct netem_sched_data *q = qdisc_priv(sch); 1233 1234 qdisc_watchdog_cancel(&q->watchdog); 1235 if (q->qdisc) 1236 qdisc_put(q->qdisc); 1237 dist_free(q->delay_dist); 1238 
dist_free(q->slot_dist); 1239 } 1240 1241 static int dump_loss_model(const struct netem_sched_data *q, 1242 struct sk_buff *skb) 1243 { 1244 struct nlattr *nest; 1245 1246 nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS); 1247 if (nest == NULL) 1248 goto nla_put_failure; 1249 1250 switch (q->loss_model) { 1251 case CLG_RANDOM: 1252 /* legacy loss model */ 1253 nla_nest_cancel(skb, nest); 1254 return 0; /* no data */ 1255 1256 case CLG_4_STATES: { 1257 struct tc_netem_gimodel gi = { 1258 .p13 = q->clg.a1, 1259 .p31 = q->clg.a2, 1260 .p32 = q->clg.a3, 1261 .p14 = q->clg.a4, 1262 .p23 = q->clg.a5, 1263 }; 1264 1265 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi)) 1266 goto nla_put_failure; 1267 break; 1268 } 1269 case CLG_GILB_ELL: { 1270 struct tc_netem_gemodel ge = { 1271 .p = q->clg.a1, 1272 .r = q->clg.a2, 1273 .h = q->clg.a3, 1274 .k1 = q->clg.a4, 1275 }; 1276 1277 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge)) 1278 goto nla_put_failure; 1279 break; 1280 } 1281 } 1282 1283 nla_nest_end(skb, nest); 1284 return 0; 1285 1286 nla_put_failure: 1287 nla_nest_cancel(skb, nest); 1288 return -1; 1289 } 1290 1291 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 1292 { 1293 const struct netem_sched_data *q = qdisc_priv(sch); 1294 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); 1295 struct tc_netem_qopt qopt; 1296 struct tc_netem_corr cor; 1297 struct tc_netem_reorder reorder; 1298 struct tc_netem_corrupt corrupt; 1299 struct tc_netem_rate rate; 1300 struct tc_netem_slot slot; 1301 1302 qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency), 1303 UINT_MAX); 1304 qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter), 1305 UINT_MAX); 1306 qopt.limit = q->limit; 1307 qopt.loss = q->loss; 1308 qopt.gap = q->gap; 1309 qopt.duplicate = q->duplicate; 1310 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) 1311 goto nla_put_failure; 1312 1313 if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) 1314 goto 
nla_put_failure; 1315 1316 if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) 1317 goto nla_put_failure; 1318 1319 cor.delay_corr = q->delay_cor.rho; 1320 cor.loss_corr = q->loss_cor.rho; 1321 cor.dup_corr = q->dup_cor.rho; 1322 if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor)) 1323 goto nla_put_failure; 1324 1325 reorder.probability = q->reorder; 1326 reorder.correlation = q->reorder_cor.rho; 1327 if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder)) 1328 goto nla_put_failure; 1329 1330 corrupt.probability = q->corrupt; 1331 corrupt.correlation = q->corrupt_cor.rho; 1332 if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt)) 1333 goto nla_put_failure; 1334 1335 if (q->rate >= (1ULL << 32)) { 1336 if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate, 1337 TCA_NETEM_PAD)) 1338 goto nla_put_failure; 1339 rate.rate = ~0U; 1340 } else { 1341 rate.rate = q->rate; 1342 } 1343 rate.packet_overhead = q->packet_overhead; 1344 rate.cell_size = q->cell_size; 1345 rate.cell_overhead = q->cell_overhead; 1346 if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate)) 1347 goto nla_put_failure; 1348 1349 if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn)) 1350 goto nla_put_failure; 1351 1352 if (dump_loss_model(q, skb) != 0) 1353 goto nla_put_failure; 1354 1355 if (q->slot_config.min_delay | q->slot_config.max_delay | 1356 q->slot_config.dist_jitter) { 1357 slot = q->slot_config; 1358 if (slot.max_packets == INT_MAX) 1359 slot.max_packets = 0; 1360 if (slot.max_bytes == INT_MAX) 1361 slot.max_bytes = 0; 1362 if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) 1363 goto nla_put_failure; 1364 } 1365 1366 if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed, 1367 TCA_NETEM_PAD)) 1368 goto nla_put_failure; 1369 1370 return nla_nest_end(skb, nla); 1371 1372 nla_put_failure: 1373 nlmsg_trim(skb, nla); 1374 return -1; 1375 } 1376 1377 static int netem_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 1378 { 1379 struct netem_sched_data 
*q = qdisc_priv(sch); 1380 struct tc_netem_xstats st = { 1381 .delayed = READ_ONCE(q->delayed), 1382 .dropped = READ_ONCE(q->dropped), 1383 .corrupted = READ_ONCE(q->corrupted), 1384 .duplicated = READ_ONCE(q->duplicated), 1385 .reordered = READ_ONCE(q->reordered), 1386 .ecn_marked = READ_ONCE(q->ecn_marked), 1387 .allocation_errors = READ_ONCE(q->allocation_errors), 1388 }; 1389 1390 return gnet_stats_copy_app(d, &st, sizeof(st)); 1391 } 1392 1393 static int netem_dump_class(struct Qdisc *sch, unsigned long cl, 1394 struct sk_buff *skb, struct tcmsg *tcm) 1395 { 1396 struct netem_sched_data *q = qdisc_priv(sch); 1397 1398 if (cl != 1 || !q->qdisc) /* only one class */ 1399 return -ENOENT; 1400 1401 tcm->tcm_handle |= TC_H_MIN(1); 1402 tcm->tcm_info = q->qdisc->handle; 1403 1404 return 0; 1405 } 1406 1407 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1408 struct Qdisc **old, struct netlink_ext_ack *extack) 1409 { 1410 struct netem_sched_data *q = qdisc_priv(sch); 1411 1412 *old = qdisc_replace(sch, new, &q->qdisc); 1413 return 0; 1414 } 1415 1416 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) 1417 { 1418 struct netem_sched_data *q = qdisc_priv(sch); 1419 return q->qdisc; 1420 } 1421 1422 static unsigned long netem_find(struct Qdisc *sch, u32 classid) 1423 { 1424 return 1; 1425 } 1426 1427 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) 1428 { 1429 if (!walker->stop) { 1430 if (!tc_qdisc_stats_dump(sch, 1, walker)) 1431 return; 1432 } 1433 } 1434 1435 static const struct Qdisc_class_ops netem_class_ops = { 1436 .graft = netem_graft, 1437 .leaf = netem_leaf, 1438 .find = netem_find, 1439 .walk = netem_walk, 1440 .dump = netem_dump_class, 1441 }; 1442 1443 static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 1444 .id = "netem", 1445 .cl_ops = &netem_class_ops, 1446 .priv_size = sizeof(struct netem_sched_data), 1447 .enqueue = netem_enqueue, 1448 .dequeue = netem_dequeue, 1449 .peek = 
qdisc_peek_dequeued, 1450 .init = netem_init, 1451 .reset = netem_reset, 1452 .destroy = netem_destroy, 1453 .change = netem_change, 1454 .dump = netem_dump, 1455 .dump_stats = netem_dump_stats, 1456 .owner = THIS_MODULE, 1457 }; 1458 MODULE_ALIAS_NET_SCH("netem"); 1459 1460 static int __init netem_module_init(void) 1461 { 1462 return register_qdisc(&netem_qdisc_ops); 1463 } 1464 static void __exit netem_module_exit(void) 1465 { 1466 unregister_qdisc(&netem_qdisc_ops); 1467 } 1468 module_init(netem_module_init) 1469 module_exit(netem_module_exit) 1470 MODULE_LICENSE("GPL"); 1471 MODULE_DESCRIPTION("Network characteristics emulator qdisc"); 1472