xref: /linux/net/sched/sch_netem.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sched/sch_netem.c	Network emulator
4  *
5  *  		Many of the algorithms and ideas for this came from
6  *		NIST Net which is not copyrighted.
7  *
8  * Authors:	Stephen Hemminger <shemminger@osdl.org>
9  *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
10  */
11 
12 #include <linux/mm.h>
13 #include <linux/module.h>
14 #include <linux/slab.h>
15 #include <linux/types.h>
16 #include <linux/kernel.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <linux/vmalloc.h>
20 #include <linux/prandom.h>
21 #include <linux/rtnetlink.h>
22 #include <linux/reciprocal_div.h>
23 #include <linux/rbtree.h>
24 
25 #include <net/gso.h>
26 #include <net/netlink.h>
27 #include <net/pkt_sched.h>
28 #include <net/inet_ecn.h>
29 
30 #define VERSION "1.3"
31 
32 /*	Network Emulation Queuing algorithm.
33 	====================================
34 
35 	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
36 		 Network Emulation Tool"
37 		 [2] Luigi Rizzo, DummyNet for FreeBSD
38 
39 	 ----------------------------------------------------------------
40 
41 	 This started out as a simple way to delay outgoing packets to
42 	 test TCP but has grown to include most of the functionality
43 	 of a full blown network emulator like NISTnet. It can delay
44 	 packets and add random jitter (and correlation). The random
45 	 distribution can be loaded from a table as well to provide
46 	 normal, Pareto, or experimental curves. Packet loss,
47 	 duplication, and reordering can also be emulated.
48 
49 	 This qdisc does not do classification; that can be handled by
50 	 layering other disciplines.  It does not need to do bandwidth
51 	 control either, since that can be handled by using token
52 	 bucket or other rate control.
53 
54      Correlated Loss Generator models
55 
56 	Added generation of correlated loss according to a 4-state Markov
57 	(GI) model and the 2-state "Gilbert-Elliot" model.
58 
59 	References:
60 	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
61 	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
62 	and intuitive loss model for packet networks and its implementation
63 	in the Netem module in the Linux kernel", available in [1]
64 
65 	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
66 		 Fabio Ludovici <fabio.ludovici at yahoo.it>
67 */
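/* Editor's note (illustrative, not part of the original source): the features
 * described above are normally driven from user space via iproute2.  A rough
 * usage sketch, assuming a stock tc binary and an interface named eth0:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *	tc qdisc change dev eth0 root netem loss 0.3% duplicate 1%
 *
 * i.e. delay with jitter and correlation, then random loss and duplication.
 * The exact option grammar is defined by iproute2, not by this file.
 */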
68 
69 struct disttable {
70 	u32  size;
71 	s16 table[] __counted_by(size);
72 };
73 
74 struct netem_sched_data {
75 	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
76 	struct rb_root t_root;
77 
78 	/* a linear queue; reduces rbtree rebalancing when jitter is low */
79 	struct sk_buff	*t_head;
80 	struct sk_buff	*t_tail;
81 
82 	u32 t_len;
83 
84 	/* optional qdisc for classful handling (NULL at netem init) */
85 	struct Qdisc	*qdisc;
86 
87 	struct qdisc_watchdog watchdog;
88 
89 	s64 latency;
90 	s64 jitter;
91 
92 	u32 loss;
93 	u32 ecn;
94 	u32 limit;
95 	u32 counter;
96 	u32 gap;
97 	u32 duplicate;
98 	u32 reorder;
99 	u32 corrupt;
100 	u64 rate;
101 	s32 packet_overhead;
102 	u32 cell_size;
103 	struct reciprocal_value cell_size_reciprocal;
104 	s32 cell_overhead;
105 
106 	struct crndstate {
107 		u32 last;
108 		u32 rho;
109 	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
110 
111 	struct prng  {
112 		u64 seed;
113 		struct rnd_state prng_state;
114 	} prng;
115 
116 	struct disttable *delay_dist;
117 
118 	enum  {
119 		CLG_RANDOM,
120 		CLG_4_STATES,
121 		CLG_GILB_ELL,
122 	} loss_model;
123 
124 	enum {
125 		TX_IN_GAP_PERIOD = 1,
126 		TX_IN_BURST_PERIOD,
127 		LOST_IN_GAP_PERIOD,
128 		LOST_IN_BURST_PERIOD,
129 	} _4_state_model;
130 
131 	enum {
132 		GOOD_STATE = 1,
133 		BAD_STATE,
134 	} GE_state_model;
135 
136 	/* Correlated Loss Generation models */
137 	struct clgstate {
138 		/* state of the Markov chain */
139 		u8 state;
140 
141 		/* 4-states and Gilbert-Elliot models */
142 		u32 a1;	/* p13 for 4-states or p for GE */
143 		u32 a2;	/* p31 for 4-states or r for GE */
144 		u32 a3;	/* p32 for 4-states or h for GE */
145 		u32 a4;	/* p14 for 4-states or 1-k for GE */
146 		u32 a5; /* p23 used only in 4-states */
147 	} clg;
148 
149 	struct tc_netem_slot slot_config;
150 	struct slotstate {
151 		u64 slot_next;
152 		s32 packets_left;
153 		s32 bytes_left;
154 	} slot;
155 
156 	struct disttable *slot_dist;
157 };
158 
159 /* Time stamp put into socket buffer control block
160  * Only valid when skbs are in our internal t(ime)fifo queue.
161  *
162  * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
163  * and skb->next & skb->prev are scratch space for a qdisc,
164  * we save skb->tstamp value in skb->cb[] before destroying it.
165  */
166 struct netem_skb_cb {
167 	u64	        time_to_send;
168 };
169 
170 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
171 {
172 	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
173 	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
174 	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
175 }
176 
177 /* init_crandom - initialize correlated random number generator
178  * Use entropy source for initial seed.
179  */
180 static void init_crandom(struct crndstate *state, unsigned long rho)
181 {
182 	state->rho = rho;
183 	state->last = get_random_u32();
184 }
185 
186 /* get_crandom - correlated random number generator
187  * Next number depends on last value.
188  * rho is scaled to avoid floating point.
189  */
190 static u32 get_crandom(struct crndstate *state, struct prng *p)
191 {
192 	u64 value, rho;
193 	unsigned long answer;
194 	struct rnd_state *s = &p->prng_state;
195 
196 	if (!state || state->rho == 0)	/* no correlation */
197 		return prandom_u32_state(s);
198 
199 	value = prandom_u32_state(s);
200 	rho = (u64)state->rho + 1;
201 	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
202 	state->last = answer;
203 	return answer;
204 }
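/* Editor's note (illustrative, not part of the original source): the blend in
 * get_crandom() is 32-bit fixed point, with rho scaled so that 0xffffffff
 * means full correlation.  For example, with state->rho = 0x80000000 (about
 * 50%), state->last = 0 and a fresh draw value = 0xffffffff:
 *
 *	answer = (0xffffffff * (2^32 - 0x80000001) + 0 * 0x80000001) >> 32
 *	       = about 0x7fffffff
 *
 * i.e. the output lands roughly half way between the previous output and the
 * new uniform draw, which is what 50% correlation is meant to model.
 */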
205 
206 /* loss_4state - 4-state model loss generator
207  * Generates losses according to the 4-state Markov chain adopted in
208  * the GI (General and Intuitive) loss model.
209  */
210 static bool loss_4state(struct netem_sched_data *q)
211 {
212 	struct clgstate *clg = &q->clg;
213 	u32 rnd = prandom_u32_state(&q->prng.prng_state);
214 
215 	/*
216 	 * Makes a comparison between rnd and the transition
217 	 * probabilities outgoing from the current state, then decides the
218 	 * next state and if the next packet has to be transmitted or lost.
219 	 * The four states correspond to:
220 	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
221 	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
222 	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
223 	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
224 	 */
225 	switch (clg->state) {
226 	case TX_IN_GAP_PERIOD:
227 		if (rnd < clg->a4) {
228 			clg->state = LOST_IN_GAP_PERIOD;
229 			return true;
230 		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
231 			clg->state = LOST_IN_BURST_PERIOD;
232 			return true;
233 		} else if (clg->a1 + clg->a4 < rnd) {
234 			clg->state = TX_IN_GAP_PERIOD;
235 		}
236 
237 		break;
238 	case TX_IN_BURST_PERIOD:
239 		if (rnd < clg->a5) {
240 			clg->state = LOST_IN_BURST_PERIOD;
241 			return true;
242 		} else {
243 			clg->state = TX_IN_BURST_PERIOD;
244 		}
245 
246 		break;
247 	case LOST_IN_BURST_PERIOD:
248 		if (rnd < clg->a3)
249 			clg->state = TX_IN_BURST_PERIOD;
250 		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
251 			clg->state = TX_IN_GAP_PERIOD;
252 		} else if (clg->a2 + clg->a3 < rnd) {
253 			clg->state = LOST_IN_BURST_PERIOD;
254 			return true;
255 		}
256 		break;
257 	case LOST_IN_GAP_PERIOD:
258 		clg->state = TX_IN_GAP_PERIOD;
259 		break;
260 	}
261 
262 	return false;
263 }
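/* Editor's note (illustrative, not part of the original source): the a1..a5
 * thresholds are compared against a uniform 32-bit draw, so a threshold of X
 * corresponds to a probability of X / 2^32.  For example, to get roughly 1%
 * isolated losses and 0.1% burst starts while in the gap state, user space
 * would set p14 (a4) to about 0.01 * 2^32 and p13 (a1) to about 0.001 * 2^32;
 * the comparisons above then carve [0, 2^32) into "lose isolated packet",
 * "enter burst period" and "keep transmitting" intervals.
 */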
264 
265 /* loss_gilb_ell - Gilbert-Elliot model loss generator
266  * Generates losses according to the Gilbert-Elliot loss model or
267  * its special cases  (Gilbert or Simple Gilbert)
268  *
269  * Makes a comparison between random number and the transition
270  * probabilities outgoing from the current state, then decides the
271  * next state. A second random number is extracted and the comparison
272  * with the loss probability of the current state decides if the next
273  * packet will be transmitted or lost.
274  */
275 static bool loss_gilb_ell(struct netem_sched_data *q)
276 {
277 	struct clgstate *clg = &q->clg;
278 	struct rnd_state *s = &q->prng.prng_state;
279 
280 	switch (clg->state) {
281 	case GOOD_STATE:
282 		if (prandom_u32_state(s) < clg->a1)
283 			clg->state = BAD_STATE;
284 		if (prandom_u32_state(s) < clg->a4)
285 			return true;
286 		break;
287 	case BAD_STATE:
288 		if (prandom_u32_state(s) < clg->a2)
289 			clg->state = GOOD_STATE;
290 		if (prandom_u32_state(s) > clg->a3)
291 			return true;
292 	}
293 
294 	return false;
295 }
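/* Editor's note (illustrative, not part of the original source): with the
 * mapping used here (a1 = p, a2 = r, a3 = h, a4 = 1-k, all out of 2^32), a
 * "simple Gilbert" setup such as p = 1%, r = 10%, h = 0, k = 1 drops
 * essentially every packet while the chain sits in BAD_STATE and none in
 * GOOD_STATE, producing loss bursts whose mean length is 1/r = 10 packets.
 */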
296 
297 static bool loss_event(struct netem_sched_data *q)
298 {
299 	switch (q->loss_model) {
300 	case CLG_RANDOM:
301 		/* Random packet drop 0 => none, ~0 => all */
302 		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);
303 
304 	case CLG_4_STATES:
305 		/* 4-state loss model algorithm (also used for the GI model).
306 		 * Extracts a value from the 4-state Markov loss generator
307 		 * and drops the packet when the generator signals a loss
308 		 * for the current state transition.
309 		 */
310 		return loss_4state(q);
311 
312 	case CLG_GILB_ELL:
313 		/* Gilbert-Elliot loss model algorithm.
314 		 * Extracts a value from the Gilbert-Elliot loss generator
315 		 * and drops the packet when the generator signals a loss
316 		 * for the current state.
317 		 */
318 		return loss_gilb_ell(q);
319 	}
320 
321 	return false;	/* not reached */
322 }
323 
324 
325 /* tabledist - return a pseudo-randomly distributed value with mean mu and
326  * std deviation sigma.  Uses table lookup to approximate the desired
327  * distribution, and a uniformly-distributed pseudo-random source.
328  */
329 static s64 tabledist(s64 mu, s32 sigma,
330 		     struct crndstate *state,
331 		     struct prng *prng,
332 		     const struct disttable *dist)
333 {
334 	s64 x;
335 	long t;
336 	u32 rnd;
337 
338 	if (sigma == 0)
339 		return mu;
340 
341 	rnd = get_crandom(state, prng);
342 
343 	/* default uniform distribution */
344 	if (dist == NULL)
345 		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
346 
347 	t = dist->table[rnd % dist->size];
348 	x = (sigma % NETEM_DIST_SCALE) * t;
349 	if (x >= 0)
350 		x += NETEM_DIST_SCALE/2;
351 	else
352 		x -= NETEM_DIST_SCALE/2;
353 
354 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
355 }
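/* Editor's note (illustrative, not part of the original source): the table
 * holds samples of the inverse CDF, so indexing it with a (correlated)
 * uniform value yields the target distribution.  The result works out to
 * roughly mu + t * sigma / NETEM_DIST_SCALE: e.g. with mu = 100ms,
 * sigma = 10ms and a table entry t = NETEM_DIST_SCALE (one standard
 * deviation), the returned delay is about 110ms.  Splitting sigma into its
 * NETEM_DIST_SCALE quotient and remainder keeps the intermediate products
 * small.
 */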
356 
357 static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
358 {
359 	len += q->packet_overhead;
360 
361 	if (q->cell_size) {
362 		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
363 
364 		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
365 			cells++;
366 		len = cells * (q->cell_size + q->cell_overhead);
367 	}
368 
369 	return div64_u64(len * NSEC_PER_SEC, q->rate);
370 }
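/* Editor's note (illustrative, not part of the original source): q->rate is a
 * rate in bytes per second (as iproute2 hands it in), so for example at
 * 1 Mbit/s (125000 bytes/s), zero packet_overhead and no cell framing, a
 * 1500 byte packet costs
 *
 *	1500 * NSEC_PER_SEC / 125000 = 12000000 ns = 12 ms
 *
 * of serialisation delay.  With cell_size set (e.g. 48 for ATM-like framing)
 * the length is first rounded up to a whole number of cells, each padded by
 * cell_overhead, before the division.
 */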
371 
372 static void tfifo_reset(struct Qdisc *sch)
373 {
374 	struct netem_sched_data *q = qdisc_priv(sch);
375 	struct rb_node *p = rb_first(&q->t_root);
376 
377 	while (p) {
378 		struct sk_buff *skb = rb_to_skb(p);
379 
380 		p = rb_next(p);
381 		rb_erase(&skb->rbnode, &q->t_root);
382 		rtnl_kfree_skbs(skb, skb);
383 	}
384 
385 	rtnl_kfree_skbs(q->t_head, q->t_tail);
386 	q->t_head = NULL;
387 	q->t_tail = NULL;
388 	q->t_len = 0;
389 }
390 
391 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
392 {
393 	struct netem_sched_data *q = qdisc_priv(sch);
394 	u64 tnext = netem_skb_cb(nskb)->time_to_send;
395 
396 	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
397 		if (q->t_tail)
398 			q->t_tail->next = nskb;
399 		else
400 			q->t_head = nskb;
401 		q->t_tail = nskb;
402 	} else {
403 		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
404 
405 		while (*p) {
406 			struct sk_buff *skb;
407 
408 			parent = *p;
409 			skb = rb_to_skb(parent);
410 			if (tnext >= netem_skb_cb(skb)->time_to_send)
411 				p = &parent->rb_right;
412 			else
413 				p = &parent->rb_left;
414 		}
415 		rb_link_node(&nskb->rbnode, parent, p);
416 		rb_insert_color(&nskb->rbnode, &q->t_root);
417 	}
418 	q->t_len++;
419 	sch->q.qlen++;
420 }
421 
422 /* netem can't properly corrupt a megapacket (like we get from GSO), so
423  * when we statistically choose to corrupt one, we segment it instead, returning
424  * the first packet to be corrupted, and re-enqueueing the remaining frames
425  */
426 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
427 				     struct sk_buff **to_free)
428 {
429 	struct sk_buff *segs;
430 	netdev_features_t features = netif_skb_features(skb);
431 
432 	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
433 
434 	if (IS_ERR_OR_NULL(segs)) {
435 		qdisc_drop(skb, sch, to_free);
436 		return NULL;
437 	}
438 	consume_skb(skb);
439 	return segs;
440 }
441 
442 /*
443  * Insert one skb into qdisc.
444  * Note: parent depends on return value to account for queue length.
445  * 	NET_XMIT_DROP: queue length didn't change.
446  *      NET_XMIT_SUCCESS: one skb was queued.
447  */
448 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
449 			 struct sk_buff **to_free)
450 {
451 	struct netem_sched_data *q = qdisc_priv(sch);
452 	/* We don't fill cb now as skb_unshare() may invalidate it */
453 	struct netem_skb_cb *cb;
454 	struct sk_buff *skb2 = NULL;
455 	struct sk_buff *segs = NULL;
456 	unsigned int prev_len = qdisc_pkt_len(skb);
457 	int count = 1;
458 
459 	/* Do not fool qdisc_drop_all() */
460 	skb->prev = NULL;
461 
462 	/* Random duplication */
463 	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
464 		++count;
465 
466 	/* Drop packet? */
467 	if (loss_event(q)) {
468 		if (q->ecn && INET_ECN_set_ce(skb))
469 			qdisc_qstats_drop(sch); /* mark packet */
470 		else
471 			--count;
472 	}
473 	if (count == 0) {
474 		qdisc_qstats_drop(sch);
475 		__qdisc_drop(skb, to_free);
476 		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
477 	}
478 
479 	/* If a delay is expected, orphan the skb. (orphaning usually takes
480 	 * place at TX completion time, so _before_ the link transit delay)
481 	 */
482 	if (q->latency || q->jitter || q->rate)
483 		skb_orphan_partial(skb);
484 
485 	/*
486 	 * If we need to duplicate packet, then clone it before
487 	 * original is modified.
488 	 */
489 	if (count > 1)
490 		skb2 = skb_clone(skb, GFP_ATOMIC);
491 
492 	/*
493 	 * Randomized packet corruption.
494 	 * Make copy if needed since we are modifying
495 	 * If packet is going to be hardware checksummed, then
496 	 * do it now in software before we mangle it.
497 	 */
498 	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
499 		if (skb_is_gso(skb)) {
500 			skb = netem_segment(skb, sch, to_free);
501 			if (!skb)
502 				goto finish_segs;
503 
504 			segs = skb->next;
505 			skb_mark_not_on_list(skb);
506 			qdisc_skb_cb(skb)->pkt_len = skb->len;
507 		}
508 
509 		skb = skb_unshare(skb, GFP_ATOMIC);
510 		if (unlikely(!skb)) {
511 			qdisc_qstats_drop(sch);
512 			goto finish_segs;
513 		}
514 		if (skb->ip_summed == CHECKSUM_PARTIAL &&
515 		    skb_checksum_help(skb)) {
516 			qdisc_drop(skb, sch, to_free);
517 			skb = NULL;
518 			goto finish_segs;
519 		}
520 
521 		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
522 			1<<get_random_u32_below(8);
523 	}
524 
525 	if (unlikely(q->t_len >= sch->limit)) {
526 		/* re-link segs, so that qdisc_drop_all() frees them all */
527 		skb->next = segs;
528 		qdisc_drop_all(skb, sch, to_free);
529 		if (skb2)
530 			__qdisc_drop(skb2, to_free);
531 		return NET_XMIT_DROP;
532 	}
533 
534 	/*
535 	 * If doing duplication then re-insert at top of the
536 	 * qdisc tree, since parent queuer expects that only one
537 	 * skb will be queued.
538 	 */
539 	if (skb2) {
540 		struct Qdisc *rootq = qdisc_root_bh(sch);
541 		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
542 
543 		q->duplicate = 0;
544 		rootq->enqueue(skb2, rootq, to_free);
545 		q->duplicate = dupsave;
546 		skb2 = NULL;
547 	}
548 
549 	qdisc_qstats_backlog_inc(sch, skb);
550 
551 	cb = netem_skb_cb(skb);
552 	if (q->gap == 0 ||		/* not doing reordering */
553 	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
554 	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
555 		u64 now;
556 		s64 delay;
557 
558 		delay = tabledist(q->latency, q->jitter,
559 				  &q->delay_cor, &q->prng, q->delay_dist);
560 
561 		now = ktime_get_ns();
562 
563 		if (q->rate) {
564 			struct netem_skb_cb *last = NULL;
565 
566 			if (sch->q.tail)
567 				last = netem_skb_cb(sch->q.tail);
568 			if (q->t_root.rb_node) {
569 				struct sk_buff *t_skb;
570 				struct netem_skb_cb *t_last;
571 
572 				t_skb = skb_rb_last(&q->t_root);
573 				t_last = netem_skb_cb(t_skb);
574 				if (!last ||
575 				    t_last->time_to_send > last->time_to_send)
576 					last = t_last;
577 			}
578 			if (q->t_tail) {
579 				struct netem_skb_cb *t_last =
580 					netem_skb_cb(q->t_tail);
581 
582 				if (!last ||
583 				    t_last->time_to_send > last->time_to_send)
584 					last = t_last;
585 			}
586 
587 			if (last) {
588 				/*
589 				 * Last packet in queue is reference point (now),
590 				 * calculate this time bonus and subtract
591 				 * from delay.
592 				 */
593 				delay -= last->time_to_send - now;
594 				delay = max_t(s64, 0, delay);
595 				now = last->time_to_send;
596 			}
597 
598 			delay += packet_time_ns(qdisc_pkt_len(skb), q);
599 		}
600 
601 		cb->time_to_send = now + delay;
602 		++q->counter;
603 		tfifo_enqueue(skb, sch);
604 	} else {
605 		/*
606 		 * Do re-ordering by putting one out of N packets at the front
607 		 * of the queue.
608 		 */
609 		cb->time_to_send = ktime_get_ns();
610 		q->counter = 0;
611 
612 		__qdisc_enqueue_head(skb, &sch->q);
613 		sch->qstats.requeues++;
614 	}
615 
616 finish_segs:
617 	if (skb2)
618 		__qdisc_drop(skb2, to_free);
619 
620 	if (segs) {
621 		unsigned int len, last_len;
622 		int rc, nb;
623 
624 		len = skb ? skb->len : 0;
625 		nb = skb ? 1 : 0;
626 
627 		while (segs) {
628 			skb2 = segs->next;
629 			skb_mark_not_on_list(segs);
630 			qdisc_skb_cb(segs)->pkt_len = segs->len;
631 			last_len = segs->len;
632 			rc = qdisc_enqueue(segs, sch, to_free);
633 			if (rc != NET_XMIT_SUCCESS) {
634 				if (net_xmit_drop_count(rc))
635 					qdisc_qstats_drop(sch);
636 			} else {
637 				nb++;
638 				len += last_len;
639 			}
640 			segs = skb2;
641 		}
642 		/* Parent qdiscs accounted for 1 skb of size @prev_len */
643 		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
644 	} else if (!skb) {
645 		return NET_XMIT_DROP;
646 	}
647 	return NET_XMIT_SUCCESS;
648 }
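/* Editor's note (illustrative, not part of the original source): the
 * gap/counter logic above implements "reorder every Nth packet".  With
 * gap = 5 and reorder = ~0 (100%), four consecutive packets take the tfifo
 * path and receive the full emulated delay, then the fifth resets the
 * counter and is queued at the head with time_to_send = now, so it overtakes
 * the delayed ones and shows up out of order at the receiver.
 */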
649 
650 /* Pick the start time of the next delivery slot and refill its
651  * byte and packet budgets.
652  */
653 
654 static void get_slot_next(struct netem_sched_data *q, u64 now)
655 {
656 	s64 next_delay;
657 
658 	if (!q->slot_dist)
659 		next_delay = q->slot_config.min_delay +
660 				(get_random_u32() *
661 				 (q->slot_config.max_delay -
662 				  q->slot_config.min_delay) >> 32);
663 	else
664 		next_delay = tabledist(q->slot_config.dist_delay,
665 				       (s32)(q->slot_config.dist_jitter),
666 				       NULL, &q->prng, q->slot_dist);
667 
668 	q->slot.slot_next = now + next_delay;
669 	q->slot.packets_left = q->slot_config.max_packets;
670 	q->slot.bytes_left = q->slot_config.max_bytes;
671 }
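/* Editor's note (illustrative, not part of the original source): slots model
 * media that deliver in bursts (e.g. WiFi aggregation or DOCSIS request
 * grants).  With min_delay = 800us, max_delay = 1000us and max_packets = 32,
 * each call picks the next slot boundary uniformly in [800us, 1000us) from
 * "now", and the dequeue path then releases at most 32 packets (and at most
 * max_bytes bytes) before waiting for the following slot.
 */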
672 
673 static struct sk_buff *netem_peek(struct netem_sched_data *q)
674 {
675 	struct sk_buff *skb = skb_rb_first(&q->t_root);
676 	u64 t1, t2;
677 
678 	if (!skb)
679 		return q->t_head;
680 	if (!q->t_head)
681 		return skb;
682 
683 	t1 = netem_skb_cb(skb)->time_to_send;
684 	t2 = netem_skb_cb(q->t_head)->time_to_send;
685 	if (t1 < t2)
686 		return skb;
687 	return q->t_head;
688 }
689 
690 static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
691 {
692 	if (skb == q->t_head) {
693 		q->t_head = skb->next;
694 		if (!q->t_head)
695 			q->t_tail = NULL;
696 	} else {
697 		rb_erase(&skb->rbnode, &q->t_root);
698 	}
699 }
700 
701 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
702 {
703 	struct netem_sched_data *q = qdisc_priv(sch);
704 	struct sk_buff *skb;
705 
706 tfifo_dequeue:
707 	skb = __qdisc_dequeue_head(&sch->q);
708 	if (skb) {
709 deliver:
710 		qdisc_qstats_backlog_dec(sch, skb);
711 		qdisc_bstats_update(sch, skb);
712 		return skb;
713 	}
714 	skb = netem_peek(q);
715 	if (skb) {
716 		u64 time_to_send;
717 		u64 now = ktime_get_ns();
718 
719 		/* is there still time remaining before this skb may be sent? */
720 		time_to_send = netem_skb_cb(skb)->time_to_send;
721 		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
722 			get_slot_next(q, now);
723 
724 		if (time_to_send <= now && q->slot.slot_next <= now) {
725 			netem_erase_head(q, skb);
726 			q->t_len--;
727 			skb->next = NULL;
728 			skb->prev = NULL;
729 			/* skb->dev shares skb->rbnode area,
730 			 * we need to restore its value.
731 			 */
732 			skb->dev = qdisc_dev(sch);
733 
734 			if (q->slot.slot_next) {
735 				q->slot.packets_left--;
736 				q->slot.bytes_left -= qdisc_pkt_len(skb);
737 				if (q->slot.packets_left <= 0 ||
738 				    q->slot.bytes_left <= 0)
739 					get_slot_next(q, now);
740 			}
741 
742 			if (q->qdisc) {
743 				unsigned int pkt_len = qdisc_pkt_len(skb);
744 				struct sk_buff *to_free = NULL;
745 				int err;
746 
747 				err = qdisc_enqueue(skb, q->qdisc, &to_free);
748 				kfree_skb_list(to_free);
749 				if (err != NET_XMIT_SUCCESS) {
750 					if (net_xmit_drop_count(err))
751 						qdisc_qstats_drop(sch);
752 					sch->qstats.backlog -= pkt_len;
753 					sch->q.qlen--;
754 					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
755 				}
756 				goto tfifo_dequeue;
757 			}
758 			sch->q.qlen--;
759 			goto deliver;
760 		}
761 
762 		if (q->qdisc) {
763 			skb = q->qdisc->ops->dequeue(q->qdisc);
764 			if (skb) {
765 				sch->q.qlen--;
766 				goto deliver;
767 			}
768 		}
769 
770 		qdisc_watchdog_schedule_ns(&q->watchdog,
771 					   max(time_to_send,
772 					       q->slot.slot_next));
773 	}
774 
775 	if (q->qdisc) {
776 		skb = q->qdisc->ops->dequeue(q->qdisc);
777 		if (skb) {
778 			sch->q.qlen--;
779 			goto deliver;
780 		}
781 	}
782 	return NULL;
783 }
784 
785 static void netem_reset(struct Qdisc *sch)
786 {
787 	struct netem_sched_data *q = qdisc_priv(sch);
788 
789 	qdisc_reset_queue(sch);
790 	tfifo_reset(sch);
791 	if (q->qdisc)
792 		qdisc_reset(q->qdisc);
793 	qdisc_watchdog_cancel(&q->watchdog);
794 }
795 
796 static void dist_free(struct disttable *d)
797 {
798 	kvfree(d);
799 }
800 
801 /*
802  * Distribution data is a variable size payload containing
803  * signed 16 bit values.
804  */
805 
806 static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
807 {
808 	size_t n = nla_len(attr)/sizeof(__s16);
809 	const __s16 *data = nla_data(attr);
810 	struct disttable *d;
811 	int i;
812 
813 	if (!n || n > NETEM_DIST_MAX)
814 		return -EINVAL;
815 
816 	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
817 	if (!d)
818 		return -ENOMEM;
819 
820 	d->size = n;
821 	for (i = 0; i < n; i++)
822 		d->table[i] = data[i];
823 
824 	*tbl = d;
825 	return 0;
826 }
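/* Editor's note (illustrative, not part of the original source): in practice
 * the attribute carries one of the distribution tables shipped with iproute2
 * (normal, pareto, paretonormal, experimental) or a user-generated one.  Each
 * of the up-to-NETEM_DIST_MAX entries is a signed 16-bit inverse-CDF sample
 * that tabledist() above multiplies by sigma and divides by NETEM_DIST_SCALE.
 */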
827 
828 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
829 {
830 	const struct tc_netem_slot *c = nla_data(attr);
831 
832 	q->slot_config = *c;
833 	if (q->slot_config.max_packets == 0)
834 		q->slot_config.max_packets = INT_MAX;
835 	if (q->slot_config.max_bytes == 0)
836 		q->slot_config.max_bytes = INT_MAX;
837 
838 	/* capping dist_jitter to the range acceptable by tabledist() */
839 	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
840 
841 	q->slot.packets_left = q->slot_config.max_packets;
842 	q->slot.bytes_left = q->slot_config.max_bytes;
843 	if (q->slot_config.min_delay | q->slot_config.max_delay |
844 	    q->slot_config.dist_jitter)
845 		q->slot.slot_next = ktime_get_ns();
846 	else
847 		q->slot.slot_next = 0;
848 }
849 
850 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
851 {
852 	const struct tc_netem_corr *c = nla_data(attr);
853 
854 	init_crandom(&q->delay_cor, c->delay_corr);
855 	init_crandom(&q->loss_cor, c->loss_corr);
856 	init_crandom(&q->dup_cor, c->dup_corr);
857 }
858 
859 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
860 {
861 	const struct tc_netem_reorder *r = nla_data(attr);
862 
863 	q->reorder = r->probability;
864 	init_crandom(&q->reorder_cor, r->correlation);
865 }
866 
867 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
868 {
869 	const struct tc_netem_corrupt *r = nla_data(attr);
870 
871 	q->corrupt = r->probability;
872 	init_crandom(&q->corrupt_cor, r->correlation);
873 }
874 
875 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
876 {
877 	const struct tc_netem_rate *r = nla_data(attr);
878 
879 	q->rate = r->rate;
880 	q->packet_overhead = r->packet_overhead;
881 	q->cell_size = r->cell_size;
882 	q->cell_overhead = r->cell_overhead;
883 	if (q->cell_size)
884 		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
885 	else
886 		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
887 }
888 
889 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
890 {
891 	const struct nlattr *la;
892 	int rem;
893 
894 	nla_for_each_nested(la, attr, rem) {
895 		u16 type = nla_type(la);
896 
897 		switch (type) {
898 		case NETEM_LOSS_GI: {
899 			const struct tc_netem_gimodel *gi = nla_data(la);
900 
901 			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
902 				pr_info("netem: incorrect gi model size\n");
903 				return -EINVAL;
904 			}
905 
906 			q->loss_model = CLG_4_STATES;
907 
908 			q->clg.state = TX_IN_GAP_PERIOD;
909 			q->clg.a1 = gi->p13;
910 			q->clg.a2 = gi->p31;
911 			q->clg.a3 = gi->p32;
912 			q->clg.a4 = gi->p14;
913 			q->clg.a5 = gi->p23;
914 			break;
915 		}
916 
917 		case NETEM_LOSS_GE: {
918 			const struct tc_netem_gemodel *ge = nla_data(la);
919 
920 			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
921 				pr_info("netem: incorrect ge model size\n");
922 				return -EINVAL;
923 			}
924 
925 			q->loss_model = CLG_GILB_ELL;
926 			q->clg.state = GOOD_STATE;
927 			q->clg.a1 = ge->p;
928 			q->clg.a2 = ge->r;
929 			q->clg.a3 = ge->h;
930 			q->clg.a4 = ge->k1;
931 			break;
932 		}
933 
934 		default:
935 			pr_info("netem: unknown loss type %u\n", type);
936 			return -EINVAL;
937 		}
938 	}
939 
940 	return 0;
941 }
942 
943 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
944 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
945 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
946 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
947 	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
948 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
949 	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
950 	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
951 	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
952 	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
953 	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
954 	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
955 };
956 
957 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
958 		      const struct nla_policy *policy, int len)
959 {
960 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
961 
962 	if (nested_len < 0) {
963 		pr_info("netem: invalid attributes len %d\n", nested_len);
964 		return -EINVAL;
965 	}
966 
967 	if (nested_len >= nla_attr_size(0))
968 		return nla_parse_deprecated(tb, maxtype,
969 					    nla_data(nla) + NLA_ALIGN(len),
970 					    nested_len, policy, NULL);
971 
972 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
973 	return 0;
974 }
975 
976 static const struct Qdisc_class_ops netem_class_ops;
977 
978 static int check_netem_in_tree(struct Qdisc *sch, bool duplicates,
979 			       struct netlink_ext_ack *extack)
980 {
981 	struct Qdisc *root, *q;
982 	unsigned int i;
983 
984 	root = qdisc_root_sleeping(sch);
985 
986 	if (sch != root && root->ops->cl_ops == &netem_class_ops) {
987 		if (duplicates ||
988 		    ((struct netem_sched_data *)qdisc_priv(root))->duplicate)
989 			goto err;
990 	}
991 
992 	if (!qdisc_dev(root))
993 		return 0;
994 
995 	hash_for_each(qdisc_dev(root)->qdisc_hash, i, q, hash) {
996 		if (sch != q && q->ops->cl_ops == &netem_class_ops) {
997 			if (duplicates ||
998 			    ((struct netem_sched_data *)qdisc_priv(q))->duplicate)
999 				goto err;
1000 		}
1001 	}
1002 
1003 	return 0;
1004 
1005 err:
1006 	NL_SET_ERR_MSG(extack,
1007 		       "netem: cannot mix duplicating netems with other netems in tree");
1008 	return -EINVAL;
1009 }
1010 
1011 /* Parse netlink message to set options */
1012 static int netem_change(struct Qdisc *sch, struct nlattr *opt,
1013 			struct netlink_ext_ack *extack)
1014 {
1015 	struct netem_sched_data *q = qdisc_priv(sch);
1016 	struct nlattr *tb[TCA_NETEM_MAX + 1];
1017 	struct disttable *delay_dist = NULL;
1018 	struct disttable *slot_dist = NULL;
1019 	struct tc_netem_qopt *qopt;
1020 	struct clgstate old_clg;
1021 	int old_loss_model = CLG_RANDOM;
1022 	int ret;
1023 
1024 	qopt = nla_data(opt);
1025 	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
1026 	if (ret < 0)
1027 		return ret;
1028 
1029 	if (tb[TCA_NETEM_DELAY_DIST]) {
1030 		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
1031 		if (ret)
1032 			goto table_free;
1033 	}
1034 
1035 	if (tb[TCA_NETEM_SLOT_DIST]) {
1036 		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
1037 		if (ret)
1038 			goto table_free;
1039 	}
1040 
1041 	sch_tree_lock(sch);
1042 	/* backup q->clg and q->loss_model */
1043 	old_clg = q->clg;
1044 	old_loss_model = q->loss_model;
1045 
1046 	if (tb[TCA_NETEM_LOSS]) {
1047 		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
1048 		if (ret) {
1049 			q->loss_model = old_loss_model;
1050 			q->clg = old_clg;
1051 			goto unlock;
1052 		}
1053 	} else {
1054 		q->loss_model = CLG_RANDOM;
1055 	}
1056 
1057 	if (delay_dist)
1058 		swap(q->delay_dist, delay_dist);
1059 	if (slot_dist)
1060 		swap(q->slot_dist, slot_dist);
1061 	sch->limit = qopt->limit;
1062 
1063 	q->latency = PSCHED_TICKS2NS(qopt->latency);
1064 	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
1065 	q->limit = qopt->limit;
1066 	q->gap = qopt->gap;
1067 	q->counter = 0;
1068 	q->loss = qopt->loss;
1069 
1070 	ret = check_netem_in_tree(sch, qopt->duplicate, extack);
1071 	if (ret)
1072 		goto unlock;
1073 
1074 	q->duplicate = qopt->duplicate;
1075 
1076 	/* for compatibility with earlier versions.
1077 	 * if gap is set, need to assume 100% probability
1078 	 */
1079 	if (q->gap)
1080 		q->reorder = ~0;
1081 
1082 	if (tb[TCA_NETEM_CORR])
1083 		get_correlation(q, tb[TCA_NETEM_CORR]);
1084 
1085 	if (tb[TCA_NETEM_REORDER])
1086 		get_reorder(q, tb[TCA_NETEM_REORDER]);
1087 
1088 	if (tb[TCA_NETEM_CORRUPT])
1089 		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
1090 
1091 	if (tb[TCA_NETEM_RATE])
1092 		get_rate(q, tb[TCA_NETEM_RATE]);
1093 
1094 	if (tb[TCA_NETEM_RATE64])
1095 		q->rate = max_t(u64, q->rate,
1096 				nla_get_u64(tb[TCA_NETEM_RATE64]));
1097 
1098 	if (tb[TCA_NETEM_LATENCY64])
1099 		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
1100 
1101 	if (tb[TCA_NETEM_JITTER64])
1102 		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
1103 
1104 	if (tb[TCA_NETEM_ECN])
1105 		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1106 
1107 	if (tb[TCA_NETEM_SLOT])
1108 		get_slot(q, tb[TCA_NETEM_SLOT]);
1109 
1110 	/* capping jitter to the range acceptable by tabledist() */
1111 	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
1112 
1113 	if (tb[TCA_NETEM_PRNG_SEED])
1114 		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
1115 	else
1116 		q->prng.seed = get_random_u64();
1117 	prandom_seed_state(&q->prng.prng_state, q->prng.seed);
1118 
1119 unlock:
1120 	sch_tree_unlock(sch);
1121 
1122 table_free:
1123 	dist_free(delay_dist);
1124 	dist_free(slot_dist);
1125 	return ret;
1126 }
1127 
1128 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1129 		      struct netlink_ext_ack *extack)
1130 {
1131 	struct netem_sched_data *q = qdisc_priv(sch);
1132 	int ret;
1133 
1134 	qdisc_watchdog_init(&q->watchdog, sch);
1135 
1136 	if (!opt)
1137 		return -EINVAL;
1138 
1139 	q->loss_model = CLG_RANDOM;
1140 	ret = netem_change(sch, opt, extack);
1141 	if (ret)
1142 		pr_info("netem: change failed\n");
1143 	return ret;
1144 }
1145 
1146 static void netem_destroy(struct Qdisc *sch)
1147 {
1148 	struct netem_sched_data *q = qdisc_priv(sch);
1149 
1150 	qdisc_watchdog_cancel(&q->watchdog);
1151 	if (q->qdisc)
1152 		qdisc_put(q->qdisc);
1153 	dist_free(q->delay_dist);
1154 	dist_free(q->slot_dist);
1155 }
1156 
1157 static int dump_loss_model(const struct netem_sched_data *q,
1158 			   struct sk_buff *skb)
1159 {
1160 	struct nlattr *nest;
1161 
1162 	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
1163 	if (nest == NULL)
1164 		goto nla_put_failure;
1165 
1166 	switch (q->loss_model) {
1167 	case CLG_RANDOM:
1168 		/* legacy loss model */
1169 		nla_nest_cancel(skb, nest);
1170 		return 0;	/* no data */
1171 
1172 	case CLG_4_STATES: {
1173 		struct tc_netem_gimodel gi = {
1174 			.p13 = q->clg.a1,
1175 			.p31 = q->clg.a2,
1176 			.p32 = q->clg.a3,
1177 			.p14 = q->clg.a4,
1178 			.p23 = q->clg.a5,
1179 		};
1180 
1181 		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
1182 			goto nla_put_failure;
1183 		break;
1184 	}
1185 	case CLG_GILB_ELL: {
1186 		struct tc_netem_gemodel ge = {
1187 			.p = q->clg.a1,
1188 			.r = q->clg.a2,
1189 			.h = q->clg.a3,
1190 			.k1 = q->clg.a4,
1191 		};
1192 
1193 		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
1194 			goto nla_put_failure;
1195 		break;
1196 	}
1197 	}
1198 
1199 	nla_nest_end(skb, nest);
1200 	return 0;
1201 
1202 nla_put_failure:
1203 	nla_nest_cancel(skb, nest);
1204 	return -1;
1205 }
1206 
1207 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1208 {
1209 	const struct netem_sched_data *q = qdisc_priv(sch);
1210 	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
1211 	struct tc_netem_qopt qopt;
1212 	struct tc_netem_corr cor;
1213 	struct tc_netem_reorder reorder;
1214 	struct tc_netem_corrupt corrupt;
1215 	struct tc_netem_rate rate;
1216 	struct tc_netem_slot slot;
1217 
1218 	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
1219 			     UINT_MAX);
1220 	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
1221 			    UINT_MAX);
1222 	qopt.limit = q->limit;
1223 	qopt.loss = q->loss;
1224 	qopt.gap = q->gap;
1225 	qopt.duplicate = q->duplicate;
1226 	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1227 		goto nla_put_failure;
1228 
1229 	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
1230 		goto nla_put_failure;
1231 
1232 	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
1233 		goto nla_put_failure;
1234 
1235 	cor.delay_corr = q->delay_cor.rho;
1236 	cor.loss_corr = q->loss_cor.rho;
1237 	cor.dup_corr = q->dup_cor.rho;
1238 	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
1239 		goto nla_put_failure;
1240 
1241 	reorder.probability = q->reorder;
1242 	reorder.correlation = q->reorder_cor.rho;
1243 	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
1244 		goto nla_put_failure;
1245 
1246 	corrupt.probability = q->corrupt;
1247 	corrupt.correlation = q->corrupt_cor.rho;
1248 	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
1249 		goto nla_put_failure;
1250 
1251 	if (q->rate >= (1ULL << 32)) {
1252 		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
1253 				      TCA_NETEM_PAD))
1254 			goto nla_put_failure;
1255 		rate.rate = ~0U;
1256 	} else {
1257 		rate.rate = q->rate;
1258 	}
1259 	rate.packet_overhead = q->packet_overhead;
1260 	rate.cell_size = q->cell_size;
1261 	rate.cell_overhead = q->cell_overhead;
1262 	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
1263 		goto nla_put_failure;
1264 
1265 	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1266 		goto nla_put_failure;
1267 
1268 	if (dump_loss_model(q, skb) != 0)
1269 		goto nla_put_failure;
1270 
1271 	if (q->slot_config.min_delay | q->slot_config.max_delay |
1272 	    q->slot_config.dist_jitter) {
1273 		slot = q->slot_config;
1274 		if (slot.max_packets == INT_MAX)
1275 			slot.max_packets = 0;
1276 		if (slot.max_bytes == INT_MAX)
1277 			slot.max_bytes = 0;
1278 		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1279 			goto nla_put_failure;
1280 	}
1281 
1282 	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
1283 			      TCA_NETEM_PAD))
1284 		goto nla_put_failure;
1285 
1286 	return nla_nest_end(skb, nla);
1287 
1288 nla_put_failure:
1289 	nlmsg_trim(skb, nla);
1290 	return -1;
1291 }
1292 
1293 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
1294 			  struct sk_buff *skb, struct tcmsg *tcm)
1295 {
1296 	struct netem_sched_data *q = qdisc_priv(sch);
1297 
1298 	if (cl != 1 || !q->qdisc) 	/* only one class */
1299 		return -ENOENT;
1300 
1301 	tcm->tcm_handle |= TC_H_MIN(1);
1302 	tcm->tcm_info = q->qdisc->handle;
1303 
1304 	return 0;
1305 }
1306 
1307 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1308 		     struct Qdisc **old, struct netlink_ext_ack *extack)
1309 {
1310 	struct netem_sched_data *q = qdisc_priv(sch);
1311 
1312 	*old = qdisc_replace(sch, new, &q->qdisc);
1313 	return 0;
1314 }
1315 
1316 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
1317 {
1318 	struct netem_sched_data *q = qdisc_priv(sch);
1319 	return q->qdisc;
1320 }
1321 
1322 static unsigned long netem_find(struct Qdisc *sch, u32 classid)
1323 {
1324 	return 1;
1325 }
1326 
1327 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
1328 {
1329 	if (!walker->stop) {
1330 		if (!tc_qdisc_stats_dump(sch, 1, walker))
1331 			return;
1332 	}
1333 }
1334 
1335 static const struct Qdisc_class_ops netem_class_ops = {
1336 	.graft		=	netem_graft,
1337 	.leaf		=	netem_leaf,
1338 	.find		=	netem_find,
1339 	.walk		=	netem_walk,
1340 	.dump		=	netem_dump_class,
1341 };
1342 
1343 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1344 	.id		=	"netem",
1345 	.cl_ops		=	&netem_class_ops,
1346 	.priv_size	=	sizeof(struct netem_sched_data),
1347 	.enqueue	=	netem_enqueue,
1348 	.dequeue	=	netem_dequeue,
1349 	.peek		=	qdisc_peek_dequeued,
1350 	.init		=	netem_init,
1351 	.reset		=	netem_reset,
1352 	.destroy	=	netem_destroy,
1353 	.change		=	netem_change,
1354 	.dump		=	netem_dump,
1355 	.owner		=	THIS_MODULE,
1356 };
1357 MODULE_ALIAS_NET_SCH("netem");
1358 
1359 
1360 static int __init netem_module_init(void)
1361 {
1362 	pr_info("netem: version " VERSION "\n");
1363 	return register_qdisc(&netem_qdisc_ops);
1364 }
1365 static void __exit netem_module_exit(void)
1366 {
1367 	unregister_qdisc(&netem_qdisc_ops);
1368 }
1369 module_init(netem_module_init)
1370 module_exit(netem_module_exit)
1371 MODULE_LICENSE("GPL");
1372 MODULE_DESCRIPTION("Network characteristics emulator qdisc");
1373