xref: /linux/net/sched/sch_netem.c (revision 36ca1195ad7f760a6af3814cb002bd3a3d4b4db1)
/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License, or (at your option) any later version.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>

#include <net/pkt_sched.h>

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can also be loaded from a table to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines.  It does not need to do bandwidth
	 control either, since that can be handled by using a token
	 bucket or other rate control.

	 The simulator is limited by the Linux timer resolution
	 and will create packet bursts on the HZ boundary (1ms at HZ=1000).
*/
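
/* Usage sketch (illustrative; assumes the iproute2 "tc" front end for
 * this qdisc): add 100ms of delay with 10ms of jitter, each delay 25%
 * correlated with the previous one, plus 0.1% random loss:
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.1%
 *
 * Rate control is layered separately, e.g. with TBF as the parent.
 */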

struct netem_sched_data {
	struct Qdisc	*qdisc;
	struct timer_list timer;

	u32 latency;
	u32 loss;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 jitter;
	u32 duplicate;
	u32 reorder;

	struct crndstate {
		unsigned long last;
		unsigned long rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor;

	struct disttable {
		u32  size;
		s16 table[0];
	} *delay_dist;
};

/* Time stamp put into socket buffer control block */
struct netem_skb_cb {
	psched_time_t	time_to_send;
};

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = net_random();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static unsigned long get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (state->rho == 0)	/* no correlation */
		return net_random();

	value = net_random();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
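
/*
 * The mixing step above is fixed point arithmetic with 32 fractional
 * bits: with w = rho/2^32 (roughly), the update is
 *
 *	answer = (1 - w) * random + w * last
 *
 * so, e.g., rho = 0x80000000 yields samples that are a 50/50 blend of
 * fresh entropy and the previous output.
 */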

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static long tabledist(unsigned long mu, long sigma,
		      struct crndstate *state, const struct disttable *dist)
{
	long t, x;
	unsigned long rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return (rnd % (2*sigma)) - sigma + mu;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
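
/*
 * The quotient/remainder split above avoids 32-bit overflow when
 * scaling by sigma; conceptually the result is
 *
 *	mu + (t * sigma) / NETEM_DIST_SCALE
 *
 * with the remainder term rounded to nearest.  Table entries are
 * expected to be samples of the (zero mean) distribution, pre-scaled
 * so that NETEM_DIST_SCALE corresponds to one standard deviation.
 */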

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
	struct sk_buff *skb2;
	int ret;
	int count = 1;

	pr_debug("netem_enqueue skb=%p\n", skb);

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Random packet drop 0 => none, ~0 => all */
	if (q->loss && q->loss >= get_crandom(&q->loss_cor))
		--count;

	if (count == 0) {
		sch->qstats.drops++;
		kfree_skb(skb);
		return NET_XMIT_DROP;
	}
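
	/*
	 * Note: the loss/duplicate/reorder values are probabilities in
	 * 32-bit fixed point (fractions of 2^32, so 1% arrives as about
	 * 0.01 * 2^32, as computed by the userspace tool).  Comparing
	 * them against a uniform 32-bit correlated random value makes
	 * each check above a Bernoulli trial with that probability.
	 */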

	/*
	 * If we need to duplicate the packet, then re-insert at top of the
	 * qdisc tree, since the parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = sch->dev->qdisc;
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
		q->duplicate = 0;

		rootq->enqueue(skb2, rootq);
		q->duplicate = dupsave;
	}

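	/*
	 * Reordering, by example: with gap 5 and reorder 25%, the
	 * first 5 packets after a reorder are always delayed; each
	 * following packet then has a 25% (correlated) chance of
	 * being requeued at the head and sent immediately, which
	 * resets the counter.
	 */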
	if (q->gap == 0 		/* not doing reordering */
	    || q->counter < q->gap 	/* inside last reordering gap */
	    || q->reorder < get_crandom(&q->reorder_cor)) {
		psched_time_t now;
		PSCHED_GET_TIME(now);
		PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
					    &q->delay_cor, q->delay_dist),
			     cb->time_to_send);
		++q->counter;
		ret = q->qdisc->enqueue(skb, q->qdisc);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		PSCHED_GET_TIME(cb->time_to_send);
		q->counter = 0;
		ret = q->qdisc->ops->requeue(skb, q->qdisc);
	}

	if (likely(ret == NET_XMIT_SUCCESS)) {
		sch->q.qlen++;
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
	} else
		sch->qstats.drops++;

	pr_debug("netem: enqueue ret %d\n", ret);
	return ret;
}

/* Requeue packets but don't change time stamp */
static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}

static unsigned int netem_drop(struct Qdisc* sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned int len;

	if ((len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->dequeue(q->qdisc);
	if (skb) {
		const struct netem_skb_cb *cb
			= (const struct netem_skb_cb *)skb->cb;
		psched_time_t now;
		long delay;

		/* is it time to send this packet yet? */
		PSCHED_GET_TIME(now);
		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
		pr_debug("netem_dequeue: skb=%p delay=%ld\n", skb, delay);
		if (delay <= 0) {
			pr_debug("netem_dequeue: return skb=%p\n", skb);
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		mod_timer(&q->timer, jiffies + delay);
		sch->flags |= TCQ_F_THROTTLED;

		if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
			sch->qstats.drops++;
	}

	return NULL;
}

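/*
 * netem_dequeue() above leaves the qdisc throttled with the watchdog
 * timer pending whenever the head packet is not yet due; when the
 * timer fires (at jiffies granularity, hence the HZ-boundary bursts
 * noted at the top of the file) the device is kicked to retry the
 * dequeue.
 */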
static void netem_watchdog(unsigned long arg)
{
	struct Qdisc *sch = (struct Qdisc *)arg;

	pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen);
	sch->flags &= ~TCQ_F_THROTTLED;
	netif_schedule(sch->dev);
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	sch->flags &= ~TCQ_F_THROTTLED;
	del_timer_sync(&q->timer);
}

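/*
 * Resize the inner fifo by hand-building the same rtattr that a
 * netlink change request would carry (a struct tc_fifo_qopt holding
 * only the queue limit) and passing it to the child's ->change() hook.
 */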
static int set_fifo_limit(struct Qdisc *q, int limit)
{
	struct rtattr *rta;
	int ret = -ENOMEM;

	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
	if (rta) {
		rta->rta_type = RTM_NEWQDISC;
		rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
		((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;

		ret = q->ops->change(q, rta);
		kfree(rta);
	}
	return ret;
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */
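/* The tables are generated off line (iproute2, for example, ships
 * normal, pareto and paretonormal tables).  Each entry is a sample of
 * the desired distribution, pre-scaled so that tabledist() above can
 * treat NETEM_DIST_SCALE as one standard deviation.
 */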
static int get_dist_table(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	unsigned long n = RTA_PAYLOAD(attr)/sizeof(__s16);
	const __s16 *data = RTA_DATA(attr);
	struct disttable *d;
	int i;

	if (n > 65536)
		return -EINVAL;

	d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	spin_lock_bh(&sch->dev->queue_lock);
	d = xchg(&q->delay_dist, d);
	spin_unlock_bh(&sch->dev->queue_lock);

	kfree(d);
	return 0;
}

static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_corr *c = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*c))
		return -EINVAL;

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
	return 0;
}

static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	const struct tc_netem_reorder *r = RTA_DATA(attr);

	if (RTA_PAYLOAD(attr) != sizeof(*r))
		return -EINVAL;

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
	return 0;
}

static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct tc_netem_qopt *qopt;
	int ret;

	if (opt == NULL || RTA_PAYLOAD(opt) < sizeof(*qopt))
		return -EINVAL;

	qopt = RTA_DATA(opt);
	ret = set_fifo_limit(q->qdisc, qopt->limit);
	if (ret) {
		pr_debug("netem: can't set fifo limit\n");
		return ret;
	}

	q->latency = qopt->latency;
	q->jitter = qopt->jitter;
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* For compatibility with earlier versions:
	 * if gap is set, assume 100% reorder probability.
	 */
	q->reorder = ~0;

	/* Handle nested options after initial queue options.
	 * Should have put all options in nested format but too late now.
	 */
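	/*
	 * Concretely: the remaining payload after struct tc_netem_qopt
	 * is parsed as a flat run of rtattrs (TCA_NETEM_CORR,
	 * TCA_NETEM_DELAY_DIST, TCA_NETEM_REORDER) rather than as one
	 * nested attribute.
	 */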
	if (RTA_PAYLOAD(opt) > sizeof(*qopt)) {
		struct rtattr *tb[TCA_NETEM_MAX];
		if (rtattr_parse(tb, TCA_NETEM_MAX,
				 RTA_DATA(opt) + sizeof(*qopt),
				 RTA_PAYLOAD(opt) - sizeof(*qopt)))
			return -EINVAL;

		if (tb[TCA_NETEM_CORR-1]) {
			ret = get_correlation(sch, tb[TCA_NETEM_CORR-1]);
			if (ret)
				return ret;
		}

		if (tb[TCA_NETEM_DELAY_DIST-1]) {
			ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST-1]);
			if (ret)
				return ret;
		}
		if (tb[TCA_NETEM_REORDER-1]) {
			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
			if (ret)
				return ret;
		}
	}

	return 0;
}

static int netem_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	if (!opt)
		return -EINVAL;

	init_timer(&q->timer);
	q->timer.function = netem_watchdog;
	q->timer.data = (unsigned long) sch;

	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
	if (!q->qdisc) {
		pr_debug("netem: qdisc create failed\n");
		return -ENOMEM;
	}

	ret = netem_change(sch, opt);
	if (ret) {
		pr_debug("netem: change failed\n");
		qdisc_destroy(q->qdisc);
	}
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->timer);
	qdisc_destroy(q->qdisc);
	kfree(q->delay_dist);
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	unsigned char	 *b = skb->tail;
	struct rtattr *rta = (struct rtattr *) b;
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;

	qopt.latency = q->latency;
	qopt.jitter = q->jitter;
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	RTA_PUT(skb, TCA_OPTIONS, sizeof(qopt), &qopt);

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);

	rta->rta_len = skb->tail - b;

	return skb->len;

rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1) 	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_reset(*old);
	sch->q.qlen = 0;
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_put(struct Qdisc *sch, unsigned long arg)
{
}

static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct rtattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int netem_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

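/*
 * Netem presents exactly one pseudo-class (minor 1) wrapping the inner
 * qdisc, so that tc can graft, dump, and walk it; attaching filters or
 * creating/deleting classes is not supported, hence the stubs above.
 */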
static struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.get		=	netem_get,
	.put		=	netem_put,
	.change		=	netem_change_class,
	.delete		=	netem_delete,
	.walk		=	netem_walk,
	.tcf_chain	=	netem_find_tcf,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.requeue	=	netem_requeue,
	.drop		=	netem_drop,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};

static int __init netem_module_init(void)
{
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");