/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_arp.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer,
   simply attach this qdisc to the device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)  (A fuller example session is shown after
   the applicability notes below.)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of heavy packet reordering.
      A useful upper bound on the speed difference is roughly a factor of 10.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.  */
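
/* For illustration, a fuller session might look like the following;
   the device names and the address are examples, not requirements.
   First enslave the physical links, then configure and bring up the
   master as an ordinary interface:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0
   # ip addr add 10.0.0.1/24 dev teql0
   # ip link set dev teql0 up

   Note that teql0 cannot be brought up before at least one slave has
   been attached: teql_master_open() returns -EUNATCH otherwise. */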

struct teql_master
{
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	struct net_device_stats stats;
};

struct teql_sched_data
{
	struct Qdisc *next;
	struct teql_master *m;
	struct neighbour *ncache;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
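
/* The slaves of a master form a circular, singly linked list threaded
   through each slave qdisc's private data (the ->next field above).
   master->slaves points at the slave to try first on the next transmit,
   so advancing it after each successful send yields round-robin order. */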

/* Device flags the master mirrors from its slaves: each capability is
   advertised only while every slave has it. */
#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct net_device *dev = sch->dev;
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_tail(&q->q, skb);
	if (q->q.qlen <= dev->tx_queue_len) {
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}

	/* Queue overflow: drop the packet we just queued. */
	__skb_unlink(skb, &q->q);
	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}

static int
teql_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct teql_sched_data *q = qdisc_priv(sch);

	__skb_queue_head(&q->q, skb);
	sch->qstats.requeues++;
	return 0;
}

static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	if (skb == NULL) {
		/* Our queue is empty: make this slave the next one the
		   master serves and wake the master device. */
		struct net_device *m = dat->m->dev->qdisc->dev;
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
	return skb;
}

static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (n)
		neigh_release(n);
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
	teql_neigh_release(xchg(&dat->ncache, NULL));
}

static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						master->slaves = NULL;
						spin_lock_bh(&master->dev->queue_lock);
						qdisc_reset(master->dev->qdisc);
						spin_unlock_bh(&master->dev->queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}
		} while ((prev = q) != master->slaves);
	}
}

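/* Attaching a teql qdisc to a device enlists that device as a slave of
   the corresponding master. While the master is down, its MTU and
   BROADCAST/POINTOPOINT/MULTICAST flags are narrowed to what the new
   slave supports; while it is up, a slave that cannot honour the
   master's current parameters is refused with -EINVAL. */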
static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct net_device *dev = sch->dev;
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT && !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST && !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST && !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Link the new slave into the circular list. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: it defines the master's parameters. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

/* "teql*" netdevice routines */

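/* teql itself is ARPHRD_VOID and carries no link-layer header, but a
   slave may need one. __teql_resolve() builds the slave's hard header
   via the neighbour cache, keeping a one-entry cache (q->ncache) of the
   last neighbour used. It returns 0 on success, a negative errno on
   failure, and 1 when the packet has been parked on the neighbour's
   queue awaiting resolution (-EAGAIN on the first, probing-free pass,
   when skb_res is NULL). */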
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		read_lock(&n->lock);
		err = dev->hard_header(skb, dev, ntohs(skb->protocol),
				       n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}

static __inline__ int
teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	if (dev->hard_header == NULL ||
	    skb->dst == NULL ||
	    skb->dst->neighbour == NULL)
		return 0;
	return __teql_resolve(skb, skb_res, dev);
}

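/* Transmit on the master: walk the slave ring starting at master->slaves,
   trying each slave in turn; a successful transmit advances the ring
   pointer, which is what gives round-robin equalization. If some slave
   failed only on address resolution, a second pass is made with skb_res
   set so the packet can be parked on the neighbour's queue. */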
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) || !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			if (netif_tx_trylock(slave)) {
				if (!netif_queue_stopped(slave) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* Packet parked on the neighbour awaiting resolution. */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb->nh.raw - skb->data);
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned flags = FMASK;

	if (m->slaves == NULL)
		return -EUNATCH;

	q = m->slaves;
	do {
		struct net_device *slave = q->dev;

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, the master is BROADCAST.
		   If all the slaves are PtP, the master is PtP.
		   Otherwise, the master is NBMA. */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	if (new_mtu < 68)	/* 68 is the minimum IPv4 MTU (RFC 791) */
		return -EINVAL;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > q->dev->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->requeue	= teql_requeue;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	SET_MODULE_OWNER(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
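
/* For example, loading with "modprobe sch_teql max_equalizers=4" would
   create teql0 through teql3, each with a matching qdisc (illustrative
   invocation; the parameter is the one declared above). */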

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				   "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* Each master registers a qdisc whose id matches its
		   device name ("teql0", "teql1", ...). */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Succeed if at least one equalizer was registered. */
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");