xref: /linux/net/sched/sch_teql.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
1 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
2  *
3  *		This program is free software; you can redistribute it and/or
4  *		modify it under the terms of the GNU General Public License
5  *		as published by the Free Software Foundation; either version
6  *		2 of the License, or (at your option) any later version.
7  *
8  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/if_arp.h>
17 #include <linux/netdevice.h>
18 #include <linux/init.h>
19 #include <linux/skbuff.h>
20 #include <linux/moduleparam.h>
21 #include <net/dst.h>
22 #include <net/neighbour.h>
23 #include <net/pkt_sched.h>
24 
25 /*
26    How to set it up.
27    ----------------
28 
29    After loading this module you will find a new device teqlN
30    and new qdisc with the same name. To join a slave to the equalizer
31    you should just set this qdisc on a device f.e.
32 
33    # tc qdisc add dev eth0 root teql0
34    # tc qdisc add dev eth1 root teql0
35 
36    That's all. Full PnP 8)
37 
38    Applicability.
39    --------------
40 
41    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42       signal and generate EOI events. If you want to equalize virtual devices
43       like tunnels, use a normal eql device.
44    2. This device puts no limitations on physical slave characteristics
45       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
46       Certainly, large difference in link speeds will make the resulting
47       equalized link unusable, because of huge packet reordering.
48       I estimate an upper useful difference as ~10 times.
49    3. If the slave requires address resolution, only protocols using
50       neighbour cache (IPv4/IPv6) will work over the equalized link.
51       Other protocols are still allowed to use the slave device directly,
52       which will not break load balancing, though native slave
53       traffic will have the highest priority.  */
54 
/* One "teqlN" master device together with the qdisc ops registered
 * under the same name.  The Qdisc_ops is embedded first so qdisc
 * callbacks can recover the master by casting sch->ops (see
 * teql_qdisc_init()). */
struct teql_master
{
	struct Qdisc_ops qops;		/* per-master qdisc ops; id == dev name */
	struct net_device *dev;		/* the master network device */
	struct Qdisc *slaves;		/* head of circular list of slave qdiscs */
	struct list_head master_list;	/* link in global master_dev_list */
	struct net_device_stats stats;	/* master device tx statistics */
};
63 
/* Private data of one slave teql qdisc instance. */
struct teql_sched_data
{
	struct Qdisc *next;		/* next slave in the circular list */
	struct teql_master *m;		/* owning master */
	struct neighbour *ncache;	/* cached neighbour entry for this slave */
	struct sk_buff_head q;		/* packets waiting on this slave */
};
71 
72 #define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next)
73 
74 #define FMASK (IFF_BROADCAST|IFF_POINTOPOINT|IFF_BROADCAST)
75 
76 /* "teql*" qdisc routines */
77 
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81 	struct net_device *dev = sch->dev;
82 	struct teql_sched_data *q = qdisc_priv(sch);
83 
84 	if (q->q.qlen < dev->tx_queue_len) {
85 		__skb_queue_tail(&q->q, skb);
86 		sch->bstats.bytes += skb->len;
87 		sch->bstats.packets++;
88 		return 0;
89 	}
90 
91 	kfree_skb(skb);
92 	sch->qstats.drops++;
93 	return NET_XMIT_DROP;
94 }
95 
96 static int
97 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98 {
99 	struct teql_sched_data *q = qdisc_priv(sch);
100 
101 	__skb_queue_head(&q->q, skb);
102 	sch->qstats.requeues++;
103 	return 0;
104 }
105 
/* Dequeue a buffered packet for transmission by this slave device.
 * When this slave's queue runs empty, make it the next round-robin
 * candidate and wake the master so teql_master_xmit() runs again. */
static struct sk_buff *
teql_dequeue(struct Qdisc* sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = __skb_dequeue(&dat->q);
	if (skb == NULL) {
		/* dev of the master's root qdisc; presumably NULL while
		 * the master is down (noop qdisc) — TODO confirm. */
		struct net_device *m = dat->m->dev->qdisc->dev;
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	}
	/* Report our backlog plus what is still queued on the master. */
	sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
	return skb;
}
123 
/* Drop a neighbour reference, tolerating a NULL pointer. */
static __inline__ void
teql_neigh_release(struct neighbour *n)
{
	if (!n)
		return;
	neigh_release(n);
}
130 
131 static void
132 teql_reset(struct Qdisc* sch)
133 {
134 	struct teql_sched_data *dat = qdisc_priv(sch);
135 
136 	skb_queue_purge(&dat->q);
137 	sch->q.qlen = 0;
138 	teql_neigh_release(xchg(&dat->ncache, NULL));
139 }
140 
/* Destructor: unlink this qdisc from its master's circular slave list
 * and release its queued packets and cached neighbour.  If it was the
 * last slave, also reset the master's root qdisc under queue_lock so
 * no stale packets survive. */
static void
teql_destroy(struct Qdisc* sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if ((prev = master->slaves) != NULL) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Unlink sch from the circular list. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* Still pointing at q => q was the only
					 * slave; the list is now empty. */
					if (q == master->slaves) {
						master->slaves = NULL;
						spin_lock_bh(&master->dev->queue_lock);
						qdisc_reset(master->dev->qdisc);
						spin_unlock_bh(&master->dev->queue_lock);
					}
				}
				skb_queue_purge(&dat->q);
				teql_neigh_release(xchg(&dat->ncache, NULL));
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
170 
/* Attach a new teql qdisc instance (a slave) to its master.
 *
 * Rejects slaves whose link-layer header would not fit the master
 * (-EINVAL) and attaching the master to itself (-ELOOP).  While the
 * master is UP a new slave must not force the master's flags or MTU
 * to shrink; while it is down the master's flags/MTU are narrowed to
 * match the new slave instead. */
static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct net_device *dev = sch->dev;
	/* The ops struct is embedded first in teql_master, so the cast
	 * recovers the owning master. */
	struct teql_master *m = (struct teql_master*)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is live: refuse slaves that would weaken it. */
			if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
			    || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
			    || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
			    || dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: narrow its capabilities instead. */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert the new slave right after the current list head. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: a one-element circular list; the master
		 * inherits its MTU and link-mode flags outright. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
214 
215 
/* Build the link-layer header for skb so it can be transmitted by
 * slave device 'dev'.
 *
 * Looks up (and caches in q->ncache) a neighbour entry for the
 * packet's next hop on this slave.  Returns:
 *   0        header filled in, packet ready to transmit
 *   1        neighbour not valid yet, skb_res handed to resolution
 *   -EAGAIN  as above but no skb_res was supplied
 *   <0       hard failure (no neighbour table / lookup error)
 */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
{
	struct teql_sched_data *q = qdisc_priv(dev->qdisc);
	struct neighbour *mn = skb->dst->neighbour;
	struct neighbour *n = q->ncache;

	if (mn->tbl == NULL)
		return -EINVAL;
	if (n && n->tbl == mn->tbl &&
	    memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
		/* Cache hit: take an extra reference on the cached entry. */
		atomic_inc(&n->refcnt);
	} else {
		n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
		if (IS_ERR(n))
			return PTR_ERR(n);
	}
	if (neigh_event_send(n, skb_res) == 0) {
		int err;

		/* n->ha may change concurrently; read it under n->lock. */
		read_lock(&n->lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      n->ha, NULL, skb->len);
		read_unlock(&n->lock);

		if (err < 0) {
			neigh_release(n);
			return -EINVAL;
		}
		/* Swap n into the cache, releasing whatever was there. */
		teql_neigh_release(xchg(&q->ncache, n));
		return 0;
	}
	neigh_release(n);
	return (skb_res == NULL) ? -EAGAIN : 1;
}
251 
252 static inline int teql_resolve(struct sk_buff *skb,
253 			       struct sk_buff *skb_res, struct net_device *dev)
254 {
255 	if (dev->header_ops == NULL ||
256 	    skb->dst == NULL ||
257 	    skb->dst->neighbour == NULL)
258 		return 0;
259 	return __teql_resolve(skb, skb_res, dev);
260 }
261 
/* hard_start_xmit for the master device: round-robin the packet over
 * the attached slaves, starting at master->slaves.
 *
 * The first pass tries each usable slave without handing the skb to
 * neighbour resolution (skb_res == NULL).  If every candidate needed
 * resolution, a second pass runs with skb_res == skb so the packet is
 * queued for resolution instead of being dropped.
 * Returns 0 when the skb was consumed, 1 to ask the stack to requeue
 * it (all slaves busy). */
static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int len = skb->len;
	int subq = skb->queue_mapping;
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	if ((q = start) == NULL)
		goto drop;

	do {
		struct net_device *slave = q->dev;

		/* Skip slaves whose teql qdisc is no longer active. */
		if (slave->qdisc_sleeping != q)
			continue;
		if (netif_queue_stopped(slave) ||
		    netif_subqueue_stopped(slave, subq) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave)) {
		case 0:
			/* Header resolved: try to transmit directly. */
			if (netif_tx_trylock(slave)) {
				if (!netif_queue_stopped(slave) &&
				    !netif_subqueue_stopped(slave, subq) &&
				    slave->hard_start_xmit(skb, slave) == 0) {
					netif_tx_unlock(slave);
					/* Advance the round-robin pointer. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->stats.tx_packets++;
					master->stats.tx_bytes += len;
					return 0;
				}
				netif_tx_unlock(slave);
			}
			if (netif_queue_stopped(dev))
				busy = 1;
			break;
		case 1:
			/* skb was consumed by neighbour resolution. */
			master->slaves = NEXT_SLAVE(q);
			return 0;
		default:
			/* Needs resolution; remember for the second pass. */
			nores = 1;
			break;
		}
		/* Strip any link-layer header a failed attempt pushed. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Second pass: allow queueing skb for resolution. */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return 1;
	}
	master->stats.tx_errors++;

drop:
	master->stats.tx_dropped++;
	dev_kfree_skb(skb);
	return 0;
}
337 
338 static int teql_master_open(struct net_device *dev)
339 {
340 	struct Qdisc * q;
341 	struct teql_master *m = netdev_priv(dev);
342 	int mtu = 0xFFFE;
343 	unsigned flags = IFF_NOARP|IFF_MULTICAST;
344 
345 	if (m->slaves == NULL)
346 		return -EUNATCH;
347 
348 	flags = FMASK;
349 
350 	q = m->slaves;
351 	do {
352 		struct net_device *slave = q->dev;
353 
354 		if (slave == NULL)
355 			return -EUNATCH;
356 
357 		if (slave->mtu < mtu)
358 			mtu = slave->mtu;
359 		if (slave->hard_header_len > LL_MAX_HEADER)
360 			return -EINVAL;
361 
362 		/* If all the slaves are BROADCAST, master is BROADCAST
363 		   If all the slaves are PtP, master is PtP
364 		   Otherwise, master is NBMA.
365 		 */
366 		if (!(slave->flags&IFF_POINTOPOINT))
367 			flags &= ~IFF_POINTOPOINT;
368 		if (!(slave->flags&IFF_BROADCAST))
369 			flags &= ~IFF_BROADCAST;
370 		if (!(slave->flags&IFF_MULTICAST))
371 			flags &= ~IFF_MULTICAST;
372 	} while ((q = NEXT_SLAVE(q)) != m->slaves);
373 
374 	m->dev->mtu = mtu;
375 	m->dev->flags = (m->dev->flags&~FMASK) | flags;
376 	netif_start_queue(m->dev);
377 	return 0;
378 }
379 
/* Bring the master device down: just stop its transmit queue. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
385 
/* get_stats hook: statistics live in the master's private area. */
static struct net_device_stats *teql_master_stats(struct net_device *dev)
{
	struct teql_master *m = netdev_priv(dev);
	return &m->stats;
}
391 
392 static int teql_master_mtu(struct net_device *dev, int new_mtu)
393 {
394 	struct teql_master *m = netdev_priv(dev);
395 	struct Qdisc *q;
396 
397 	if (new_mtu < 68)
398 		return -EINVAL;
399 
400 	q = m->slaves;
401 	if (q) {
402 		do {
403 			if (new_mtu > q->dev->mtu)
404 				return -EINVAL;
405 		} while ((q=NEXT_SLAVE(q)) != m->slaves);
406 	}
407 
408 	dev->mtu = new_mtu;
409 	return 0;
410 }
411 
/* Netdev setup callback for a freshly allocated teql master: wires up
 * both the device operations and the per-master qdisc ops.  The ops
 * struct is embedded in teql_master so qdisc code can recover the
 * master by casting sch->ops (see teql_qdisc_init()). */
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->requeue	=	teql_requeue;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->open		= teql_master_open;
	dev->hard_start_xmit	= teql_master_xmit;
	dev->stop		= teql_master_close;
	dev->get_stats		= teql_master_stats;
	dev->change_mtu		= teql_master_mtu;
	dev->type		= ARPHRD_VOID;	/* no real hardware type */
	dev->mtu		= 1500;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
}
439 
/* All registered master devices, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN master devices created at module load time. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
444 
/* Module init: create max_equalizers master devices ("teql0", ...),
 * registering a matching qdisc (named after the device) for each.
 * Returns 0 if at least one master was set up, otherwise the error
 * from the first (failed) iteration. */
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master),
				  "teql%d", teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* The qdisc is looked up by name; reuse the dev name. */
		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Partial success still counts: i > 0 masters are usable. */
	return i ? 0 : err;
}
481 
/* Module exit: tear down every master created in teql_init(),
 * unregistering the qdisc before its backing device goes away. */
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}
495 
/* Module entry/exit points and license. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
500