xref: /linux/net/sched/sch_teql.c (revision 2c7e63d702f6c4209c5af833308e7fcbc7d4ab17)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
3  *
4  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21 
22 /*
23    How to set it up.
24    ----------------
25 
26    After loading this module you will find a new device teqlN
27    and new qdisc with the same name. To join a slave to the equalizer
28    you should just set this qdisc on a device f.e.
29 
30    # tc qdisc add dev eth0 root teql0
31    # tc qdisc add dev eth1 root teql0
32 
33    That's all. Full PnP 8)
34 
35    Applicability.
36    --------------
37 
38    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39       signal and generate EOI events. If you want to equalize virtual devices
40       like tunnels, use a normal eql device.
41    2. This device puts no limitations on physical slave characteristics
42       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
43       Certainly, large difference in link speeds will make the resulting
44       equalized link unusable, because of huge packet reordering.
45       I estimate an upper useful difference as ~10 times.
46    3. If the slave requires address resolution, only protocols using
47       neighbour cache (IPv4/IPv6) will work over the equalized link.
48       Other protocols are still allowed to use the slave device directly,
49       which will not break load balancing, though native slave
50       traffic will have the highest priority.  */
51 
/* One equalizer instance: the virtual teqlN net_device plus the
 * Qdisc_ops registered under the same name.  qops is the first member
 * so teql_qdisc_init() can cast sch->ops back to the master.
 */
struct teql_master {
	struct Qdisc_ops qops;		/* per-master qdisc ops; qops.id is set to dev->name */
	struct net_device *dev;		/* the virtual master device */
	struct Qdisc *slaves;		/* circular list of slave qdiscs, linked via NEXT_SLAVE() */
	struct list_head master_list;	/* node in master_dev_list */
	unsigned long	tx_bytes;	/* TX counters reported by teql_master_stats64() */
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
62 
/* Private data of one slave qdisc attached to an equalizer. */
struct teql_sched_data {
	struct Qdisc *next;		/* next slave in the master's circular list */
	struct teql_master *m;		/* owning equalizer (NULL until init succeeds) */
	struct sk_buff_head q;		/* packets queued on this slave */
};
68 
/* Follow the circular slave list embedded in each slave's private data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags the master derives from the intersection of its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
72 
73 /* "teql*" qdisc routines */
74 
75 static int
teql_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78 	struct net_device *dev = qdisc_dev(sch);
79 	struct teql_sched_data *q = qdisc_priv(sch);
80 
81 	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
82 		__skb_queue_tail(&q->q, skb);
83 		return NET_XMIT_SUCCESS;
84 	}
85 
86 	return qdisc_drop(skb, sch, to_free);
87 }
88 
/* Dequeue for a slave qdisc: hand out the next queued packet, or, when
 * this slave runs dry, make it the master's current round-robin slave
 * and wake the master queue so teql_master_xmit() can refill it.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	/* The master device has a single TX queue (index 0). */
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			/* Empty: park the round-robin here and restart
			 * the master's transmit path.
			 */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	/* Advertise the combined backlog (ours + the master qdisc's). */
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}
113 
114 static struct sk_buff *
teql_peek(struct Qdisc * sch)115 teql_peek(struct Qdisc *sch)
116 {
117 	/* teql is meant to be used as root qdisc */
118 	return NULL;
119 }
120 
121 static void
teql_reset(struct Qdisc * sch)122 teql_reset(struct Qdisc *sch)
123 {
124 	struct teql_sched_data *dat = qdisc_priv(sch);
125 
126 	skb_queue_purge(&dat->q);
127 }
128 
/* Detach a slave qdisc from its master: unlink it from the circular
 * slave list and, when it was the last slave, reset the master's own
 * root qdisc under the root sleeping lock.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* NOTE(review): presumably m is still NULL when init bailed out
	 * before assigning q->m — nothing to unlink then; confirm.
	 */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Unlink sch from the circular list. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* Still pointing at q: it was the
					 * only slave — the master is now
					 * slave-less.
					 */
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
167 
/* Attach a new slave device to an equalizer.  The Qdisc_ops is the
 * first member of struct teql_master, so sch->ops can be cast back to
 * the owning master.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	/* The slave's link-layer header must fit in the headroom the
	 * master reserves (LL_MAX_HEADER, see teql_master_setup()).
	 */
	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	/* Attaching the master to itself would loop packets forever. */
	if (m->dev == dev)
		return -ELOOP;

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
		return -EOPNOTSUPP;
	}

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is up: the new slave must not weaken the
			 * master's advertised capabilities or shrink its MTU.
			 */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: instead, degrade its flags and
			 * MTU to the intersection with the new slave.
			 */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert right after the current head of the circle. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: a one-element circular list; the master
		 * adopts its MTU and link-type flags.
		 */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}
220 
221 
222 static int
__teql_resolve(struct sk_buff * skb,struct sk_buff * skb_res,struct net_device * dev,struct netdev_queue * txq,struct dst_entry * dst)223 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
224 	       struct net_device *dev, struct netdev_queue *txq,
225 	       struct dst_entry *dst)
226 {
227 	struct neighbour *n;
228 	int err = 0;
229 
230 	n = dst_neigh_lookup_skb(dst, skb);
231 	if (!n)
232 		return -ENOENT;
233 
234 	if (dst->dev != dev) {
235 		struct neighbour *mn;
236 
237 		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
238 		neigh_release(n);
239 		if (IS_ERR(mn))
240 			return PTR_ERR(mn);
241 		n = mn;
242 	}
243 
244 	if (neigh_event_send(n, skb_res) == 0) {
245 		int err;
246 		char haddr[MAX_ADDR_LEN];
247 
248 		neigh_ha_snapshot(haddr, n, dev);
249 		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
250 				      haddr, NULL, skb->len);
251 
252 		if (err < 0)
253 			err = -EINVAL;
254 	} else {
255 		err = (skb_res == NULL) ? -EAGAIN : 1;
256 	}
257 	neigh_release(n);
258 	return err;
259 }
260 
teql_resolve(struct sk_buff * skb,struct sk_buff * skb_res,struct net_device * dev,struct netdev_queue * txq)261 static inline int teql_resolve(struct sk_buff *skb,
262 			       struct sk_buff *skb_res,
263 			       struct net_device *dev,
264 			       struct netdev_queue *txq)
265 {
266 	struct dst_entry *dst = skb_dst(skb);
267 	int res;
268 
269 	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
270 		return -ENODEV;
271 
272 	if (!dev->header_ops || !dst)
273 		return 0;
274 
275 	rcu_read_lock();
276 	res = __teql_resolve(skb, skb_res, dev, txq, dst);
277 	rcu_read_unlock();
278 
279 	return res;
280 }
281 
/* Transmit on behalf of the master device: walk the circular slave
 * list round-robin, resolve the link-layer header for each candidate
 * slave, and hand the skb to the first slave that accepts it.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip slaves whose root qdisc is no longer this one. */
		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Header resolved: try to transmit directly on
			 * the slave, bypassing its qdisc.
			 */
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				skb->dev = slave;
				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Advance round-robin past this slave. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* Resolution took ownership of the skb (it was
			 * queued pending neighbour resolution).
			 */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		/* Strip any link-layer header a failed attempt pushed. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		/* Retry once, this time allowing the skb itself to be
		 * queued by neighbour resolution (case 1 above).
		 */
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
360 
teql_master_open(struct net_device * dev)361 static int teql_master_open(struct net_device *dev)
362 {
363 	struct Qdisc *q;
364 	struct teql_master *m = netdev_priv(dev);
365 	int mtu = 0xFFFE;
366 	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
367 
368 	if (m->slaves == NULL)
369 		return -EUNATCH;
370 
371 	flags = FMASK;
372 
373 	q = m->slaves;
374 	do {
375 		struct net_device *slave = qdisc_dev(q);
376 
377 		if (slave == NULL)
378 			return -EUNATCH;
379 
380 		if (slave->mtu < mtu)
381 			mtu = slave->mtu;
382 		if (slave->hard_header_len > LL_MAX_HEADER)
383 			return -EINVAL;
384 
385 		/* If all the slaves are BROADCAST, master is BROADCAST
386 		   If all the slaves are PtP, master is PtP
387 		   Otherwise, master is NBMA.
388 		 */
389 		if (!(slave->flags&IFF_POINTOPOINT))
390 			flags &= ~IFF_POINTOPOINT;
391 		if (!(slave->flags&IFF_BROADCAST))
392 			flags &= ~IFF_BROADCAST;
393 		if (!(slave->flags&IFF_MULTICAST))
394 			flags &= ~IFF_MULTICAST;
395 	} while ((q = NEXT_SLAVE(q)) != m->slaves);
396 
397 	m->dev->mtu = mtu;
398 	m->dev->flags = (m->dev->flags&~FMASK) | flags;
399 	netif_start_queue(m->dev);
400 	return 0;
401 }
402 
/* Take the master down: stop its transmit queue.  Slaves stay
 * attached and keep their own state.
 */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}
408 
teql_master_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)409 static void teql_master_stats64(struct net_device *dev,
410 				struct rtnl_link_stats64 *stats)
411 {
412 	struct teql_master *m = netdev_priv(dev);
413 
414 	stats->tx_packets	= m->tx_packets;
415 	stats->tx_bytes		= m->tx_bytes;
416 	stats->tx_errors	= m->tx_errors;
417 	stats->tx_dropped	= m->tx_dropped;
418 }
419 
teql_master_mtu(struct net_device * dev,int new_mtu)420 static int teql_master_mtu(struct net_device *dev, int new_mtu)
421 {
422 	struct teql_master *m = netdev_priv(dev);
423 	struct Qdisc *q;
424 
425 	q = m->slaves;
426 	if (q) {
427 		do {
428 			if (new_mtu > qdisc_dev(q)->mtu)
429 				return -EINVAL;
430 		} while ((q = NEXT_SLAVE(q)) != m->slaves);
431 	}
432 
433 	WRITE_ONCE(dev->mtu, new_mtu);
434 	return 0;
435 }
436 
/* net_device callbacks for the virtual teqlN master device. */
static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};
444 
teql_master_setup(struct net_device * dev)445 static __init void teql_master_setup(struct net_device *dev)
446 {
447 	struct teql_master *master = netdev_priv(dev);
448 	struct Qdisc_ops *ops = &master->qops;
449 
450 	master->dev	= dev;
451 	ops->priv_size  = sizeof(struct teql_sched_data);
452 
453 	ops->enqueue	=	teql_enqueue;
454 	ops->dequeue	=	teql_dequeue;
455 	ops->peek	=	teql_peek;
456 	ops->init	=	teql_qdisc_init;
457 	ops->reset	=	teql_reset;
458 	ops->destroy	=	teql_destroy;
459 	ops->owner	=	THIS_MODULE;
460 
461 	dev->netdev_ops =       &teql_netdev_ops;
462 	dev->type		= ARPHRD_VOID;
463 	dev->mtu		= 1500;
464 	dev->min_mtu		= 68;
465 	dev->max_mtu		= 65535;
466 	dev->tx_queue_len	= 100;
467 	dev->flags		= IFF_NOARP;
468 	dev->hard_header_len	= LL_MAX_HEADER;
469 	netif_keep_dst(dev);
470 }
471 
/* All registered equalizers, linked via teql_master.master_list. */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices created at module load time. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
476 
teql_init(void)477 static int __init teql_init(void)
478 {
479 	int i;
480 	int err = -ENODEV;
481 
482 	for (i = 0; i < max_equalizers; i++) {
483 		struct net_device *dev;
484 		struct teql_master *master;
485 
486 		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
487 				   NET_NAME_UNKNOWN, teql_master_setup);
488 		if (!dev) {
489 			err = -ENOMEM;
490 			break;
491 		}
492 
493 		if ((err = register_netdev(dev))) {
494 			free_netdev(dev);
495 			break;
496 		}
497 
498 		master = netdev_priv(dev);
499 
500 		strscpy(master->qops.id, dev->name, IFNAMSIZ);
501 		err = register_qdisc(&master->qops);
502 
503 		if (err) {
504 			unregister_netdev(dev);
505 			free_netdev(dev);
506 			break;
507 		}
508 
509 		list_add_tail(&master->master_list, &master_dev_list);
510 	}
511 	return i ? 0 : err;
512 }
513 
teql_exit(void)514 static void __exit teql_exit(void)
515 {
516 	struct teql_master *master, *nxt;
517 
518 	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
519 
520 		list_del(&master->master_list);
521 
522 		unregister_qdisc(&master->qops);
523 		unregister_netdev(master->dev);
524 		free_netdev(master->dev);
525 	}
526 }
527 
528 module_init(teql_init);
529 module_exit(teql_exit);
530 
531 MODULE_LICENSE("GPL");
532 MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
533