xref: /linux/net/sched/sch_teql.c (revision bde5d79d00255db609fe9d859eef8c7b6d38b137)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
3  *
4  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21 
22 /*
   How to set it up.
   -----------------
25 
26    After loading this module you will find a new device teqlN
27    and new qdisc with the same name. To join a slave to the equalizer
28    you should just set this qdisc on a device f.e.
29 
30    # tc qdisc add dev eth0 root teql0
31    # tc qdisc add dev eth1 root teql0
32 
33    That's all. Full PnP 8)
34 
35    Applicability.
36    --------------
37 
38    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39       signal and generate EOI events. If you want to equalize virtual devices
40       like tunnels, use a normal eql device.
41    2. This device puts no limitations on physical slave characteristics
42       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
43       Certainly, large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
45       I estimate an upper useful difference as ~10 times.
46    3. If the slave requires address resolution, only protocols using
47       neighbour cache (IPv4/IPv6) will work over the equalized link.
48       Other protocols are still allowed to use the slave device directly,
49       which will not break load balancing, though native slave
50       traffic will have the highest priority.  */
51 
/*
 * Per-equalizer state.  It lives in the private area of the master
 * net_device (allocated with alloc_netdev(sizeof(struct teql_master), ...)
 * and retrieved with netdev_priv()).
 */
struct teql_master {
	/* Must remain the first member: teql_qdisc_init() casts sch->ops
	 * straight back to a struct teql_master pointer.
	 */
	struct Qdisc_ops qops;
	struct net_device *dev;		/* the teqlN master device itself */
	struct Qdisc *slaves;		/* current slave in the circular list, NULL if none */
	struct list_head master_list;	/* link in master_dev_list */
	unsigned long	tx_bytes;	/* counters reported by teql_master_stats64() */
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};
62 
/* Private data of one slave "teql*" qdisc instance (see qdisc_priv()). */
struct teql_sched_data {
	struct Qdisc *next;		/* next slave qdisc in the master's circular list */
	struct teql_master *m;		/* owning master, set by teql_qdisc_init() */
	struct sk_buff_head q;		/* packets queued on this slave */
};
68 
/* Successor of slave qdisc @q in the circular slave list; usable as an lvalue. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device takes over from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
72 
73 /* "teql*" qdisc routines */
74 
75 static int
76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78 	struct net_device *dev = qdisc_dev(sch);
79 	struct teql_sched_data *q = qdisc_priv(sch);
80 
81 	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
82 		__skb_queue_tail(&q->q, skb);
83 		return NET_XMIT_SUCCESS;
84 	}
85 
86 	return qdisc_drop(skb, sch, to_free);
87 }
88 
89 static struct sk_buff *
90 teql_dequeue(struct Qdisc *sch)
91 {
92 	struct teql_sched_data *dat = qdisc_priv(sch);
93 	struct netdev_queue *dat_queue;
94 	struct sk_buff *skb;
95 	struct Qdisc *q;
96 
97 	skb = __skb_dequeue(&dat->q);
98 	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
99 	q = rcu_dereference_bh(dat_queue->qdisc);
100 
101 	if (skb == NULL) {
102 		struct net_device *m = qdisc_dev(q);
103 		if (m) {
104 			dat->m->slaves = sch;
105 			netif_wake_queue(m);
106 		}
107 	} else {
108 		qdisc_bstats_update(sch, skb);
109 	}
110 	sch->q.qlen = dat->q.qlen + q->q.qlen;
111 	return skb;
112 }
113 
114 static struct sk_buff *
115 teql_peek(struct Qdisc *sch)
116 {
117 	/* teql is meant to be used as root qdisc */
118 	return NULL;
119 }
120 
121 static void
122 teql_reset(struct Qdisc *sch)
123 {
124 	struct teql_sched_data *dat = qdisc_priv(sch);
125 
126 	skb_queue_purge(&dat->q);
127 }
128 
/*
 * Unlink this slave qdisc from its master's circular slave list and free
 * any packets it still holds.  Does nothing when the qdisc was never bound
 * to a master (dat->m is NULL) or when the master has no slaves.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		/* Walk the ring once, looking for the predecessor of sch. */
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Splice sch out of the ring. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					/* sch was the current slave: move on. */
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						/* sch pointed at itself, so it was
						 * the only slave: the list becomes
						 * empty and the qdisc on the master's
						 * tx queue 0 is reset under its root
						 * lock.
						 */
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}
167 
168 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
169 			   struct netlink_ext_ack *extack)
170 {
171 	struct net_device *dev = qdisc_dev(sch);
172 	struct teql_master *m = (struct teql_master *)sch->ops;
173 	struct teql_sched_data *q = qdisc_priv(sch);
174 
175 	if (dev->hard_header_len > m->dev->hard_header_len)
176 		return -EINVAL;
177 
178 	if (m->dev == dev)
179 		return -ELOOP;
180 
181 	q->m = m;
182 
183 	skb_queue_head_init(&q->q);
184 
185 	if (m->slaves) {
186 		if (m->dev->flags & IFF_UP) {
187 			if ((m->dev->flags & IFF_POINTOPOINT &&
188 			     !(dev->flags & IFF_POINTOPOINT)) ||
189 			    (m->dev->flags & IFF_BROADCAST &&
190 			     !(dev->flags & IFF_BROADCAST)) ||
191 			    (m->dev->flags & IFF_MULTICAST &&
192 			     !(dev->flags & IFF_MULTICAST)) ||
193 			    dev->mtu < m->dev->mtu)
194 				return -EINVAL;
195 		} else {
196 			if (!(dev->flags&IFF_POINTOPOINT))
197 				m->dev->flags &= ~IFF_POINTOPOINT;
198 			if (!(dev->flags&IFF_BROADCAST))
199 				m->dev->flags &= ~IFF_BROADCAST;
200 			if (!(dev->flags&IFF_MULTICAST))
201 				m->dev->flags &= ~IFF_MULTICAST;
202 			if (dev->mtu < m->dev->mtu)
203 				m->dev->mtu = dev->mtu;
204 		}
205 		q->next = NEXT_SLAVE(m->slaves);
206 		NEXT_SLAVE(m->slaves) = sch;
207 	} else {
208 		q->next = sch;
209 		m->slaves = sch;
210 		m->dev->mtu = dev->mtu;
211 		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
212 	}
213 	return 0;
214 }
215 
216 
217 static int
218 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
219 	       struct net_device *dev, struct netdev_queue *txq,
220 	       struct dst_entry *dst)
221 {
222 	struct neighbour *n;
223 	int err = 0;
224 
225 	n = dst_neigh_lookup_skb(dst, skb);
226 	if (!n)
227 		return -ENOENT;
228 
229 	if (dst->dev != dev) {
230 		struct neighbour *mn;
231 
232 		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
233 		neigh_release(n);
234 		if (IS_ERR(mn))
235 			return PTR_ERR(mn);
236 		n = mn;
237 	}
238 
239 	if (neigh_event_send(n, skb_res) == 0) {
240 		int err;
241 		char haddr[MAX_ADDR_LEN];
242 
243 		neigh_ha_snapshot(haddr, n, dev);
244 		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
245 				      haddr, NULL, skb->len);
246 
247 		if (err < 0)
248 			err = -EINVAL;
249 	} else {
250 		err = (skb_res == NULL) ? -EAGAIN : 1;
251 	}
252 	neigh_release(n);
253 	return err;
254 }
255 
256 static inline int teql_resolve(struct sk_buff *skb,
257 			       struct sk_buff *skb_res,
258 			       struct net_device *dev,
259 			       struct netdev_queue *txq)
260 {
261 	struct dst_entry *dst = skb_dst(skb);
262 	int res;
263 
264 	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
265 		return -ENODEV;
266 
267 	if (!dev->header_ops || !dst)
268 		return 0;
269 
270 	rcu_read_lock();
271 	res = __teql_resolve(skb, skb_res, dev, txq, dst);
272 	rcu_read_unlock();
273 
274 	return res;
275 }
276 
/*
 * ndo_start_xmit of the master device: hand @skb to one of the slaves.
 *
 * The slaves are tried in ring order, starting at master->slaves.  After
 * a packet has been passed on, master->slaves is advanced to the slave
 * following the one that took it.
 *
 * A first pass calls teql_resolve() with skb_res == NULL.  If no slave
 * took the packet and at least one resolution failed, a second pass is
 * made with skb_res == skb.
 *
 * Returns NETDEV_TX_OK when the packet was transmitted, handed off or
 * dropped, and NETDEV_TX_BUSY (after stopping the master's queue) when a
 * slave was busy.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Note: "continue" in a do/while jumps to the loop condition,
		 * so the __skb_pull() at the bottom is skipped for slaves that
		 * are passed over here.
		 */

		/* Skip slaves whose tx queue 0 no longer has q installed. */
		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Header is ready: transmit directly on the slave,
			 * but only if its tx lock can be taken without waiting.
			 */
			if (__netif_tx_trylock(slave_txq)) {
				/* Read the length before the skb is given away. */
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* The skb is treated as consumed (presumably queued by
			 * the neighbour code until resolution completes -
			 * confirm against neigh_event_send()).
			 */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			/* Resolution failed on this slave. */
			nores = 1;
			break;
		}
		/* Presumably removes any link header pushed for this slave so
		 * the next one starts from the network header again.
		 */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	/* Nothing was sent: retry once, this time passing the skb itself to
	 * the neighbour code.
	 */
	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	/* Not busy and not sent: an error, which is also counted as a drop. */
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}
354 
355 static int teql_master_open(struct net_device *dev)
356 {
357 	struct Qdisc *q;
358 	struct teql_master *m = netdev_priv(dev);
359 	int mtu = 0xFFFE;
360 	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
361 
362 	if (m->slaves == NULL)
363 		return -EUNATCH;
364 
365 	flags = FMASK;
366 
367 	q = m->slaves;
368 	do {
369 		struct net_device *slave = qdisc_dev(q);
370 
371 		if (slave == NULL)
372 			return -EUNATCH;
373 
374 		if (slave->mtu < mtu)
375 			mtu = slave->mtu;
376 		if (slave->hard_header_len > LL_MAX_HEADER)
377 			return -EINVAL;
378 
379 		/* If all the slaves are BROADCAST, master is BROADCAST
380 		   If all the slaves are PtP, master is PtP
381 		   Otherwise, master is NBMA.
382 		 */
383 		if (!(slave->flags&IFF_POINTOPOINT))
384 			flags &= ~IFF_POINTOPOINT;
385 		if (!(slave->flags&IFF_BROADCAST))
386 			flags &= ~IFF_BROADCAST;
387 		if (!(slave->flags&IFF_MULTICAST))
388 			flags &= ~IFF_MULTICAST;
389 	} while ((q = NEXT_SLAVE(q)) != m->slaves);
390 
391 	m->dev->mtu = mtu;
392 	m->dev->flags = (m->dev->flags&~FMASK) | flags;
393 	netif_start_queue(m->dev);
394 	return 0;
395 }
396 
/* ndo_stop of the master device: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
402 
403 static void teql_master_stats64(struct net_device *dev,
404 				struct rtnl_link_stats64 *stats)
405 {
406 	struct teql_master *m = netdev_priv(dev);
407 
408 	stats->tx_packets	= m->tx_packets;
409 	stats->tx_bytes		= m->tx_bytes;
410 	stats->tx_errors	= m->tx_errors;
411 	stats->tx_dropped	= m->tx_dropped;
412 }
413 
414 static int teql_master_mtu(struct net_device *dev, int new_mtu)
415 {
416 	struct teql_master *m = netdev_priv(dev);
417 	struct Qdisc *q;
418 
419 	q = m->slaves;
420 	if (q) {
421 		do {
422 			if (new_mtu > qdisc_dev(q)->mtu)
423 				return -EINVAL;
424 		} while ((q = NEXT_SLAVE(q)) != m->slaves);
425 	}
426 
427 	WRITE_ONCE(dev->mtu, new_mtu);
428 	return 0;
429 }
430 
431 static const struct net_device_ops teql_netdev_ops = {
432 	.ndo_open	= teql_master_open,
433 	.ndo_stop	= teql_master_close,
434 	.ndo_start_xmit	= teql_master_xmit,
435 	.ndo_get_stats64 = teql_master_stats64,
436 	.ndo_change_mtu	= teql_master_mtu,
437 };
438 
439 static __init void teql_master_setup(struct net_device *dev)
440 {
441 	struct teql_master *master = netdev_priv(dev);
442 	struct Qdisc_ops *ops = &master->qops;
443 
444 	master->dev	= dev;
445 	ops->priv_size  = sizeof(struct teql_sched_data);
446 
447 	ops->enqueue	=	teql_enqueue;
448 	ops->dequeue	=	teql_dequeue;
449 	ops->peek	=	teql_peek;
450 	ops->init	=	teql_qdisc_init;
451 	ops->reset	=	teql_reset;
452 	ops->destroy	=	teql_destroy;
453 	ops->owner	=	THIS_MODULE;
454 
455 	dev->netdev_ops =       &teql_netdev_ops;
456 	dev->type		= ARPHRD_VOID;
457 	dev->mtu		= 1500;
458 	dev->min_mtu		= 68;
459 	dev->max_mtu		= 65535;
460 	dev->tx_queue_len	= 100;
461 	dev->flags		= IFF_NOARP;
462 	dev->hard_header_len	= LL_MAX_HEADER;
463 	netif_keep_dst(dev);
464 }
465 
/* Every successfully registered master; filled by teql_init(), drained by teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN masters teql_init() tries to create. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
470 
471 static int __init teql_init(void)
472 {
473 	int i;
474 	int err = -ENODEV;
475 
476 	for (i = 0; i < max_equalizers; i++) {
477 		struct net_device *dev;
478 		struct teql_master *master;
479 
480 		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
481 				   NET_NAME_UNKNOWN, teql_master_setup);
482 		if (!dev) {
483 			err = -ENOMEM;
484 			break;
485 		}
486 
487 		if ((err = register_netdev(dev))) {
488 			free_netdev(dev);
489 			break;
490 		}
491 
492 		master = netdev_priv(dev);
493 
494 		strscpy(master->qops.id, dev->name, IFNAMSIZ);
495 		err = register_qdisc(&master->qops);
496 
497 		if (err) {
498 			unregister_netdev(dev);
499 			free_netdev(dev);
500 			break;
501 		}
502 
503 		list_add_tail(&master->master_list, &master_dev_list);
504 	}
505 	return i ? 0 : err;
506 }
507 
508 static void __exit teql_exit(void)
509 {
510 	struct teql_master *master, *nxt;
511 
512 	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
513 
514 		list_del(&master->master_list);
515 
516 		unregister_qdisc(&master->qops);
517 		unregister_netdev(master->dev);
518 		free_netdev(master->dev);
519 	}
520 }
521 
/* Module entry/exit points and metadata. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
527