xref: /linux/net/sched/sch_teql.c (revision 50da4b9d07a7a463e2cfb738f3ad4cff6b2c9c3b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
3  *
4  * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5  */
6 
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21 
22 /*
   How to set it up.
   -----------------
25 
26    After loading this module you will find a new device teqlN
27    and new qdisc with the same name. To join a slave to the equalizer
28    you should just set this qdisc on a device f.e.
29 
30    # tc qdisc add dev eth0 root teql0
31    # tc qdisc add dev eth1 root teql0
32 
33    That's all. Full PnP 8)
34 
35    Applicability.
36    --------------
37 
38    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39       signal and generate EOI events. If you want to equalize virtual devices
40       like tunnels, use a normal eql device.
41    2. This device puts no limitations on physical slave characteristics
42       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
43       Certainly, large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
45       I estimate an upper useful difference as ~10 times.
46    3. If the slave requires address resolution, only protocols using
47       neighbour cache (IPv4/IPv6) will work over the equalized link.
48       Other protocols are still allowed to use the slave device directly,
49       which will not break load balancing, though native slave
50       traffic will have the highest priority.  */
51 
52 struct teql_master {
53 	struct Qdisc_ops qops;
54 	struct net_device *dev;
55 	struct Qdisc *slaves;
56 	struct list_head master_list;
57 	unsigned long	tx_bytes;
58 	unsigned long	tx_packets;
59 	unsigned long	tx_errors;
60 	unsigned long	tx_dropped;
61 };
62 
63 struct teql_sched_data {
64 	struct Qdisc *next;
65 	struct teql_master *m;
66 	struct sk_buff_head q;
67 };
68 
/*
 * Successor of qdisc @q in the slave ring. Expands to the ->next field
 * itself, so it can also be assigned to (used that way when linking and
 * unlinking slaves).
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags the master copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
72 
73 /* "teql*" qdisc routines */
74 
75 static int
76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78 	struct net_device *dev = qdisc_dev(sch);
79 	struct teql_sched_data *q = qdisc_priv(sch);
80 
81 	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
82 		__skb_queue_tail(&q->q, skb);
83 		return NET_XMIT_SUCCESS;
84 	}
85 
86 	return qdisc_drop(skb, sch, to_free);
87 }
88 
89 static struct sk_buff *
90 teql_dequeue(struct Qdisc *sch)
91 {
92 	struct teql_sched_data *dat = qdisc_priv(sch);
93 	struct netdev_queue *dat_queue;
94 	struct sk_buff *skb;
95 	struct Qdisc *q;
96 
97 	skb = __skb_dequeue(&dat->q);
98 	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
99 	q = rcu_dereference_bh(dat_queue->qdisc);
100 
101 	if (skb == NULL) {
102 		struct net_device *m = qdisc_dev(q);
103 		if (m) {
104 			dat->m->slaves = sch;
105 			netif_wake_queue(m);
106 		}
107 	} else {
108 		qdisc_bstats_update(sch, skb);
109 	}
110 	sch->q.qlen = dat->q.qlen + q->q.qlen;
111 	return skb;
112 }
113 
114 static struct sk_buff *
115 teql_peek(struct Qdisc *sch)
116 {
117 	/* teql is meant to be used as root qdisc */
118 	return NULL;
119 }
120 
121 static void
122 teql_reset(struct Qdisc *sch)
123 {
124 	struct teql_sched_data *dat = qdisc_priv(sch);
125 
126 	skb_queue_purge(&dat->q);
127 }
128 
129 static void
130 teql_destroy(struct Qdisc *sch)
131 {
132 	struct Qdisc *q, *prev;
133 	struct teql_sched_data *dat = qdisc_priv(sch);
134 	struct teql_master *master = dat->m;
135 
136 	if (!master)
137 		return;
138 
139 	prev = master->slaves;
140 	if (prev) {
141 		do {
142 			q = NEXT_SLAVE(prev);
143 			if (q == sch) {
144 				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
145 				if (q == master->slaves) {
146 					master->slaves = NEXT_SLAVE(q);
147 					if (q == master->slaves) {
148 						struct netdev_queue *txq;
149 						spinlock_t *root_lock;
150 
151 						txq = netdev_get_tx_queue(master->dev, 0);
152 						master->slaves = NULL;
153 
154 						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
155 						spin_lock_bh(root_lock);
156 						qdisc_reset(rtnl_dereference(txq->qdisc));
157 						spin_unlock_bh(root_lock);
158 					}
159 				}
160 				skb_queue_purge(&dat->q);
161 				break;
162 			}
163 
164 		} while ((prev = q) != master->slaves);
165 	}
166 }
167 
168 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
169 			   struct netlink_ext_ack *extack)
170 {
171 	struct net_device *dev = qdisc_dev(sch);
172 	struct teql_master *m = (struct teql_master *)sch->ops;
173 	struct teql_sched_data *q = qdisc_priv(sch);
174 
175 	if (dev->hard_header_len > m->dev->hard_header_len)
176 		return -EINVAL;
177 
178 	if (m->dev == dev)
179 		return -ELOOP;
180 
181 	if (sch->parent != TC_H_ROOT) {
182 		NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
183 		return -EOPNOTSUPP;
184 	}
185 
186 	q->m = m;
187 
188 	skb_queue_head_init(&q->q);
189 
190 	if (m->slaves) {
191 		if (m->dev->flags & IFF_UP) {
192 			if ((m->dev->flags & IFF_POINTOPOINT &&
193 			     !(dev->flags & IFF_POINTOPOINT)) ||
194 			    (m->dev->flags & IFF_BROADCAST &&
195 			     !(dev->flags & IFF_BROADCAST)) ||
196 			    (m->dev->flags & IFF_MULTICAST &&
197 			     !(dev->flags & IFF_MULTICAST)) ||
198 			    dev->mtu < m->dev->mtu)
199 				return -EINVAL;
200 		} else {
201 			if (!(dev->flags&IFF_POINTOPOINT))
202 				m->dev->flags &= ~IFF_POINTOPOINT;
203 			if (!(dev->flags&IFF_BROADCAST))
204 				m->dev->flags &= ~IFF_BROADCAST;
205 			if (!(dev->flags&IFF_MULTICAST))
206 				m->dev->flags &= ~IFF_MULTICAST;
207 			if (dev->mtu < m->dev->mtu)
208 				m->dev->mtu = dev->mtu;
209 		}
210 		q->next = NEXT_SLAVE(m->slaves);
211 		NEXT_SLAVE(m->slaves) = sch;
212 	} else {
213 		q->next = sch;
214 		m->slaves = sch;
215 		m->dev->mtu = dev->mtu;
216 		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
217 	}
218 	return 0;
219 }
220 
221 
222 static int
223 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
224 	       struct net_device *dev, struct netdev_queue *txq,
225 	       struct dst_entry *dst)
226 {
227 	struct neighbour *n;
228 	int err = 0;
229 
230 	n = dst_neigh_lookup_skb(dst, skb);
231 	if (!n)
232 		return -ENOENT;
233 
234 	if (dst->dev != dev) {
235 		struct neighbour *mn;
236 
237 		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
238 		neigh_release(n);
239 		if (IS_ERR(mn))
240 			return PTR_ERR(mn);
241 		n = mn;
242 	}
243 
244 	if (neigh_event_send(n, skb_res) == 0) {
245 		int err;
246 		char haddr[MAX_ADDR_LEN];
247 
248 		neigh_ha_snapshot(haddr, n, dev);
249 		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
250 				      haddr, NULL, skb->len);
251 
252 		if (err < 0)
253 			err = -EINVAL;
254 	} else {
255 		err = (skb_res == NULL) ? -EAGAIN : 1;
256 	}
257 	neigh_release(n);
258 	return err;
259 }
260 
261 static inline int teql_resolve(struct sk_buff *skb,
262 			       struct sk_buff *skb_res,
263 			       struct net_device *dev,
264 			       struct netdev_queue *txq)
265 {
266 	struct dst_entry *dst = skb_dst(skb);
267 	int res;
268 
269 	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
270 		return -ENODEV;
271 
272 	if (!dev->header_ops || !dst)
273 		return 0;
274 
275 	rcu_read_lock();
276 	res = __teql_resolve(skb, skb_res, dev, txq, dst);
277 	rcu_read_unlock();
278 
279 	return res;
280 }
281 
282 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
283 {
284 	struct teql_master *master = netdev_priv(dev);
285 	struct Qdisc *start, *q;
286 	int busy;
287 	int nores;
288 	int subq = skb_get_queue_mapping(skb);
289 	struct sk_buff *skb_res = NULL;
290 
291 	start = master->slaves;
292 
293 restart:
294 	nores = 0;
295 	busy = 0;
296 
297 	q = start;
298 	if (!q)
299 		goto drop;
300 
301 	do {
302 		struct net_device *slave = qdisc_dev(q);
303 		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
304 
305 		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
306 			continue;
307 		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
308 		    !netif_running(slave)) {
309 			busy = 1;
310 			continue;
311 		}
312 
313 		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
314 		case 0:
315 			if (__netif_tx_trylock(slave_txq)) {
316 				unsigned int length = qdisc_pkt_len(skb);
317 
318 				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
319 				    netdev_start_xmit(skb, slave, slave_txq, false) ==
320 				    NETDEV_TX_OK) {
321 					__netif_tx_unlock(slave_txq);
322 					master->slaves = NEXT_SLAVE(q);
323 					netif_wake_queue(dev);
324 					master->tx_packets++;
325 					master->tx_bytes += length;
326 					return NETDEV_TX_OK;
327 				}
328 				__netif_tx_unlock(slave_txq);
329 			}
330 			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
331 				busy = 1;
332 			break;
333 		case 1:
334 			master->slaves = NEXT_SLAVE(q);
335 			return NETDEV_TX_OK;
336 		default:
337 			nores = 1;
338 			break;
339 		}
340 		__skb_pull(skb, skb_network_offset(skb));
341 	} while ((q = NEXT_SLAVE(q)) != start);
342 
343 	if (nores && skb_res == NULL) {
344 		skb_res = skb;
345 		goto restart;
346 	}
347 
348 	if (busy) {
349 		netif_stop_queue(dev);
350 		return NETDEV_TX_BUSY;
351 	}
352 	master->tx_errors++;
353 
354 drop:
355 	master->tx_dropped++;
356 	dev_kfree_skb(skb);
357 	return NETDEV_TX_OK;
358 }
359 
360 static int teql_master_open(struct net_device *dev)
361 {
362 	struct Qdisc *q;
363 	struct teql_master *m = netdev_priv(dev);
364 	int mtu = 0xFFFE;
365 	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
366 
367 	if (m->slaves == NULL)
368 		return -EUNATCH;
369 
370 	flags = FMASK;
371 
372 	q = m->slaves;
373 	do {
374 		struct net_device *slave = qdisc_dev(q);
375 
376 		if (slave == NULL)
377 			return -EUNATCH;
378 
379 		if (slave->mtu < mtu)
380 			mtu = slave->mtu;
381 		if (slave->hard_header_len > LL_MAX_HEADER)
382 			return -EINVAL;
383 
384 		/* If all the slaves are BROADCAST, master is BROADCAST
385 		   If all the slaves are PtP, master is PtP
386 		   Otherwise, master is NBMA.
387 		 */
388 		if (!(slave->flags&IFF_POINTOPOINT))
389 			flags &= ~IFF_POINTOPOINT;
390 		if (!(slave->flags&IFF_BROADCAST))
391 			flags &= ~IFF_BROADCAST;
392 		if (!(slave->flags&IFF_MULTICAST))
393 			flags &= ~IFF_MULTICAST;
394 	} while ((q = NEXT_SLAVE(q)) != m->slaves);
395 
396 	m->dev->mtu = mtu;
397 	m->dev->flags = (m->dev->flags&~FMASK) | flags;
398 	netif_start_queue(m->dev);
399 	return 0;
400 }
401 
/* Take the master device down: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
407 
408 static void teql_master_stats64(struct net_device *dev,
409 				struct rtnl_link_stats64 *stats)
410 {
411 	struct teql_master *m = netdev_priv(dev);
412 
413 	stats->tx_packets	= m->tx_packets;
414 	stats->tx_bytes		= m->tx_bytes;
415 	stats->tx_errors	= m->tx_errors;
416 	stats->tx_dropped	= m->tx_dropped;
417 }
418 
419 static int teql_master_mtu(struct net_device *dev, int new_mtu)
420 {
421 	struct teql_master *m = netdev_priv(dev);
422 	struct Qdisc *q;
423 
424 	q = m->slaves;
425 	if (q) {
426 		do {
427 			if (new_mtu > qdisc_dev(q)->mtu)
428 				return -EINVAL;
429 		} while ((q = NEXT_SLAVE(q)) != m->slaves);
430 	}
431 
432 	WRITE_ONCE(dev->mtu, new_mtu);
433 	return 0;
434 }
435 
436 static const struct net_device_ops teql_netdev_ops = {
437 	.ndo_open	= teql_master_open,
438 	.ndo_stop	= teql_master_close,
439 	.ndo_start_xmit	= teql_master_xmit,
440 	.ndo_get_stats64 = teql_master_stats64,
441 	.ndo_change_mtu	= teql_master_mtu,
442 };
443 
444 static __init void teql_master_setup(struct net_device *dev)
445 {
446 	struct teql_master *master = netdev_priv(dev);
447 	struct Qdisc_ops *ops = &master->qops;
448 
449 	master->dev	= dev;
450 	ops->priv_size  = sizeof(struct teql_sched_data);
451 
452 	ops->enqueue	=	teql_enqueue;
453 	ops->dequeue	=	teql_dequeue;
454 	ops->peek	=	teql_peek;
455 	ops->init	=	teql_qdisc_init;
456 	ops->reset	=	teql_reset;
457 	ops->destroy	=	teql_destroy;
458 	ops->owner	=	THIS_MODULE;
459 
460 	dev->netdev_ops =       &teql_netdev_ops;
461 	dev->type		= ARPHRD_VOID;
462 	dev->mtu		= 1500;
463 	dev->min_mtu		= 68;
464 	dev->max_mtu		= 65535;
465 	dev->tx_queue_len	= 100;
466 	dev->flags		= IFF_NOARP;
467 	dev->hard_header_len	= LL_MAX_HEADER;
468 	netif_keep_dst(dev);
469 }
470 
/* Every successfully registered equalizer, linked via teql_master.master_list. */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices teql_init() tries to create; settable at load time. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
475 
476 static int __init teql_init(void)
477 {
478 	int i;
479 	int err = -ENODEV;
480 
481 	for (i = 0; i < max_equalizers; i++) {
482 		struct net_device *dev;
483 		struct teql_master *master;
484 
485 		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
486 				   NET_NAME_UNKNOWN, teql_master_setup);
487 		if (!dev) {
488 			err = -ENOMEM;
489 			break;
490 		}
491 
492 		if ((err = register_netdev(dev))) {
493 			free_netdev(dev);
494 			break;
495 		}
496 
497 		master = netdev_priv(dev);
498 
499 		strscpy(master->qops.id, dev->name, IFNAMSIZ);
500 		err = register_qdisc(&master->qops);
501 
502 		if (err) {
503 			unregister_netdev(dev);
504 			free_netdev(dev);
505 			break;
506 		}
507 
508 		list_add_tail(&master->master_list, &master_dev_list);
509 	}
510 	return i ? 0 : err;
511 }
512 
513 static void __exit teql_exit(void)
514 {
515 	struct teql_master *master, *nxt;
516 
517 	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
518 
519 		list_del(&master->master_list);
520 
521 		unregister_qdisc(&master->qops);
522 		unregister_netdev(master->dev);
523 		free_netdev(master->dev);
524 	}
525 }
526 
/* Module entry and exit points. */
module_init(teql_init);
module_exit(teql_exit);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
532