1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
3 *
4 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
5 */
6
7 #include <linux/module.h>
8 #include <linux/types.h>
9 #include <linux/kernel.h>
10 #include <linux/slab.h>
11 #include <linux/string.h>
12 #include <linux/errno.h>
13 #include <linux/if_arp.h>
14 #include <linux/netdevice.h>
15 #include <linux/init.h>
16 #include <linux/skbuff.h>
17 #include <linux/moduleparam.h>
18 #include <net/dst.h>
19 #include <net/neighbour.h>
20 #include <net/pkt_sched.h>
21
22 /*
How to set it up.
-----------------
25
26 After loading this module you will find a new device teqlN
27 and new qdisc with the same name. To join a slave to the equalizer
28 you should just set this qdisc on a device f.e.
29
30 # tc qdisc add dev eth0 root teql0
31 # tc qdisc add dev eth1 root teql0
32
33 That's all. Full PnP 8)
34
35 Applicability.
36 --------------
37
38 1. Slave devices MUST be active devices, i.e., they must raise the tbusy
39 signal and generate EOI events. If you want to equalize virtual devices
40 like tunnels, use a normal eql device.
41 2. This device puts no limitations on physical slave characteristics
42 f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
43 Certainly, large difference in link speeds will make the resulting
equalized link unusable, because of huge packet reordering.
45 I estimate an upper useful difference as ~10 times.
46 3. If the slave requires address resolution, only protocols using
47 neighbour cache (IPv4/IPv6) will work over the equalized link.
48 Other protocols are still allowed to use the slave device directly,
49 which will not break load balancing, though native slave
50 traffic will have the highest priority. */
51
52 struct teql_master {
53 struct Qdisc_ops qops;
54 struct net_device *dev;
55 struct Qdisc *slaves;
56 struct list_head master_list;
57 unsigned long tx_bytes;
58 unsigned long tx_packets;
59 unsigned long tx_errors;
60 unsigned long tx_dropped;
61 };
62
63 struct teql_sched_data {
64 struct Qdisc *next;
65 struct teql_master *m;
66 struct sk_buff_head q;
67 };
68
/*
 * Successor of slave qdisc @q in the circular list.  This expands to the
 * struct member itself, so it can also be assigned to.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
72
73 /* "teql*" qdisc routines */
74
75 static int
teql_enqueue(struct sk_buff * skb,struct Qdisc * sch,struct sk_buff ** to_free)76 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
77 {
78 struct net_device *dev = qdisc_dev(sch);
79 struct teql_sched_data *q = qdisc_priv(sch);
80
81 if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
82 __skb_queue_tail(&q->q, skb);
83 return NET_XMIT_SUCCESS;
84 }
85
86 return qdisc_drop(skb, sch, to_free);
87 }
88
89 static struct sk_buff *
teql_dequeue(struct Qdisc * sch)90 teql_dequeue(struct Qdisc *sch)
91 {
92 struct teql_sched_data *dat = qdisc_priv(sch);
93 struct netdev_queue *dat_queue;
94 struct sk_buff *skb;
95 struct Qdisc *q;
96
97 skb = __skb_dequeue(&dat->q);
98 dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
99 q = rcu_dereference_bh(dat_queue->qdisc);
100
101 if (skb == NULL) {
102 struct net_device *m = qdisc_dev(q);
103 if (m) {
104 dat->m->slaves = sch;
105 netif_wake_queue(m);
106 }
107 } else {
108 qdisc_bstats_update(sch, skb);
109 }
110 sch->q.qlen = dat->q.qlen + q->q.qlen;
111 return skb;
112 }
113
114 static struct sk_buff *
teql_peek(struct Qdisc * sch)115 teql_peek(struct Qdisc *sch)
116 {
117 /* teql is meant to be used as root qdisc */
118 return NULL;
119 }
120
121 static void
teql_reset(struct Qdisc * sch)122 teql_reset(struct Qdisc *sch)
123 {
124 struct teql_sched_data *dat = qdisc_priv(sch);
125
126 skb_queue_purge(&dat->q);
127 }
128
129 static void
teql_destroy(struct Qdisc * sch)130 teql_destroy(struct Qdisc *sch)
131 {
132 struct Qdisc *q, *prev;
133 struct teql_sched_data *dat = qdisc_priv(sch);
134 struct teql_master *master = dat->m;
135
136 if (!master)
137 return;
138
139 prev = master->slaves;
140 if (prev) {
141 do {
142 q = NEXT_SLAVE(prev);
143 if (q == sch) {
144 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
145 if (q == master->slaves) {
146 master->slaves = NEXT_SLAVE(q);
147 if (q == master->slaves) {
148 struct netdev_queue *txq;
149 spinlock_t *root_lock;
150
151 txq = netdev_get_tx_queue(master->dev, 0);
152 master->slaves = NULL;
153
154 root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
155 spin_lock_bh(root_lock);
156 qdisc_reset(rtnl_dereference(txq->qdisc));
157 spin_unlock_bh(root_lock);
158 }
159 }
160 skb_queue_purge(&dat->q);
161 break;
162 }
163
164 } while ((prev = q) != master->slaves);
165 }
166 }
167
teql_qdisc_init(struct Qdisc * sch,struct nlattr * opt,struct netlink_ext_ack * extack)168 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
169 struct netlink_ext_ack *extack)
170 {
171 struct net_device *dev = qdisc_dev(sch);
172 struct teql_master *m = (struct teql_master *)sch->ops;
173 struct teql_sched_data *q = qdisc_priv(sch);
174
175 if (dev->hard_header_len > m->dev->hard_header_len)
176 return -EINVAL;
177
178 if (m->dev == dev)
179 return -ELOOP;
180
181 if (sch->parent != TC_H_ROOT) {
182 NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
183 return -EOPNOTSUPP;
184 }
185
186 q->m = m;
187
188 skb_queue_head_init(&q->q);
189
190 if (m->slaves) {
191 if (m->dev->flags & IFF_UP) {
192 if ((m->dev->flags & IFF_POINTOPOINT &&
193 !(dev->flags & IFF_POINTOPOINT)) ||
194 (m->dev->flags & IFF_BROADCAST &&
195 !(dev->flags & IFF_BROADCAST)) ||
196 (m->dev->flags & IFF_MULTICAST &&
197 !(dev->flags & IFF_MULTICAST)) ||
198 dev->mtu < m->dev->mtu)
199 return -EINVAL;
200 } else {
201 if (!(dev->flags&IFF_POINTOPOINT))
202 m->dev->flags &= ~IFF_POINTOPOINT;
203 if (!(dev->flags&IFF_BROADCAST))
204 m->dev->flags &= ~IFF_BROADCAST;
205 if (!(dev->flags&IFF_MULTICAST))
206 m->dev->flags &= ~IFF_MULTICAST;
207 if (dev->mtu < m->dev->mtu)
208 m->dev->mtu = dev->mtu;
209 }
210 q->next = NEXT_SLAVE(m->slaves);
211 NEXT_SLAVE(m->slaves) = sch;
212 } else {
213 q->next = sch;
214 m->slaves = sch;
215 m->dev->mtu = dev->mtu;
216 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
217 }
218 return 0;
219 }
220
221
222 static int
__teql_resolve(struct sk_buff * skb,struct sk_buff * skb_res,struct net_device * dev,struct netdev_queue * txq,struct dst_entry * dst)223 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
224 struct net_device *dev, struct netdev_queue *txq,
225 struct dst_entry *dst)
226 {
227 struct neighbour *n;
228 int err = 0;
229
230 n = dst_neigh_lookup_skb(dst, skb);
231 if (!n)
232 return -ENOENT;
233
234 if (dst->dev != dev) {
235 struct neighbour *mn;
236
237 mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
238 neigh_release(n);
239 if (IS_ERR(mn))
240 return PTR_ERR(mn);
241 n = mn;
242 }
243
244 if (neigh_event_send(n, skb_res) == 0) {
245 int err;
246 char haddr[MAX_ADDR_LEN];
247
248 neigh_ha_snapshot(haddr, n, dev);
249 err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
250 haddr, NULL, skb->len);
251
252 if (err < 0)
253 err = -EINVAL;
254 } else {
255 err = (skb_res == NULL) ? -EAGAIN : 1;
256 }
257 neigh_release(n);
258 return err;
259 }
260
teql_resolve(struct sk_buff * skb,struct sk_buff * skb_res,struct net_device * dev,struct netdev_queue * txq)261 static inline int teql_resolve(struct sk_buff *skb,
262 struct sk_buff *skb_res,
263 struct net_device *dev,
264 struct netdev_queue *txq)
265 {
266 struct dst_entry *dst = skb_dst(skb);
267 int res;
268
269 if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
270 return -ENODEV;
271
272 if (!dev->header_ops || !dst)
273 return 0;
274
275 rcu_read_lock();
276 res = __teql_resolve(skb, skb_res, dev, txq, dst);
277 rcu_read_unlock();
278
279 return res;
280 }
281
teql_master_xmit(struct sk_buff * skb,struct net_device * dev)282 static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
283 {
284 struct teql_master *master = netdev_priv(dev);
285 struct Qdisc *start, *q;
286 int busy;
287 int nores;
288 int subq = skb_get_queue_mapping(skb);
289 struct sk_buff *skb_res = NULL;
290
291 start = master->slaves;
292
293 restart:
294 nores = 0;
295 busy = 0;
296
297 q = start;
298 if (!q)
299 goto drop;
300
301 do {
302 struct net_device *slave = qdisc_dev(q);
303 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
304
305 if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
306 continue;
307 if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
308 !netif_running(slave)) {
309 busy = 1;
310 continue;
311 }
312
313 switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
314 case 0:
315 if (__netif_tx_trylock(slave_txq)) {
316 unsigned int length = qdisc_pkt_len(skb);
317
318 skb->dev = slave;
319 if (!netif_xmit_frozen_or_stopped(slave_txq) &&
320 netdev_start_xmit(skb, slave, slave_txq, false) ==
321 NETDEV_TX_OK) {
322 __netif_tx_unlock(slave_txq);
323 master->slaves = NEXT_SLAVE(q);
324 netif_wake_queue(dev);
325 master->tx_packets++;
326 master->tx_bytes += length;
327 return NETDEV_TX_OK;
328 }
329 __netif_tx_unlock(slave_txq);
330 }
331 if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
332 busy = 1;
333 break;
334 case 1:
335 master->slaves = NEXT_SLAVE(q);
336 return NETDEV_TX_OK;
337 default:
338 nores = 1;
339 break;
340 }
341 __skb_pull(skb, skb_network_offset(skb));
342 } while ((q = NEXT_SLAVE(q)) != start);
343
344 if (nores && skb_res == NULL) {
345 skb_res = skb;
346 goto restart;
347 }
348
349 if (busy) {
350 netif_stop_queue(dev);
351 return NETDEV_TX_BUSY;
352 }
353 master->tx_errors++;
354
355 drop:
356 master->tx_dropped++;
357 dev_kfree_skb(skb);
358 return NETDEV_TX_OK;
359 }
360
teql_master_open(struct net_device * dev)361 static int teql_master_open(struct net_device *dev)
362 {
363 struct Qdisc *q;
364 struct teql_master *m = netdev_priv(dev);
365 int mtu = 0xFFFE;
366 unsigned int flags = IFF_NOARP | IFF_MULTICAST;
367
368 if (m->slaves == NULL)
369 return -EUNATCH;
370
371 flags = FMASK;
372
373 q = m->slaves;
374 do {
375 struct net_device *slave = qdisc_dev(q);
376
377 if (slave == NULL)
378 return -EUNATCH;
379
380 if (slave->mtu < mtu)
381 mtu = slave->mtu;
382 if (slave->hard_header_len > LL_MAX_HEADER)
383 return -EINVAL;
384
385 /* If all the slaves are BROADCAST, master is BROADCAST
386 If all the slaves are PtP, master is PtP
387 Otherwise, master is NBMA.
388 */
389 if (!(slave->flags&IFF_POINTOPOINT))
390 flags &= ~IFF_POINTOPOINT;
391 if (!(slave->flags&IFF_BROADCAST))
392 flags &= ~IFF_BROADCAST;
393 if (!(slave->flags&IFF_MULTICAST))
394 flags &= ~IFF_MULTICAST;
395 } while ((q = NEXT_SLAVE(q)) != m->slaves);
396
397 m->dev->mtu = mtu;
398 m->dev->flags = (m->dev->flags&~FMASK) | flags;
399 netif_start_queue(m->dev);
400 return 0;
401 }
402
/* Take the master device down: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
408
teql_master_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)409 static void teql_master_stats64(struct net_device *dev,
410 struct rtnl_link_stats64 *stats)
411 {
412 struct teql_master *m = netdev_priv(dev);
413
414 stats->tx_packets = m->tx_packets;
415 stats->tx_bytes = m->tx_bytes;
416 stats->tx_errors = m->tx_errors;
417 stats->tx_dropped = m->tx_dropped;
418 }
419
teql_master_mtu(struct net_device * dev,int new_mtu)420 static int teql_master_mtu(struct net_device *dev, int new_mtu)
421 {
422 struct teql_master *m = netdev_priv(dev);
423 struct Qdisc *q;
424
425 q = m->slaves;
426 if (q) {
427 do {
428 if (new_mtu > qdisc_dev(q)->mtu)
429 return -EINVAL;
430 } while ((q = NEXT_SLAVE(q)) != m->slaves);
431 }
432
433 WRITE_ONCE(dev->mtu, new_mtu);
434 return 0;
435 }
436
437 static const struct net_device_ops teql_netdev_ops = {
438 .ndo_open = teql_master_open,
439 .ndo_stop = teql_master_close,
440 .ndo_start_xmit = teql_master_xmit,
441 .ndo_get_stats64 = teql_master_stats64,
442 .ndo_change_mtu = teql_master_mtu,
443 };
444
teql_master_setup(struct net_device * dev)445 static __init void teql_master_setup(struct net_device *dev)
446 {
447 struct teql_master *master = netdev_priv(dev);
448 struct Qdisc_ops *ops = &master->qops;
449
450 master->dev = dev;
451 ops->priv_size = sizeof(struct teql_sched_data);
452
453 ops->enqueue = teql_enqueue;
454 ops->dequeue = teql_dequeue;
455 ops->peek = teql_peek;
456 ops->init = teql_qdisc_init;
457 ops->reset = teql_reset;
458 ops->destroy = teql_destroy;
459 ops->owner = THIS_MODULE;
460
461 dev->netdev_ops = &teql_netdev_ops;
462 dev->type = ARPHRD_VOID;
463 dev->mtu = 1500;
464 dev->min_mtu = 68;
465 dev->max_mtu = 65535;
466 dev->tx_queue_len = 100;
467 dev->flags = IFF_NOARP;
468 dev->hard_header_len = LL_MAX_HEADER;
469 netif_keep_dst(dev);
470 }
471
472 static LIST_HEAD(master_dev_list);
473 static int max_equalizers = 1;
474 module_param(max_equalizers, int, 0);
475 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
476
teql_init(void)477 static int __init teql_init(void)
478 {
479 int i;
480 int err = -ENODEV;
481
482 for (i = 0; i < max_equalizers; i++) {
483 struct net_device *dev;
484 struct teql_master *master;
485
486 dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
487 NET_NAME_UNKNOWN, teql_master_setup);
488 if (!dev) {
489 err = -ENOMEM;
490 break;
491 }
492
493 if ((err = register_netdev(dev))) {
494 free_netdev(dev);
495 break;
496 }
497
498 master = netdev_priv(dev);
499
500 strscpy(master->qops.id, dev->name, IFNAMSIZ);
501 err = register_qdisc(&master->qops);
502
503 if (err) {
504 unregister_netdev(dev);
505 free_netdev(dev);
506 break;
507 }
508
509 list_add_tail(&master->master_list, &master_dev_list);
510 }
511 return i ? 0 : err;
512 }
513
teql_exit(void)514 static void __exit teql_exit(void)
515 {
516 struct teql_master *master, *nxt;
517
518 list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
519
520 list_del(&master->master_list);
521
522 unregister_qdisc(&master->qops);
523 unregister_netdev(master->dev);
524 free_netdev(master->dev);
525 }
526 }
527
528 module_init(teql_init);
529 module_exit(teql_exit);
530
531 MODULE_LICENSE("GPL");
532 MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");
533