// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the
   equalizer, simply attach this qdisc to a device, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)
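
   Typically the master device also has to be brought up and given an
   address before it will carry traffic, e.g. (the address below is
   only an example):

   # ip link set dev teql0 up
   # ip addr add 10.0.0.1/24 dev teql0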

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority. */

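/* One teql_master exists per teqlN master device.  The embedded
 * Qdisc_ops is registered under the device's own name, so "teql0"
 * names both the master device and the qdisc used to enslave other
 * devices to it.
 */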
struct teql_master {
        struct Qdisc_ops qops;
        struct net_device *dev;
        struct Qdisc *slaves;
        struct list_head master_list;
        unsigned long tx_bytes;
        unsigned long tx_packets;
        unsigned long tx_errors;
        unsigned long tx_dropped;
};

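/* Per-slave qdisc state.  The slaves of one master form a circular,
 * singly linked list threaded through ->next (see NEXT_SLAVE below);
 * master->slaves points at the current position in the round-robin.
 */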
struct teql_sched_data {
        struct Qdisc *next;
        struct teql_master *m;
        struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_sched_data *q = qdisc_priv(sch);

        if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
                __skb_queue_tail(&q->q, skb);
                return NET_XMIT_SUCCESS;
        }

        return qdisc_drop(skb, sch, to_free);
}

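/* If the local queue is empty, park the master's round-robin pointer
 * on this slave and wake the master so teql_master_xmit() can feed us
 * again.  sch->q.qlen is kept as the sum of the local backlog and the
 * master root qdisc's backlog.
 */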
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct netdev_queue *dat_queue;
        struct sk_buff *skb;
        struct Qdisc *q;

        skb = __skb_dequeue(&dat->q);
        dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
        q = rcu_dereference_bh(dat_queue->qdisc);

        if (skb == NULL) {
                struct net_device *m = qdisc_dev(q);

                if (m) {
                        dat->m->slaves = sch;
                        netif_wake_queue(m);
                }
        } else {
                qdisc_bstats_update(sch, skb);
        }
        sch->q.qlen = dat->q.qlen + q->q.qlen;
        return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
        struct teql_sched_data *dat = qdisc_priv(sch);

        skb_queue_purge(&dat->q);
}

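/* Unlink this qdisc from its master's circular slave list; if it was
 * the last slave, also reset the master device's root qdisc.
 */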
static void
teql_destroy(struct Qdisc *sch)
{
        struct Qdisc *q, *prev;
        struct teql_sched_data *dat = qdisc_priv(sch);
        struct teql_master *master = dat->m;

        if (!master)
                return;

        prev = master->slaves;
        if (prev) {
                do {
                        q = NEXT_SLAVE(prev);
                        if (q == sch) {
                                NEXT_SLAVE(prev) = NEXT_SLAVE(q);
                                if (q == master->slaves) {
                                        master->slaves = NEXT_SLAVE(q);
                                        if (q == master->slaves) {
                                                struct netdev_queue *txq;
                                                spinlock_t *root_lock;

                                                txq = netdev_get_tx_queue(master->dev, 0);
                                                master->slaves = NULL;

                                                root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
                                                spin_lock_bh(root_lock);
                                                qdisc_reset(rtnl_dereference(txq->qdisc));
                                                spin_unlock_bh(root_lock);
                                        }
                                }
                                skb_queue_purge(&dat->q);
                                break;
                        }
                } while ((prev = q) != master->slaves);
        }
}

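/* Attach a new slave ("tc qdisc add dev ... root teqlN").  The slave
 * must be link-compatible with the master: its hard header must fit,
 * and while the master is up its flags and MTU must not force the
 * master to degrade.  While the master is down, the master's flags
 * and MTU are instead adjusted down to match the new slave.
 */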
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
                           struct netlink_ext_ack *extack)
{
        struct net_device *dev = qdisc_dev(sch);
        struct teql_master *m = (struct teql_master *)sch->ops;
        struct teql_sched_data *q = qdisc_priv(sch);

        if (dev->hard_header_len > m->dev->hard_header_len)
                return -EINVAL;

        if (m->dev == dev)
                return -ELOOP;

        if (sch->parent != TC_H_ROOT) {
                NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
                return -EOPNOTSUPP;
        }

        q->m = m;

        skb_queue_head_init(&q->q);

        if (m->slaves) {
                if (m->dev->flags & IFF_UP) {
                        if ((m->dev->flags & IFF_POINTOPOINT &&
                             !(dev->flags & IFF_POINTOPOINT)) ||
                            (m->dev->flags & IFF_BROADCAST &&
                             !(dev->flags & IFF_BROADCAST)) ||
                            (m->dev->flags & IFF_MULTICAST &&
                             !(dev->flags & IFF_MULTICAST)) ||
                            dev->mtu < m->dev->mtu)
                                return -EINVAL;
                } else {
                        if (!(dev->flags & IFF_POINTOPOINT))
                                m->dev->flags &= ~IFF_POINTOPOINT;
                        if (!(dev->flags & IFF_BROADCAST))
                                m->dev->flags &= ~IFF_BROADCAST;
                        if (!(dev->flags & IFF_MULTICAST))
                                m->dev->flags &= ~IFF_MULTICAST;
                        if (dev->mtu < m->dev->mtu)
                                m->dev->mtu = dev->mtu;
                }
                q->next = NEXT_SLAVE(m->slaves);
                NEXT_SLAVE(m->slaves) = sch;
        } else {
                q->next = sch;
                m->slaves = sch;
                m->dev->mtu = dev->mtu;
                m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
        }
        return 0;
}

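/* Rebuild the link-layer header for the slave that was actually
 * chosen.  The route's neighbour entry belongs to the equalized
 * (master) device, so look up or create the matching entry on the
 * slave and use it to fill in the hardware header.  Returns 0 when
 * the header was built and the packet can be sent, 1 when the packet
 * was taken over by the neighbour layer, and a negative errno when
 * another slave should be tried.
 */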
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
               struct net_device *dev, struct netdev_queue *txq,
               struct dst_entry *dst)
{
        struct neighbour *n;
        int err = 0;

        n = dst_neigh_lookup_skb(dst, skb);
        if (!n)
                return -ENOENT;

        if (dst->dev != dev) {
                struct neighbour *mn;

                mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
                neigh_release(n);
                if (IS_ERR(mn))
                        return PTR_ERR(mn);
                n = mn;
        }

        if (neigh_event_send(n, skb_res) == 0) {
                char haddr[MAX_ADDR_LEN];

                neigh_ha_snapshot(haddr, n, dev);
                err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
                                      haddr, NULL, skb->len);

                /* dev_hard_header() returns the header length on
                 * success; normalize to 0/-EINVAL so the caller's
                 * switch sees a clean result.  (The original code
                 * shadowed "err" here, so failures were silently
                 * reported as success.)
                 */
                if (err < 0)
                        err = -EINVAL;
                else
                        err = 0;
        } else {
                err = (skb_res == NULL) ? -EAGAIN : 1;
        }
        neigh_release(n);
        return err;
}

static inline int teql_resolve(struct sk_buff *skb,
                               struct sk_buff *skb_res,
                               struct net_device *dev,
                               struct netdev_queue *txq)
{
        struct dst_entry *dst = skb_dst(skb);
        int res;

        if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
                return -ENODEV;

        if (!dev->header_ops || !dst)
                return 0;

        rcu_read_lock();
        res = __teql_resolve(skb, skb_res, dev, txq, dst);
        rcu_read_unlock();

        return res;
}

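/* Transmit on the master: round-robin over the slaves, starting where
 * the previous transmission stopped.  A slave is skipped if teql is no
 * longer its active root qdisc, if its queue is stopped, or if
 * neighbour resolution fails; if every usable slave is busy, the
 * master queue is stopped and NETDEV_TX_BUSY asks the core to retry.
 */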
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;
        int nores;
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        q = start;
        if (!q)
                goto drop;

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

                if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
                        continue;
                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
                case 0:
                        if (__netif_tx_trylock(slave_txq)) {
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
                                    netdev_start_xmit(skb, slave, slave_txq, false) ==
                                    NETDEV_TX_OK) {
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
                                busy = 1;
                        break;
                case 1:
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}

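/* The master can only be brought up once at least one slave is
 * attached.  Its MTU becomes the smallest slave MTU and its
 * BROADCAST/POINTOPOINT/MULTICAST flags the intersection of the
 * slaves' flags.
 */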
static int teql_master_open(struct net_device *dev)
{
        struct Qdisc *q;
        struct teql_master *m = netdev_priv(dev);
        int mtu = 0xFFFE;
        unsigned int flags = IFF_NOARP | IFF_MULTICAST;

        if (m->slaves == NULL)
                return -EUNATCH;

        flags = FMASK;

        q = m->slaves;
        do {
                struct net_device *slave = qdisc_dev(q);

                if (slave == NULL)
                        return -EUNATCH;

                if (slave->mtu < mtu)
                        mtu = slave->mtu;
                if (slave->hard_header_len > LL_MAX_HEADER)
                        return -EINVAL;

                /* If all the slaves are BROADCAST, master is BROADCAST
                   If all the slaves are PtP, master is PtP
                   Otherwise, master is NBMA.
                 */
                if (!(slave->flags & IFF_POINTOPOINT))
                        flags &= ~IFF_POINTOPOINT;
                if (!(slave->flags & IFF_BROADCAST))
                        flags &= ~IFF_BROADCAST;
                if (!(slave->flags & IFF_MULTICAST))
                        flags &= ~IFF_MULTICAST;
        } while ((q = NEXT_SLAVE(q)) != m->slaves);

        m->dev->mtu = mtu;
        m->dev->flags = (m->dev->flags & ~FMASK) | flags;
        netif_start_queue(m->dev);
        return 0;
}

static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static void teql_master_stats64(struct net_device *dev,
                                struct rtnl_link_stats64 *stats)
{
        struct teql_master *m = netdev_priv(dev);

        stats->tx_packets = m->tx_packets;
        stats->tx_bytes = m->tx_bytes;
        stats->tx_errors = m->tx_errors;
        stats->tx_dropped = m->tx_dropped;
}

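/* The master's MTU may never exceed that of any attached slave. */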
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
        struct teql_master *m = netdev_priv(dev);
        struct Qdisc *q;

        q = m->slaves;
        if (q) {
                do {
                        if (new_mtu > qdisc_dev(q)->mtu)
                                return -EINVAL;
                } while ((q = NEXT_SLAVE(q)) != m->slaves);
        }

        WRITE_ONCE(dev->mtu, new_mtu);
        return 0;
}

static const struct net_device_ops teql_netdev_ops = {
        .ndo_open        = teql_master_open,
        .ndo_stop        = teql_master_close,
        .ndo_start_xmit  = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu  = teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc_ops *ops = &master->qops;

        master->dev = dev;
        ops->priv_size = sizeof(struct teql_sched_data);

        ops->enqueue = teql_enqueue;
        ops->dequeue = teql_dequeue;
        ops->peek = teql_peek;
        ops->init = teql_qdisc_init;
        ops->reset = teql_reset;
        ops->destroy = teql_destroy;
        ops->owner = THIS_MODULE;

        dev->netdev_ops = &teql_netdev_ops;
        dev->type = ARPHRD_VOID;
        dev->mtu = 1500;
        dev->min_mtu = 68;
        dev->max_mtu = 65535;
        dev->tx_queue_len = 100;
        dev->flags = IFF_NOARP;
        dev->hard_header_len = LL_MAX_HEADER;
        netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
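
/* Usage sketch: to create four master devices (teql0..teql3) at load
 * time instead of the default single teql0:
 *
 *   # modprobe sch_teql max_equalizers=4
 */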

static int __init teql_init(void)
{
        int i;
        int err = -ENODEV;

        for (i = 0; i < max_equalizers; i++) {
                struct net_device *dev;
                struct teql_master *master;

                dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
                                   NET_NAME_UNKNOWN, teql_master_setup);
                if (!dev) {
                        err = -ENOMEM;
                        break;
                }

                err = register_netdev(dev);
                if (err) {
                        free_netdev(dev);
                        break;
                }

                master = netdev_priv(dev);

                strscpy(master->qops.id, dev->name, IFNAMSIZ);
                err = register_qdisc(&master->qops);
                if (err) {
                        unregister_netdev(dev);
                        free_netdev(dev);
                        break;
                }

                list_add_tail(&master->master_list, &master_dev_list);
        }
        return i ? 0 : err;
}

static void __exit teql_exit(void)
{
        struct teql_master *master, *nxt;

        list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
                list_del(&master->master_list);

                unregister_qdisc(&master->qops);
                unregister_netdev(master->dev);
                free_netdev(master->dev);
        }
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");