// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>
/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN and a new
   qdisc with the same name. To join a slave to the equalizer, just
   install this qdisc as the root qdisc of the slave device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0
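
   Once at least one slave is attached, bring the equalizer device
   itself up so that it can transmit, e.g.

   # ip link set dev teql0 up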

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limits on the physical characteristics of the
      slaves, e.g. it will equalize a 9600 baud line and 100Mb Ethernet
      perfectly :-) Certainly, a large difference in link speeds will make
      the resulting equalized link unusable, because of heavy packet
      reordering. I estimate the upper useful ratio at ~10 times.
   3. If a slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols can still use the slave device directly, which will
      not break the load balancing, though native slave traffic will have
      the highest priority. */

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

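/* Queue the packet on this slave's private list while there is room
 * (bounded by the slave device's tx_queue_len); otherwise drop it.
 */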
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch, to_free);
}

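/* Pull the next packet off this slave's private queue.  When the queue
 * is empty, mark this qdisc as the next slave to serve and wake the
 * master device.  sch->q.qlen mirrors this queue plus the master's root
 * qdisc so that the stack sees a consistent backlog.
 */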
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
}

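/* Unlink this qdisc from the master's circular slave list and purge its
 * queue; when the last slave goes away, clear the list and reset the
 * master's tx queue as well.
 */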
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						dev_reset_queue(master->dev, txq, NULL);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

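/* Attach one slave: check header room and loops, then link the qdisc
 * into the master's circular slave list.  The first slave sets the
 * master's MTU and flags; while the master is down, further slaves may
 * only narrow them, and while it is up an incompatible slave is
 * rejected.
 */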
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
		return -EOPNOTSUPP;
	}

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}

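/* Resolve the link-layer destination for skb on the given slave via the
 * neighbour cache and rebuild the hard header.  Returns 0 on success,
 * a negative errno on failure (-EAGAIN while resolution is pending with
 * no skb to queue), or 1 once skb_res has been handed to the neighbour
 * layer.
 */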
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
				      haddr, NULL, skb->len);
		/* dev_hard_header() returns the header length on success */
		if (err < 0)
			err = -EINVAL;
		else
			err = 0;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

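/* Fast-path wrapper around __teql_resolve(): -ENODEV if the slave's
 * qdisc has been deactivated; nothing to resolve for header-less
 * devices or packets without a dst.
 */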
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

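/* Round-robin transmit: walk the circular slave list starting from the
 * last slave served, resolve the destination for each candidate, and
 * hand the packet to the first slave whose tx queue will take it.  If
 * every slave is busy, stop the master queue and return TX_BUSY; if
 * none could resolve, retry once letting the neighbour layer queue the
 * packet; otherwise drop it.
 */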
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				skb->dev = slave;
				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

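/* Bring the master up: refuse without attached slaves, adopt the
 * smallest slave MTU, and recompute the master's flags from the whole
 * slave set.
 */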
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST.
		 * If all the slaves are PtP, master is PtP.
		 * Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static void teql_master_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
}

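/* The master's MTU may never exceed the MTU of any attached slave. */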
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

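/* Initialize one master device together with its companion qdisc ops;
 * the qdisc is registered later under the device's name (teqlN).
 */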
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	= teql_enqueue;
	ops->dequeue	= teql_dequeue;
	ops->peek	= teql_peek;
	ops->init	= teql_qdisc_init;
	ops->reset	= teql_reset;
	ops->destroy	= teql_destroy;
	ops->owner	= THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->min_mtu		= 68;
	dev->max_mtu		= 65535;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

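/* Create max_equalizers master devices, registering a matching qdisc
 * for each; the module loads successfully if at least one equalizer
 * could be set up.
 */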
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strscpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);
		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");