// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device, e.g.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      the neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.
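
   Example.
   --------

   After the slaves are attached, the master is brought up and addressed
   like any ordinary interface. (The commands below are illustrative;
   the interface name and address are examples.)

   # ip link set teql0 up
   # ip addr add 192.0.2.1/24 dev teql0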
 */

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long tx_bytes;
	unsigned long tx_packets;
	unsigned long tx_errors;
	unsigned long tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

/* Slaves form a circular list threaded through each qdisc's private data. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch, to_free);
}

static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);
		if (m) {
			/* This slave is idle again: make it the next
			 * candidate and let the master resume transmission.
			 */
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
}

static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if (!master)
		return;

	/* Unlink sch from the master's circular slave list. */
	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						/* q was the last slave; reset
						 * the master's own qdisc.
						 */
						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

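/* Attach a new slave. teql_qdisc_init() runs when this qdisc is grafted
 * onto a device as root. It links the qdisc into the master's circular
 * slave list; if the master is down it also narrows the master's
 * flags/MTU to what the new slave supports, otherwise an incompatible
 * slave is rejected.
 */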
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
		return -EOPNOTSUPP;
	}

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			if (!(dev->flags & IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags & IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags & IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags & ~FMASK) | (dev->flags & FMASK);
	}
	return 0;
}


static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		int err;
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
				      haddr, NULL, skb->len);

		if (err < 0)
			err = -EINVAL;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

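/* Master transmit path: walk the circular slave list round-robin,
 * starting at master->slaves, and hand the skb to the first slave that
 * is running, not stopped, and whose neighbour resolves on that slave.
 * On success the list head advances, so the next packet starts at the
 * following slave.
 */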
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	/* If some slave only lacked a resolved neighbour, retry once with
	 * skb_res set so neigh_event_send() can queue the packet.
	 */
	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST.
		   If all the slaves are PtP, master is PtP.
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags & IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags & IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags & IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags & ~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static void teql_master_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets = m->tx_packets;
	stats->tx_bytes = m->tx_bytes;
	stats->tx_errors = m->tx_errors;
	stats->tx_dropped = m->tx_dropped;
}

static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev = dev;
	ops->priv_size = sizeof(struct teql_sched_data);

	ops->enqueue = teql_enqueue;
	ops->dequeue = teql_dequeue;
	ops->peek = teql_peek;
	ops->init = teql_qdisc_init;
	ops->reset = teql_reset;
	ops->destroy = teql_destroy;
	ops->owner = THIS_MODULE;

	dev->netdev_ops = &teql_netdev_ops;
	dev->type = ARPHRD_VOID;
	dev->mtu = 1500;
	dev->min_mtu = 68;
	dev->max_mtu = 65535;
	dev->tx_queue_len = 100;
	dev->flags = IFF_NOARP;
	dev->hard_header_len = LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

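/* Module init creates max_equalizers master devices (teql0, teql1, ...),
 * each registering a qdisc ops named after the device. The count can be
 * raised at module load time, e.g.:
 *
 *   # modprobe sch_teql max_equalizers=2
 */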
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strscpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Succeed if at least one equalizer was created. */
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");