// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to setup it.
   ----------------

   After loading this module you will find a new device teqlN
   and new qdisc with the same name. To join a slave to the equalizer
   you should just set this qdisc on a device f.e.

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics
      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
      Certainly, large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate an upper useful difference as ~10 times.
   3. If the slave requires address resolution, only protocols using
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols are still allowed to use the slave device directly,
      which will not break load balancing, though native slave
      traffic will have the highest priority.
 */

/*
 * Per-master state, stored as the priv area of the "teqlN" net_device.
 * A master owns a circular singly-linked list of slave qdiscs, threaded
 * through teql_sched_data.next, plus software TX counters reported via
 * ndo_get_stats64.
 */
struct teql_master {
	struct Qdisc_ops	qops;		/* per-master qdisc ops; id is the device name ("teqlN") */
	struct net_device	*dev;		/* the master teql net_device itself */
	struct Qdisc		*slaves;	/* entry into the circular slave list; NULL when empty */
	struct list_head	master_list;	/* linkage on the module-global master_dev_list */
	unsigned long		tx_bytes;	/* software TX stats, bumped in teql_master_xmit() */
	unsigned long		tx_packets;
	unsigned long		tx_errors;
	unsigned long		tx_dropped;
};

/* Per-slave-qdisc private data (qdisc_priv of each teql qdisc). */
struct teql_sched_data {
	struct Qdisc		*next;	/* next slave in the master's circular list */
	struct teql_master	*m;	/* back-pointer to the owning master */
	struct sk_buff_head	q;	/* packets queued on this slave */
};

/* Follow the circular slave list from one slave qdisc to the next. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Device flags whose intersection across slaves the master inherits. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

/*
 * Enqueue on the slave's private queue, bounded by the slave device's
 * tx_queue_len; anything beyond that is dropped via qdisc_drop().
 */
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch, to_free);
}

/*
 * Dequeue from this slave's private queue.  When the queue has drained,
 * make this slave the master's next starting point and wake the device
 * behind the master's root qdisc so pending traffic can flow again.
 * sch->q.qlen is kept as the sum of the local backlog and the master
 * root qdisc's backlog.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	/* Root qdisc of the master device's queue 0, under RCU-bh. */
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		/* NOTE(review): qdisc_dev(q) is the device owning the
		 * master's root qdisc — presumably the master itself;
		 * NULL is still checked before waking it. */
		struct net_device *m = qdisc_dev(q);
		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

/* No peeking: teql is meant to be used only as a root qdisc. */
static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as root qdisc */
	return NULL;
}

/* Drop every packet queued on this slave. */
static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
}

/*
 * Unlink this slave qdisc from the master's circular list and purge its
 * queue.  If it was the last remaining slave, clear master->slaves and
 * reset the master's TX queue state.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	/* Init may have failed before q->m was set. */
	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				/* Unlink sch from the circular list. */
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					/* Still pointing at itself: sch was
					 * the only slave left. */
					if (q == master->slaves) {
						struct netdev_queue *txq;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						dev_reset_queue(master->dev,
								txq, NULL);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

/*
 * Attach a new slave device to the equalizer.  Validates the candidate
 * (header length, no self-loop, root only), then links the new qdisc
 * into the master's circular slave list.  If the master is down, its
 * flags/MTU are narrowed to stay within what every slave supports; if
 * it is up, an incompatible slave is rejected with -EINVAL instead.
 */
static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	/* The master embeds its Qdisc_ops, so the ops pointer doubles as
	 * a pointer to the owning teql_master. */
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	/* The master device cannot enslave itself. */
	if (m->dev == dev)
		return -ELOOP;

	if (sch->parent != TC_H_ROOT) {
		NL_SET_ERR_MSG_MOD(extack, "teql can only be used as root");
		return -EOPNOTSUPP;
	}

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is live: the new slave must support at
			 * least what the master already advertises. */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: shrink its capabilities to the
			 * intersection with the new slave. */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		/* Insert after the current list entry point. */
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		/* First slave: a one-element circular list; master takes
		 * the slave's MTU and FMASK flags. */
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

/*
 * Build the link-layer header for @skb so it can go out through slave
 * @dev, resolving the neighbour from the skb's dst.  If the dst's
 * neighbour belongs to a different device, look it up again on @dev by
 * primary key.  Returns 0 when the header was written (or attempted),
 * -ENOENT/-errno on lookup failure, -EAGAIN when resolution is pending
 * and no retry skb was supplied, or 1 when @skb_res was consumed by
 * neigh_event_send().
 */
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		/* Re-resolve the same L3 address on the actual slave. */
		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		/* NOTE(review): this inner err shadows the outer one, so a
		 * dev_hard_header() failure is not propagated and the outer
		 * err stays 0 — confirm this matches upstream intent. */
		int err;
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
				      haddr, NULL, skb->len);

		if (err < 0)
			err = -EINVAL;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

/*
 * Resolve wrapper: skip dead slaves (noop root qdisc → -ENODEV), treat
 * header-less devices or dst-less skbs as already resolved (0), and run
 * the real resolution under rcu_read_lock().
 */
static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

/*
 * Master transmit: walk the circular slave list starting at
 * master->slaves, trying to hand @skb to the first usable slave.  On a
 * successful transmit the walk position advances so slaves are used
 * round-robin.  If some slave needed neighbour resolution (nores), one
 * retry pass is made with skb_res = skb so resolution can consume the
 * skb.  If every slave was busy, the master queue is stopped and
 * NETDEV_TX_BUSY returned; otherwise the packet is counted and dropped.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		/* Skip a slave whose root qdisc is no longer this one. */
		if (rcu_access_pointer(slave_txq->qdisc_sleeping) != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			/* Resolved: try to transmit directly under the
			 * slave's tx lock without blocking. */
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				skb->dev = slave;
				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					/* Round-robin: continue from the
					 * next slave next time. */
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			/* Resolver consumed the skb (queued on the
			 * neighbour); we are done with it. */
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		/* Undo any link-layer header a failed attempt pushed. */
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	/* One retry pass, letting resolution consume the skb. */
	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

/*
 * ndo_open: refuse to come up without slaves, then derive the master's
 * MTU (minimum over slaves) and FMASK flags (intersection over slaves):
 * all-broadcast → broadcast, all-PtP → PtP, otherwise NBMA.
 */
static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	/* Start from the full mask and clear what any slave lacks. */
	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST
		   If all the slaves are PtP, master is PtP
		   Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

/* ndo_stop: just stop the master's queue. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

/* ndo_get_stats64: report the software counters kept by the master. */
static void teql_master_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
}

/* ndo_change_mtu: a new MTU must not exceed any slave's MTU. */
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

/*
 * alloc_netdev() setup callback: wire the embedded Qdisc_ops to the
 * teql qdisc routines and initialize the master net_device defaults.
 */
static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size  = sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->min_mtu		= 68;
	dev->max_mtu		= 65535;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

/* All registered masters, for teardown in teql_exit(). */
static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");

/*
 * Module init: create up to max_equalizers teqlN devices, registering a
 * matching qdisc (named after the device) for each.  Succeeds if at
 * least one master was created; otherwise returns the last error.
 */
static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		if ((err = register_netdev(dev))) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		/* The qdisc id mirrors the device name ("teqlN"). */
		strscpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	/* Partial success (i > 0) still counts as a successful load. */
	return i ? 0 : err;
}

/* Module exit: tear down every master created in teql_init(). */
static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {

		list_del(&master->master_list);

		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("True (or trivial) link equalizer qdisc");