1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* drivers/net/ifb.c: 3 4 The purpose of this driver is to provide a device that allows 5 for sharing of resources: 6 7 1) qdiscs/policies that are per device as opposed to system wide. 8 ifb allows for a device which can be redirected to thus providing 9 an impression of sharing. 10 11 2) Allows for queueing incoming traffic for shaping instead of 12 dropping. 13 14 The original concept is based on what is known as the IMQ 15 driver initially written by Martin Devera, later rewritten 16 by Patrick McHardy and then maintained by Andre Correa. 17 18 You need the tc action mirror or redirect to feed this device 19 packets. 20 21 22 Authors: Jamal Hadi Salim (2005) 23 24 */ 25 26 27 #include <linux/module.h> 28 #include <linux/kernel.h> 29 #include <linux/netdevice.h> 30 #include <linux/ethtool.h> 31 #include <linux/etherdevice.h> 32 #include <linux/init.h> 33 #include <linux/interrupt.h> 34 #include <linux/moduleparam.h> 35 #include <linux/netfilter_netdev.h> 36 #include <net/pkt_sched.h> 37 #include <net/net_namespace.h> 38 39 #define TX_Q_LIMIT 32 40 41 struct ifb_q_stats { 42 u64 packets; 43 u64 bytes; 44 struct u64_stats_sync sync; 45 }; 46 47 struct ifb_q_private { 48 struct net_device *dev; 49 struct tasklet_struct ifb_tasklet; 50 int tasklet_pending; 51 int txqnum; 52 struct sk_buff_head rq; 53 struct sk_buff_head tq; 54 struct ifb_q_stats rx_stats; 55 struct ifb_q_stats tx_stats; 56 } ____cacheline_aligned_in_smp; 57 58 struct ifb_dev_private { 59 struct ifb_q_private *tx_private; 60 }; 61 62 /* For ethtools stats. */ 63 struct ifb_q_stats_desc { 64 char desc[ETH_GSTRING_LEN]; 65 size_t offset; 66 }; 67 68 #define IFB_Q_STAT(m) offsetof(struct ifb_q_stats, m) 69 70 static const struct ifb_q_stats_desc ifb_q_stats_desc[] = { 71 { "packets", IFB_Q_STAT(packets) }, 72 { "bytes", IFB_Q_STAT(bytes) }, 73 }; 74 75 #define IFB_Q_STATS_LEN ARRAY_SIZE(ifb_q_stats_desc) 76 77 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev); 78 static int ifb_open(struct net_device *dev); 79 static int ifb_close(struct net_device *dev); 80 81 static void ifb_update_q_stats(struct ifb_q_stats *stats, int len) 82 { 83 u64_stats_update_begin(&stats->sync); 84 stats->packets++; 85 stats->bytes += len; 86 u64_stats_update_end(&stats->sync); 87 } 88 89 static void ifb_ri_tasklet(struct tasklet_struct *t) 90 { 91 struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet); 92 struct netdev_queue *txq; 93 struct sk_buff *skb; 94 95 txq = netdev_get_tx_queue(txp->dev, txp->txqnum); 96 skb = skb_peek(&txp->tq); 97 if (!skb) { 98 if (!__netif_tx_trylock(txq)) 99 goto resched; 100 skb_queue_splice_tail_init(&txp->rq, &txp->tq); 101 __netif_tx_unlock(txq); 102 } 103 104 while ((skb = __skb_dequeue(&txp->tq)) != NULL) { 105 /* Skip tc and netfilter to prevent redirection loop. */ 106 skb->redirected = 0; 107 #ifdef CONFIG_NET_CLS_ACT 108 skb->tc_skip_classify = 1; 109 #endif 110 nf_skip_egress(skb, true); 111 112 ifb_update_q_stats(&txp->tx_stats, skb->len); 113 114 rcu_read_lock(); 115 skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif); 116 if (!skb->dev) { 117 rcu_read_unlock(); 118 dev_kfree_skb(skb); 119 txp->dev->stats.tx_dropped++; 120 if (skb_queue_len(&txp->tq) != 0) 121 goto resched; 122 break; 123 } 124 rcu_read_unlock(); 125 skb->skb_iif = txp->dev->ifindex; 126 127 if (!skb->from_ingress) { 128 dev_queue_xmit(skb); 129 } else { 130 skb_pull_rcsum(skb, skb->mac_len); 131 netif_receive_skb(skb); 132 } 133 } 134 135 if (__netif_tx_trylock(txq)) { 136 skb = skb_peek(&txp->rq); 137 if (!skb) { 138 txp->tasklet_pending = 0; 139 if (netif_tx_queue_stopped(txq)) 140 netif_tx_wake_queue(txq); 141 } else { 142 __netif_tx_unlock(txq); 143 goto resched; 144 } 145 __netif_tx_unlock(txq); 146 } else { 147 resched: 148 txp->tasklet_pending = 1; 149 tasklet_schedule(&txp->ifb_tasklet); 150 } 151 152 } 153 154 static void ifb_stats64(struct net_device *dev, 155 struct rtnl_link_stats64 *stats) 156 { 157 struct ifb_dev_private *dp = netdev_priv(dev); 158 struct ifb_q_private *txp = dp->tx_private; 159 unsigned int start; 160 u64 packets, bytes; 161 int i; 162 163 for (i = 0; i < dev->num_tx_queues; i++,txp++) { 164 do { 165 start = u64_stats_fetch_begin(&txp->rx_stats.sync); 166 packets = txp->rx_stats.packets; 167 bytes = txp->rx_stats.bytes; 168 } while (u64_stats_fetch_retry(&txp->rx_stats.sync, start)); 169 stats->rx_packets += packets; 170 stats->rx_bytes += bytes; 171 172 do { 173 start = u64_stats_fetch_begin(&txp->tx_stats.sync); 174 packets = txp->tx_stats.packets; 175 bytes = txp->tx_stats.bytes; 176 } while (u64_stats_fetch_retry(&txp->tx_stats.sync, start)); 177 stats->tx_packets += packets; 178 stats->tx_bytes += bytes; 179 } 180 stats->rx_dropped = dev->stats.rx_dropped; 181 stats->tx_dropped = dev->stats.tx_dropped; 182 } 183 184 static int ifb_dev_init(struct net_device *dev) 185 { 186 struct ifb_dev_private *dp = netdev_priv(dev); 187 struct ifb_q_private *txp; 188 int i; 189 190 txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL); 191 if (!txp) 192 return -ENOMEM; 193 dp->tx_private = txp; 194 for (i = 0; i < dev->num_tx_queues; i++,txp++) { 195 txp->txqnum = i; 196 txp->dev = dev; 197 __skb_queue_head_init(&txp->rq); 198 __skb_queue_head_init(&txp->tq); 199 u64_stats_init(&txp->rx_stats.sync); 200 u64_stats_init(&txp->tx_stats.sync); 201 tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet); 202 netif_tx_start_queue(netdev_get_tx_queue(dev, i)); 203 } 204 return 0; 205 } 206 207 static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf) 208 { 209 u8 *p = buf; 210 int i, j; 211 212 switch (stringset) { 213 case ETH_SS_STATS: 214 for (i = 0; i < dev->real_num_rx_queues; i++) 215 for (j = 0; j < IFB_Q_STATS_LEN; j++) 216 ethtool_sprintf(&p, "rx_queue_%u_%.18s", 217 i, ifb_q_stats_desc[j].desc); 218 219 for (i = 0; i < dev->real_num_tx_queues; i++) 220 for (j = 0; j < IFB_Q_STATS_LEN; j++) 221 ethtool_sprintf(&p, "tx_queue_%u_%.18s", 222 i, ifb_q_stats_desc[j].desc); 223 224 break; 225 } 226 } 227 228 static int ifb_get_sset_count(struct net_device *dev, int sset) 229 { 230 switch (sset) { 231 case ETH_SS_STATS: 232 return IFB_Q_STATS_LEN * (dev->real_num_rx_queues + 233 dev->real_num_tx_queues); 234 default: 235 return -EOPNOTSUPP; 236 } 237 } 238 239 static void ifb_fill_stats_data(u64 **data, 240 struct ifb_q_stats *q_stats) 241 { 242 void *stats_base = (void *)q_stats; 243 unsigned int start; 244 size_t offset; 245 int j; 246 247 do { 248 start = u64_stats_fetch_begin(&q_stats->sync); 249 for (j = 0; j < IFB_Q_STATS_LEN; j++) { 250 offset = ifb_q_stats_desc[j].offset; 251 (*data)[j] = *(u64 *)(stats_base + offset); 252 } 253 } while (u64_stats_fetch_retry(&q_stats->sync, start)); 254 255 *data += IFB_Q_STATS_LEN; 256 } 257 258 static void ifb_get_ethtool_stats(struct net_device *dev, 259 struct ethtool_stats *stats, u64 *data) 260 { 261 struct ifb_dev_private *dp = netdev_priv(dev); 262 struct ifb_q_private *txp; 263 int i; 264 265 for (i = 0; i < dev->real_num_rx_queues; i++) { 266 txp = dp->tx_private + i; 267 ifb_fill_stats_data(&data, &txp->rx_stats); 268 } 269 270 for (i = 0; i < dev->real_num_tx_queues; i++) { 271 txp = dp->tx_private + i; 272 ifb_fill_stats_data(&data, &txp->tx_stats); 273 } 274 } 275 276 static const struct net_device_ops ifb_netdev_ops = { 277 .ndo_open = ifb_open, 278 .ndo_stop = ifb_close, 279 .ndo_get_stats64 = ifb_stats64, 280 .ndo_start_xmit = ifb_xmit, 281 .ndo_validate_addr = eth_validate_addr, 282 .ndo_init = ifb_dev_init, 283 }; 284 285 static const struct ethtool_ops ifb_ethtool_ops = { 286 .get_strings = ifb_get_strings, 287 .get_sset_count = ifb_get_sset_count, 288 .get_ethtool_stats = ifb_get_ethtool_stats, 289 }; 290 291 #define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \ 292 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ 293 NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX | \ 294 NETIF_F_HW_VLAN_STAG_TX) 295 296 static void ifb_dev_free(struct net_device *dev) 297 { 298 struct ifb_dev_private *dp = netdev_priv(dev); 299 struct ifb_q_private *txp = dp->tx_private; 300 int i; 301 302 for (i = 0; i < dev->num_tx_queues; i++,txp++) { 303 tasklet_kill(&txp->ifb_tasklet); 304 __skb_queue_purge(&txp->rq); 305 __skb_queue_purge(&txp->tq); 306 } 307 kfree(dp->tx_private); 308 } 309 310 static void ifb_setup(struct net_device *dev) 311 { 312 /* Initialize the device structure. */ 313 dev->netdev_ops = &ifb_netdev_ops; 314 dev->ethtool_ops = &ifb_ethtool_ops; 315 316 /* Fill in device structure with ethernet-generic values. */ 317 ether_setup(dev); 318 dev->tx_queue_len = TX_Q_LIMIT; 319 320 dev->features |= IFB_FEATURES; 321 dev->hw_features |= dev->features; 322 dev->hw_enc_features |= dev->features; 323 dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | 324 NETIF_F_HW_VLAN_STAG_TX); 325 326 dev->flags |= IFF_NOARP; 327 dev->flags &= ~IFF_MULTICAST; 328 dev->priv_flags &= ~IFF_TX_SKB_SHARING; 329 netif_keep_dst(dev); 330 eth_hw_addr_random(dev); 331 dev->needs_free_netdev = true; 332 dev->priv_destructor = ifb_dev_free; 333 334 dev->min_mtu = 0; 335 dev->max_mtu = 0; 336 netif_set_tso_max_size(dev, GSO_MAX_SIZE); 337 } 338 339 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev) 340 { 341 struct ifb_dev_private *dp = netdev_priv(dev); 342 struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb); 343 344 ifb_update_q_stats(&txp->rx_stats, skb->len); 345 346 if (!skb->redirected || !skb->skb_iif) { 347 dev_kfree_skb(skb); 348 dev->stats.rx_dropped++; 349 return NETDEV_TX_OK; 350 } 351 352 if (skb_queue_len(&txp->rq) >= dev->tx_queue_len) 353 netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum)); 354 355 __skb_queue_tail(&txp->rq, skb); 356 if (!txp->tasklet_pending) { 357 txp->tasklet_pending = 1; 358 tasklet_schedule(&txp->ifb_tasklet); 359 } 360 361 return NETDEV_TX_OK; 362 } 363 364 static int ifb_close(struct net_device *dev) 365 { 366 netif_tx_stop_all_queues(dev); 367 return 0; 368 } 369 370 static int ifb_open(struct net_device *dev) 371 { 372 netif_tx_start_all_queues(dev); 373 return 0; 374 } 375 376 static int ifb_validate(struct nlattr *tb[], struct nlattr *data[], 377 struct netlink_ext_ack *extack) 378 { 379 if (tb[IFLA_ADDRESS]) { 380 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) 381 return -EINVAL; 382 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) 383 return -EADDRNOTAVAIL; 384 } 385 return 0; 386 } 387 388 static struct rtnl_link_ops ifb_link_ops __read_mostly = { 389 .kind = "ifb", 390 .priv_size = sizeof(struct ifb_dev_private), 391 .setup = ifb_setup, 392 .validate = ifb_validate, 393 }; 394 395 /* Number of ifb devices to be set up by this module. 396 * Note that these legacy devices have one queue. 397 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb 398 */ 399 static int numifbs = 2; 400 module_param(numifbs, int, 0); 401 MODULE_PARM_DESC(numifbs, "Number of ifb devices"); 402 403 static int __init ifb_init_one(int index) 404 { 405 struct net_device *dev_ifb; 406 int err; 407 408 dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d", 409 NET_NAME_UNKNOWN, ifb_setup); 410 411 if (!dev_ifb) 412 return -ENOMEM; 413 414 dev_ifb->rtnl_link_ops = &ifb_link_ops; 415 err = register_netdevice(dev_ifb); 416 if (err < 0) 417 goto err; 418 419 return 0; 420 421 err: 422 free_netdev(dev_ifb); 423 return err; 424 } 425 426 static int __init ifb_init_module(void) 427 { 428 int i, err; 429 430 err = rtnl_link_register(&ifb_link_ops); 431 if (err < 0) 432 return err; 433 434 rtnl_net_lock(&init_net); 435 436 for (i = 0; i < numifbs && !err; i++) { 437 err = ifb_init_one(i); 438 cond_resched(); 439 } 440 441 rtnl_net_unlock(&init_net); 442 443 if (err) 444 rtnl_link_unregister(&ifb_link_ops); 445 446 return err; 447 } 448 449 static void __exit ifb_cleanup_module(void) 450 { 451 rtnl_link_unregister(&ifb_link_ops); 452 } 453 454 module_init(ifb_init_module); 455 module_exit(ifb_cleanup_module); 456 MODULE_LICENSE("GPL"); 457 MODULE_DESCRIPTION("Intermediate Functional Block (ifb) netdevice driver for sharing of resources and ingress packet queuing"); 458 MODULE_AUTHOR("Jamal Hadi Salim"); 459 MODULE_ALIAS_RTNL_LINK("ifb"); 460