1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* drivers/net/ifb.c:
3
4 The purpose of this driver is to provide a device that allows
5 for sharing of resources:
6
7 1) qdiscs/policies that are per device as opposed to system wide.
8 ifb allows for a device which can be redirected to thus providing
9 an impression of sharing.
10
11 2) Allows for queueing incoming traffic for shaping instead of
12 dropping.
13
14 The original concept is based on what is known as the IMQ
15 driver initially written by Martin Devera, later rewritten
16 by Patrick McHardy and then maintained by Andre Correa.
17
18 You need the tc action mirror or redirect to feed this device
19 packets.
20
21
22 Authors: Jamal Hadi Salim (2005)
23
24 */
25
26
27 #include <linux/module.h>
28 #include <linux/kernel.h>
29 #include <linux/netdevice.h>
30 #include <linux/ethtool.h>
31 #include <linux/etherdevice.h>
32 #include <linux/init.h>
33 #include <linux/interrupt.h>
34 #include <linux/moduleparam.h>
35 #include <linux/netfilter_netdev.h>
36 #include <net/pkt_sched.h>
37 #include <net/net_namespace.h>
38
/* Value ifb_setup() assigns to dev->tx_queue_len. */
#define TX_Q_LIMIT    32
40
/* Packet/byte counters for one direction of one queue. */
struct ifb_q_stats {
	u64_stats_t packets;		/* incremented once per accounted skb */
	u64_stats_t bytes;		/* sum of the accounted skb lengths */
	struct u64_stats_sync sync;	/* brackets every update and read of the counters */
};
46
/* State of one TX queue; ifb_dev_init() allocates one entry per device TX queue. */
struct ifb_q_private {
	struct net_device *dev;			/* ifb device this queue belongs to */
	struct tasklet_struct ifb_tasklet;	/* runs ifb_ri_tasklet() for this queue */
	int tasklet_pending;			/* set when the tasklet gets scheduled, cleared once rq is found empty */
	int txqnum;				/* index of this queue within dev */
	struct sk_buff_head rq;			/* packets appended by ifb_xmit() */
	struct sk_buff_head tq;			/* batch spliced from rq and drained by the tasklet */
	struct ifb_q_stats rx_stats;		/* updated in ifb_xmit() */
	struct ifb_q_stats tx_stats;		/* updated in ifb_ri_tasklet() */
} ____cacheline_aligned_in_smp;
57
/* netdev private data: the array of per-queue state set up by ifb_dev_init(). */
struct ifb_dev_private {
	struct ifb_q_private *tx_private;	/* dev->num_tx_queues entries */
};
61
/* For ethtool stats: name of one counter and its offset in struct ifb_q_stats. */
struct ifb_q_stats_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};
67
/* Byte offset of counter @m inside struct ifb_q_stats. */
#define IFB_Q_STAT(m)	offsetof(struct ifb_q_stats, m)

/* Counters reported per queue through ethtool, in reporting order. */
static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
	{ "packets",	IFB_Q_STAT(packets) },
	{ "bytes",	IFB_Q_STAT(bytes) },
};

/* Number of counters reported for each queue and direction. */
#define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)
76
/* Forward declarations: ifb_netdev_ops refers to these before they are defined. */
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
static int ifb_open(struct net_device *dev);
static int ifb_close(struct net_device *dev);
80
ifb_update_q_stats(struct ifb_q_stats * stats,int len)81 static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
82 {
83 u64_stats_update_begin(&stats->sync);
84 u64_stats_inc(&stats->packets);
85 u64_stats_add(&stats->bytes, len);
86 u64_stats_update_end(&stats->sync);
87 }
88
ifb_ri_tasklet(struct tasklet_struct * t)89 static void ifb_ri_tasklet(struct tasklet_struct *t)
90 {
91 struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
92 struct netdev_queue *txq;
93 struct sk_buff *skb;
94
95 txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
96 skb = skb_peek(&txp->tq);
97 if (!skb) {
98 if (!__netif_tx_trylock(txq))
99 goto resched;
100 skb_queue_splice_tail_init(&txp->rq, &txp->tq);
101 __netif_tx_unlock(txq);
102 }
103
104 while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
105 /* Skip tc and netfilter to prevent redirection loop. */
106 skb->redirected = 0;
107 #ifdef CONFIG_NET_CLS_ACT
108 skb->tc_skip_classify = 1;
109 #endif
110 nf_skip_egress(skb, true);
111
112 ifb_update_q_stats(&txp->tx_stats, skb->len);
113
114 rcu_read_lock();
115 skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
116 if (!skb->dev) {
117 rcu_read_unlock();
118 dev_kfree_skb(skb);
119 txp->dev->stats.tx_dropped++;
120 if (skb_queue_len(&txp->tq) != 0)
121 goto resched;
122 break;
123 }
124 rcu_read_unlock();
125 skb->skb_iif = txp->dev->ifindex;
126
127 if (!skb->from_ingress) {
128 dev_queue_xmit(skb);
129 } else {
130 skb_pull_rcsum(skb, skb->mac_len);
131 netif_receive_skb(skb);
132 }
133 }
134
135 if (__netif_tx_trylock(txq)) {
136 skb = skb_peek(&txp->rq);
137 if (!skb) {
138 txp->tasklet_pending = 0;
139 if (netif_tx_queue_stopped(txq))
140 netif_tx_wake_queue(txq);
141 } else {
142 __netif_tx_unlock(txq);
143 goto resched;
144 }
145 __netif_tx_unlock(txq);
146 } else {
147 resched:
148 txp->tasklet_pending = 1;
149 tasklet_schedule(&txp->ifb_tasklet);
150 }
151
152 }
153
ifb_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)154 static void ifb_stats64(struct net_device *dev,
155 struct rtnl_link_stats64 *stats)
156 {
157 struct ifb_dev_private *dp = netdev_priv(dev);
158 struct ifb_q_private *txp = dp->tx_private;
159 unsigned int start;
160 u64 packets, bytes;
161 int i;
162
163 for (i = 0; i < dev->num_tx_queues; i++,txp++) {
164 do {
165 start = u64_stats_fetch_begin(&txp->rx_stats.sync);
166 packets = u64_stats_read(&txp->rx_stats.packets);
167 bytes = u64_stats_read(&txp->rx_stats.bytes);
168 } while (u64_stats_fetch_retry(&txp->rx_stats.sync, start));
169 stats->rx_packets += packets;
170 stats->rx_bytes += bytes;
171
172 do {
173 start = u64_stats_fetch_begin(&txp->tx_stats.sync);
174 packets = u64_stats_read(&txp->tx_stats.packets);
175 bytes = u64_stats_read(&txp->tx_stats.bytes);
176 } while (u64_stats_fetch_retry(&txp->tx_stats.sync, start));
177 stats->tx_packets += packets;
178 stats->tx_bytes += bytes;
179 }
180 stats->rx_dropped = dev->stats.rx_dropped;
181 stats->tx_dropped = dev->stats.tx_dropped;
182 }
183
ifb_dev_init(struct net_device * dev)184 static int ifb_dev_init(struct net_device *dev)
185 {
186 struct ifb_dev_private *dp = netdev_priv(dev);
187 struct ifb_q_private *txp;
188 int i;
189
190 txp = kzalloc_objs(*txp, dev->num_tx_queues);
191 if (!txp)
192 return -ENOMEM;
193 dp->tx_private = txp;
194 for (i = 0; i < dev->num_tx_queues; i++,txp++) {
195 txp->txqnum = i;
196 txp->dev = dev;
197 __skb_queue_head_init(&txp->rq);
198 __skb_queue_head_init(&txp->tq);
199 u64_stats_init(&txp->rx_stats.sync);
200 u64_stats_init(&txp->tx_stats.sync);
201 tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
202 netif_tx_start_queue(netdev_get_tx_queue(dev, i));
203 }
204 return 0;
205 }
206
/*
 * ethtool get_strings: emit the counter names for ETH_SS_STATS into @buf.
 *
 * All rx names come first (queue by queue), then all tx names, which is
 * the same order ifb_get_ethtool_stats() uses for the values.  Other
 * string sets produce no output.
 */
static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *cursor = buf;
	int q, s;

	if (stringset != ETH_SS_STATS)
		return;

	for (q = 0; q < dev->num_tx_queues; q++) {
		for (s = 0; s < IFB_Q_STATS_LEN; s++)
			ethtool_sprintf(&cursor, "rx_queue_%u_%.18s",
					q, ifb_q_stats_desc[s].desc);
	}

	for (q = 0; q < dev->num_tx_queues; q++) {
		for (s = 0; s < IFB_Q_STATS_LEN; s++)
			ethtool_sprintf(&cursor, "tx_queue_%u_%.18s",
					q, ifb_q_stats_desc[s].desc);
	}
}
227
ifb_get_sset_count(struct net_device * dev,int sset)228 static int ifb_get_sset_count(struct net_device *dev, int sset)
229 {
230 switch (sset) {
231 case ETH_SS_STATS:
232 return IFB_Q_STATS_LEN * dev->num_tx_queues * 2;
233 default:
234 return -EOPNOTSUPP;
235 }
236 }
237
/*
 * Copy every counter described by ifb_q_stats_desc[] from @q_stats into
 * the output array at *@data, then advance *@data past the written slots.
 *
 * The counters are read inside a u64_stats fetch/retry loop.
 */
static void ifb_fill_stats_data(u64 **data,
				struct ifb_q_stats *q_stats)
{
	u64 *out = *data;
	unsigned int seq;
	int k;

	do {
		seq = u64_stats_fetch_begin(&q_stats->sync);
		for (k = 0; k < IFB_Q_STATS_LEN; k++) {
			/* Locate the counter through its recorded offset. */
			u8 *field = (u8 *)q_stats + ifb_q_stats_desc[k].offset;

			out[k] = u64_stats_read((u64_stats_t *)field);
		}
	} while (u64_stats_fetch_retry(&q_stats->sync, seq));

	*data = out + IFB_Q_STATS_LEN;
}
256
/*
 * ethtool get_ethtool_stats: fill @data with the per-queue counters.
 *
 * Values are written in the order the names are produced by
 * ifb_get_strings(): rx counters of all queues, then tx counters.
 */
static void ifb_get_ethtool_stats(struct net_device *dev,
				  struct ethtool_stats *stats, u64 *data)
{
	struct ifb_dev_private *priv = netdev_priv(dev);
	int qi;

	for (qi = 0; qi < dev->num_tx_queues; qi++)
		ifb_fill_stats_data(&data, &priv->tx_private[qi].rx_stats);

	for (qi = 0; qi < dev->num_tx_queues; qi++)
		ifb_fill_stats_data(&data, &priv->tx_private[qi].tx_stats);
}
274
/* Netdevice callbacks installed on every ifb device by ifb_setup(). */
static const struct net_device_ops ifb_netdev_ops = {
	.ndo_open	= ifb_open,
	.ndo_stop	= ifb_close,
	.ndo_get_stats64 = ifb_stats64,
	.ndo_start_xmit	= ifb_xmit,
	.ndo_validate_addr = eth_validate_addr,
	.ndo_init	= ifb_dev_init,		/* allocates the per-queue state */
};
283
/* ethtool callbacks: expose the per-queue packet/byte counters. */
static const struct ethtool_ops ifb_ethtool_ops = {
	.get_strings		= ifb_get_strings,
	.get_sset_count		= ifb_get_sset_count,
	.get_ethtool_stats	= ifb_get_ethtool_stats,
};
289
/* Feature flags ifb_setup() enables on every ifb device. */
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL	| \
		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX		| \
		      NETIF_F_HW_VLAN_STAG_TX)
294
ifb_dev_free(struct net_device * dev)295 static void ifb_dev_free(struct net_device *dev)
296 {
297 struct ifb_dev_private *dp = netdev_priv(dev);
298 struct ifb_q_private *txp = dp->tx_private;
299 int i;
300
301 for (i = 0; i < dev->num_tx_queues; i++,txp++) {
302 tasklet_kill(&txp->ifb_tasklet);
303 __skb_queue_purge(&txp->rq);
304 __skb_queue_purge(&txp->tq);
305 }
306 kfree(dp->tx_private);
307 }
308
ifb_setup(struct net_device * dev)309 static void ifb_setup(struct net_device *dev)
310 {
311 /* Initialize the device structure. */
312 dev->netdev_ops = &ifb_netdev_ops;
313 dev->ethtool_ops = &ifb_ethtool_ops;
314
315 /* Fill in device structure with ethernet-generic values. */
316 ether_setup(dev);
317 dev->tx_queue_len = TX_Q_LIMIT;
318
319 dev->features |= IFB_FEATURES;
320 dev->hw_features |= dev->features;
321 dev->hw_enc_features |= dev->features;
322 dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
323 NETIF_F_HW_VLAN_STAG_TX);
324
325 dev->flags |= IFF_NOARP;
326 dev->flags &= ~IFF_MULTICAST;
327 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
328 netif_keep_dst(dev);
329 eth_hw_addr_random(dev);
330 dev->needs_free_netdev = true;
331 dev->priv_destructor = ifb_dev_free;
332
333 dev->min_mtu = 0;
334 dev->max_mtu = 0;
335 netif_set_tso_max_size(dev, GSO_MAX_SIZE);
336 }
337
ifb_xmit(struct sk_buff * skb,struct net_device * dev)338 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
339 {
340 struct ifb_dev_private *dp = netdev_priv(dev);
341 struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
342
343 ifb_update_q_stats(&txp->rx_stats, skb->len);
344
345 if (!skb->redirected || !skb->skb_iif) {
346 dev_kfree_skb(skb);
347 dev->stats.rx_dropped++;
348 return NETDEV_TX_OK;
349 }
350
351 if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
352 netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
353
354 __skb_queue_tail(&txp->rq, skb);
355 if (!txp->tasklet_pending) {
356 txp->tasklet_pending = 1;
357 tasklet_schedule(&txp->ifb_tasklet);
358 }
359
360 return NETDEV_TX_OK;
361 }
362
/* ndo_stop: stop all TX queues of @dev. Always returns 0. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);
	return 0;
}
368
/* ndo_open: start all TX queues of @dev. Always returns 0. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);
	return 0;
}
374
ifb_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)375 static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
376 struct netlink_ext_ack *extack)
377 {
378 if (tb[IFLA_ADDRESS]) {
379 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
380 return -EINVAL;
381 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
382 return -EADDRNOTAVAIL;
383 }
384 return 0;
385 }
386
/* rtnetlink description of the "ifb" link kind. */
static struct rtnl_link_ops ifb_link_ops __read_mostly = {
	.kind		= "ifb",
	.priv_size	= sizeof(struct ifb_dev_private),
	.setup		= ifb_setup,
	.validate	= ifb_validate,
};
393
/* Number of ifb devices to be set up by this module; ifb_init_module()
 * creates them at load time.
 * Note that these legacy devices have one queue.
 * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
 */
static int numifbs = 2;
module_param(numifbs, int, 0);
MODULE_PARM_DESC(numifbs, "Number of ifb devices");
401
ifb_init_one(int index)402 static int __init ifb_init_one(int index)
403 {
404 struct net_device *dev_ifb;
405 int err;
406
407 dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
408 NET_NAME_UNKNOWN, ifb_setup);
409
410 if (!dev_ifb)
411 return -ENOMEM;
412
413 dev_ifb->rtnl_link_ops = &ifb_link_ops;
414 err = register_netdevice(dev_ifb);
415 if (err < 0)
416 goto err;
417
418 return 0;
419
420 err:
421 free_netdev(dev_ifb);
422 return err;
423 }
424
ifb_init_module(void)425 static int __init ifb_init_module(void)
426 {
427 int i, err;
428
429 err = rtnl_link_register(&ifb_link_ops);
430 if (err < 0)
431 return err;
432
433 rtnl_net_lock(&init_net);
434
435 for (i = 0; i < numifbs && !err; i++) {
436 err = ifb_init_one(i);
437 cond_resched();
438 }
439
440 rtnl_net_unlock(&init_net);
441
442 if (err)
443 rtnl_link_unregister(&ifb_link_ops);
444
445 return err;
446 }
447
/*
 * Module exit: unregister the "ifb" link ops.
 * NOTE(review): presumably this also removes the existing ifb devices;
 * confirm against rtnl_link_unregister().
 */
static void __exit ifb_cleanup_module(void)
{
	rtnl_link_unregister(&ifb_link_ops);
}
452
/* Module entry/exit points and metadata. */
module_init(ifb_init_module);
module_exit(ifb_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Intermediate Functional Block (ifb) netdevice driver for sharing of resources and ingress packet queuing");
MODULE_AUTHOR("Jamal Hadi Salim");
MODULE_ALIAS_RTNL_LINK("ifb");
459