xref: /linux/drivers/net/ifb.c (revision 8be4d31cb8aaeea27bde4b7ddb26e28a89062ebf)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* drivers/net/ifb.c:
3 
4 	The purpose of this driver is to provide a device that allows
5 	for sharing of resources:
6 
7 	1) qdiscs/policies that are per device as opposed to system wide.
8 	ifb allows for a device which can be redirected to thus providing
9 	an impression of sharing.
10 
11 	2) Allows for queueing incoming traffic for shaping instead of
12 	dropping.
13 
14 	The original concept is based on what is known as the IMQ
15 	driver initially written by Martin Devera, later rewritten
16 	by Patrick McHardy and then maintained by Andre Correa.
17 
18 	You need the tc action  mirror or redirect to feed this device
19 	packets.
20 
21 
22 	Authors:	Jamal Hadi Salim (2005)
23 
24 */
25 
26 
27 #include <linux/module.h>
28 #include <linux/kernel.h>
29 #include <linux/netdevice.h>
30 #include <linux/ethtool.h>
31 #include <linux/etherdevice.h>
32 #include <linux/init.h>
33 #include <linux/interrupt.h>
34 #include <linux/moduleparam.h>
35 #include <linux/netfilter_netdev.h>
36 #include <net/pkt_sched.h>
37 #include <net/net_namespace.h>
38 
/* Default tx_queue_len installed on every ifb device by ifb_setup(). */
#define TX_Q_LIMIT	32
40 
41 struct ifb_q_stats {
42 	u64 packets;
43 	u64 bytes;
44 	struct u64_stats_sync	sync;
45 };
46 
47 struct ifb_q_private {
48 	struct net_device	*dev;
49 	struct tasklet_struct   ifb_tasklet;
50 	int			tasklet_pending;
51 	int			txqnum;
52 	struct sk_buff_head     rq;
53 	struct sk_buff_head     tq;
54 	struct ifb_q_stats	rx_stats;
55 	struct ifb_q_stats	tx_stats;
56 } ____cacheline_aligned_in_smp;
57 
/* Contents of the netdev_priv() area of an ifb device. */
struct ifb_dev_private {
	/* Array of per-queue state, one entry per TX queue (see ifb_dev_init()). */
	struct ifb_q_private *tx_private;
};
61 
62 /* For ethtools stats. */
63 struct ifb_q_stats_desc {
64 	char	desc[ETH_GSTRING_LEN];
65 	size_t	offset;
66 };
67 
68 #define IFB_Q_STAT(m)	offsetof(struct ifb_q_stats, m)
69 
70 static const struct ifb_q_stats_desc ifb_q_stats_desc[] = {
71 	{ "packets",	IFB_Q_STAT(packets) },
72 	{ "bytes",	IFB_Q_STAT(bytes) },
73 };
74 
75 #define IFB_Q_STATS_LEN	ARRAY_SIZE(ifb_q_stats_desc)
76 
77 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev);
78 static int ifb_open(struct net_device *dev);
79 static int ifb_close(struct net_device *dev);
80 
ifb_update_q_stats(struct ifb_q_stats * stats,int len)81 static void ifb_update_q_stats(struct ifb_q_stats *stats, int len)
82 {
83 	u64_stats_update_begin(&stats->sync);
84 	stats->packets++;
85 	stats->bytes += len;
86 	u64_stats_update_end(&stats->sync);
87 }
88 
ifb_ri_tasklet(struct tasklet_struct * t)89 static void ifb_ri_tasklet(struct tasklet_struct *t)
90 {
91 	struct ifb_q_private *txp = from_tasklet(txp, t, ifb_tasklet);
92 	struct netdev_queue *txq;
93 	struct sk_buff *skb;
94 
95 	txq = netdev_get_tx_queue(txp->dev, txp->txqnum);
96 	skb = skb_peek(&txp->tq);
97 	if (!skb) {
98 		if (!__netif_tx_trylock(txq))
99 			goto resched;
100 		skb_queue_splice_tail_init(&txp->rq, &txp->tq);
101 		__netif_tx_unlock(txq);
102 	}
103 
104 	while ((skb = __skb_dequeue(&txp->tq)) != NULL) {
105 		/* Skip tc and netfilter to prevent redirection loop. */
106 		skb->redirected = 0;
107 #ifdef CONFIG_NET_CLS_ACT
108 		skb->tc_skip_classify = 1;
109 #endif
110 		nf_skip_egress(skb, true);
111 
112 		ifb_update_q_stats(&txp->tx_stats, skb->len);
113 
114 		rcu_read_lock();
115 		skb->dev = dev_get_by_index_rcu(dev_net(txp->dev), skb->skb_iif);
116 		if (!skb->dev) {
117 			rcu_read_unlock();
118 			dev_kfree_skb(skb);
119 			txp->dev->stats.tx_dropped++;
120 			if (skb_queue_len(&txp->tq) != 0)
121 				goto resched;
122 			break;
123 		}
124 		rcu_read_unlock();
125 		skb->skb_iif = txp->dev->ifindex;
126 
127 		if (!skb->from_ingress) {
128 			dev_queue_xmit(skb);
129 		} else {
130 			skb_pull_rcsum(skb, skb->mac_len);
131 			netif_receive_skb(skb);
132 		}
133 	}
134 
135 	if (__netif_tx_trylock(txq)) {
136 		skb = skb_peek(&txp->rq);
137 		if (!skb) {
138 			txp->tasklet_pending = 0;
139 			if (netif_tx_queue_stopped(txq))
140 				netif_tx_wake_queue(txq);
141 		} else {
142 			__netif_tx_unlock(txq);
143 			goto resched;
144 		}
145 		__netif_tx_unlock(txq);
146 	} else {
147 resched:
148 		txp->tasklet_pending = 1;
149 		tasklet_schedule(&txp->ifb_tasklet);
150 	}
151 
152 }
153 
ifb_stats64(struct net_device * dev,struct rtnl_link_stats64 * stats)154 static void ifb_stats64(struct net_device *dev,
155 			struct rtnl_link_stats64 *stats)
156 {
157 	struct ifb_dev_private *dp = netdev_priv(dev);
158 	struct ifb_q_private *txp = dp->tx_private;
159 	unsigned int start;
160 	u64 packets, bytes;
161 	int i;
162 
163 	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
164 		do {
165 			start = u64_stats_fetch_begin(&txp->rx_stats.sync);
166 			packets = txp->rx_stats.packets;
167 			bytes = txp->rx_stats.bytes;
168 		} while (u64_stats_fetch_retry(&txp->rx_stats.sync, start));
169 		stats->rx_packets += packets;
170 		stats->rx_bytes += bytes;
171 
172 		do {
173 			start = u64_stats_fetch_begin(&txp->tx_stats.sync);
174 			packets = txp->tx_stats.packets;
175 			bytes = txp->tx_stats.bytes;
176 		} while (u64_stats_fetch_retry(&txp->tx_stats.sync, start));
177 		stats->tx_packets += packets;
178 		stats->tx_bytes += bytes;
179 	}
180 	stats->rx_dropped = dev->stats.rx_dropped;
181 	stats->tx_dropped = dev->stats.tx_dropped;
182 }
183 
ifb_dev_init(struct net_device * dev)184 static int ifb_dev_init(struct net_device *dev)
185 {
186 	struct ifb_dev_private *dp = netdev_priv(dev);
187 	struct ifb_q_private *txp;
188 	int i;
189 
190 	txp = kcalloc(dev->num_tx_queues, sizeof(*txp), GFP_KERNEL);
191 	if (!txp)
192 		return -ENOMEM;
193 	dp->tx_private = txp;
194 	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
195 		txp->txqnum = i;
196 		txp->dev = dev;
197 		__skb_queue_head_init(&txp->rq);
198 		__skb_queue_head_init(&txp->tq);
199 		u64_stats_init(&txp->rx_stats.sync);
200 		u64_stats_init(&txp->tx_stats.sync);
201 		tasklet_setup(&txp->ifb_tasklet, ifb_ri_tasklet);
202 		netif_tx_start_queue(netdev_get_tx_queue(dev, i));
203 	}
204 	return 0;
205 }
206 
/*
 * ethtool get_strings: for ETH_SS_STATS, emit one name per counter for
 * every real RX queue followed by every real TX queue. Other string sets
 * leave @buf untouched.
 */
static void ifb_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	u8 *cursor = buf;
	int q, s;

	if (stringset != ETH_SS_STATS)
		return;

	for (q = 0; q < dev->real_num_rx_queues; q++) {
		for (s = 0; s < IFB_Q_STATS_LEN; s++)
			ethtool_sprintf(&cursor, "rx_queue_%u_%.18s",
					q, ifb_q_stats_desc[s].desc);
	}

	for (q = 0; q < dev->real_num_tx_queues; q++) {
		for (s = 0; s < IFB_Q_STATS_LEN; s++)
			ethtool_sprintf(&cursor, "tx_queue_%u_%.18s",
					q, ifb_q_stats_desc[s].desc);
	}
}
227 
ifb_get_sset_count(struct net_device * dev,int sset)228 static int ifb_get_sset_count(struct net_device *dev, int sset)
229 {
230 	switch (sset) {
231 	case ETH_SS_STATS:
232 		return IFB_Q_STATS_LEN * (dev->real_num_rx_queues +
233 					  dev->real_num_tx_queues);
234 	default:
235 		return -EOPNOTSUPP;
236 	}
237 }
238 
/*
 * Copy a consistent snapshot of every counter listed in ifb_q_stats_desc
 * from @q_stats into the buffer *@data, then advance *@data past the
 * IFB_Q_STATS_LEN values just written.
 */
static void ifb_fill_stats_data(u64 **data,
				struct ifb_q_stats *q_stats)
{
	void *base = (void *)q_stats;
	u64 *out = *data;
	unsigned int seq;
	int k;

	/* Re-read all counters if an update raced with the copy. */
	do {
		seq = u64_stats_fetch_begin(&q_stats->sync);
		for (k = 0; k < IFB_Q_STATS_LEN; k++)
			out[k] = *(u64 *)(base + ifb_q_stats_desc[k].offset);
	} while (u64_stats_fetch_retry(&q_stats->sync, seq));

	*data = out + IFB_Q_STATS_LEN;
}
257 
/*
 * ethtool get_ethtool_stats: copy the per-queue counters into @data.
 *
 * Layout of @data, matching ifb_get_strings() and ifb_get_sset_count():
 * IFB_Q_STATS_LEN values for each of the real_num_rx_queues "rx" entries,
 * followed by IFB_Q_STATS_LEN values for each of the real_num_tx_queues
 * "tx" entries.
 *
 * Both the rx and the tx counters live in dp->tx_private, which
 * ifb_dev_init() sizes by dev->num_tx_queues. Nothing in this driver ties
 * the RX queue count to the TX queue count, so an RX index is only
 * dereferenced when it is below num_tx_queues. RX slots without backing
 * storage are reported as zero instead of reading past the end of the
 * array, which keeps the number of values written unchanged.
 *
 * @stats is unused.
 */
static void ifb_get_ethtool_stats(struct net_device *dev,
				  struct ethtool_stats *stats, u64 *data)
{
	struct ifb_dev_private *dp = netdev_priv(dev);
	struct ifb_q_private *txp;
	int i, j;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		if (i >= dev->num_tx_queues) {
			/* No ifb_q_private behind this RX index: emit zeros. */
			for (j = 0; j < IFB_Q_STATS_LEN; j++)
				*data++ = 0;
			continue;
		}
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->rx_stats);
	}

	for (i = 0; i < dev->real_num_tx_queues; i++) {
		txp = dp->tx_private + i;
		ifb_fill_stats_data(&data, &txp->tx_stats);
	}
}
275 
276 static const struct net_device_ops ifb_netdev_ops = {
277 	.ndo_open	= ifb_open,
278 	.ndo_stop	= ifb_close,
279 	.ndo_get_stats64 = ifb_stats64,
280 	.ndo_start_xmit	= ifb_xmit,
281 	.ndo_validate_addr = eth_validate_addr,
282 	.ndo_init	= ifb_dev_init,
283 };
284 
285 static const struct ethtool_ops ifb_ethtool_ops = {
286 	.get_strings		= ifb_get_strings,
287 	.get_sset_count		= ifb_get_sset_count,
288 	.get_ethtool_stats	= ifb_get_ethtool_stats,
289 };
290 
/* Feature bits that ifb_setup() enables on every ifb device. */
#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_FRAGLIST | \
		      NETIF_F_HIGHDMA | \
		      NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
		      NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX)
295 
ifb_dev_free(struct net_device * dev)296 static void ifb_dev_free(struct net_device *dev)
297 {
298 	struct ifb_dev_private *dp = netdev_priv(dev);
299 	struct ifb_q_private *txp = dp->tx_private;
300 	int i;
301 
302 	for (i = 0; i < dev->num_tx_queues; i++,txp++) {
303 		tasklet_kill(&txp->ifb_tasklet);
304 		__skb_queue_purge(&txp->rq);
305 		__skb_queue_purge(&txp->tq);
306 	}
307 	kfree(dp->tx_private);
308 }
309 
ifb_setup(struct net_device * dev)310 static void ifb_setup(struct net_device *dev)
311 {
312 	/* Initialize the device structure. */
313 	dev->netdev_ops = &ifb_netdev_ops;
314 	dev->ethtool_ops = &ifb_ethtool_ops;
315 
316 	/* Fill in device structure with ethernet-generic values. */
317 	ether_setup(dev);
318 	dev->tx_queue_len = TX_Q_LIMIT;
319 
320 	dev->features |= IFB_FEATURES;
321 	dev->hw_features |= dev->features;
322 	dev->hw_enc_features |= dev->features;
323 	dev->vlan_features |= IFB_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX |
324 					       NETIF_F_HW_VLAN_STAG_TX);
325 
326 	dev->flags |= IFF_NOARP;
327 	dev->flags &= ~IFF_MULTICAST;
328 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
329 	netif_keep_dst(dev);
330 	eth_hw_addr_random(dev);
331 	dev->needs_free_netdev = true;
332 	dev->priv_destructor = ifb_dev_free;
333 
334 	dev->min_mtu = 0;
335 	dev->max_mtu = 0;
336 	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
337 }
338 
ifb_xmit(struct sk_buff * skb,struct net_device * dev)339 static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
340 {
341 	struct ifb_dev_private *dp = netdev_priv(dev);
342 	struct ifb_q_private *txp = dp->tx_private + skb_get_queue_mapping(skb);
343 
344 	ifb_update_q_stats(&txp->rx_stats, skb->len);
345 
346 	if (!skb->redirected || !skb->skb_iif) {
347 		dev_kfree_skb(skb);
348 		dev->stats.rx_dropped++;
349 		return NETDEV_TX_OK;
350 	}
351 
352 	if (skb_queue_len(&txp->rq) >= dev->tx_queue_len)
353 		netif_tx_stop_queue(netdev_get_tx_queue(dev, txp->txqnum));
354 
355 	__skb_queue_tail(&txp->rq, skb);
356 	if (!txp->tasklet_pending) {
357 		txp->tasklet_pending = 1;
358 		tasklet_schedule(&txp->ifb_tasklet);
359 	}
360 
361 	return NETDEV_TX_OK;
362 }
363 
/* ndo_stop: stop every TX queue of @dev. Always succeeds. */
static int ifb_close(struct net_device *dev)
{
	netif_tx_stop_all_queues(dev);

	return 0;
}
369 
/* ndo_open: start every TX queue of @dev. Always succeeds. */
static int ifb_open(struct net_device *dev)
{
	netif_tx_start_all_queues(dev);

	return 0;
}
375 
ifb_validate(struct nlattr * tb[],struct nlattr * data[],struct netlink_ext_ack * extack)376 static int ifb_validate(struct nlattr *tb[], struct nlattr *data[],
377 			struct netlink_ext_ack *extack)
378 {
379 	if (tb[IFLA_ADDRESS]) {
380 		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
381 			return -EINVAL;
382 		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
383 			return -EADDRNOTAVAIL;
384 	}
385 	return 0;
386 }
387 
388 static struct rtnl_link_ops ifb_link_ops __read_mostly = {
389 	.kind		= "ifb",
390 	.priv_size	= sizeof(struct ifb_dev_private),
391 	.setup		= ifb_setup,
392 	.validate	= ifb_validate,
393 };
394 
395 /* Number of ifb devices to be set up by this module.
396  * Note that these legacy devices have one queue.
397  * Prefer something like : ip link add ifb10 numtxqueues 8 type ifb
398  */
399 static int numifbs = 2;
400 module_param(numifbs, int, 0);
401 MODULE_PARM_DESC(numifbs, "Number of ifb devices");
402 
ifb_init_one(int index)403 static int __init ifb_init_one(int index)
404 {
405 	struct net_device *dev_ifb;
406 	int err;
407 
408 	dev_ifb = alloc_netdev(sizeof(struct ifb_dev_private), "ifb%d",
409 			       NET_NAME_UNKNOWN, ifb_setup);
410 
411 	if (!dev_ifb)
412 		return -ENOMEM;
413 
414 	dev_ifb->rtnl_link_ops = &ifb_link_ops;
415 	err = register_netdevice(dev_ifb);
416 	if (err < 0)
417 		goto err;
418 
419 	return 0;
420 
421 err:
422 	free_netdev(dev_ifb);
423 	return err;
424 }
425 
ifb_init_module(void)426 static int __init ifb_init_module(void)
427 {
428 	int i, err;
429 
430 	err = rtnl_link_register(&ifb_link_ops);
431 	if (err < 0)
432 		return err;
433 
434 	rtnl_net_lock(&init_net);
435 
436 	for (i = 0; i < numifbs && !err; i++) {
437 		err = ifb_init_one(i);
438 		cond_resched();
439 	}
440 
441 	rtnl_net_unlock(&init_net);
442 
443 	if (err)
444 		rtnl_link_unregister(&ifb_link_ops);
445 
446 	return err;
447 }
448 
ifb_cleanup_module(void)449 static void __exit ifb_cleanup_module(void)
450 {
451 	rtnl_link_unregister(&ifb_link_ops);
452 }
453 
454 module_init(ifb_init_module);
455 module_exit(ifb_cleanup_module);
456 MODULE_LICENSE("GPL");
457 MODULE_DESCRIPTION("Intermediate Functional Block (ifb) netdevice driver for sharing of resources and ingress packet queuing");
458 MODULE_AUTHOR("Jamal Hadi Salim");
459 MODULE_ALIAS_RTNL_LINK("ifb");
460