xref: /linux/drivers/net/netdevsim/netdev.c (revision af2d6148d2a159e1a0862bce5a2c88c1618a2b27)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
36 MODULE_IMPORT_NS("NETDEV_INTERNAL");
37 
/* Depth of each simulated RX ring: bounds the per-queue skb backlog and is
 * also used as the size of the page pool created for every queue.
 */
#define NSIM_RING_SIZE		256
39 
40 static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq)
41 {
42 	struct netdevsim *ns = netdev_priv(dev);
43 	struct net_device *peer_dev;
44 	struct netdevsim *peer_ns;
45 	struct netdev_queue *txq;
46 	u16 idx;
47 
48 	idx = rq->napi.index;
49 	rcu_read_lock();
50 	peer_ns = rcu_dereference(ns->peer);
51 	if (!peer_ns)
52 		goto out;
53 
54 	/* TX device */
55 	peer_dev = peer_ns->netdev;
56 	if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
57 		goto out;
58 
59 	txq = netdev_get_tx_queue(peer_dev, idx);
60 	if (!netif_tx_queue_stopped(txq))
61 		goto out;
62 
63 	netif_tx_wake_queue(txq);
64 out:
65 	rcu_read_unlock();
66 }
67 
68 static void nsim_stop_tx_queue(struct net_device *tx_dev,
69 			       struct net_device *rx_dev,
70 			       struct nsim_rq *rq,
71 			       u16 idx)
72 {
73 	/* If different queues size, do not stop, since it is not
74 	 * easy to find which TX queue is mapped here
75 	 */
76 	if (rx_dev->real_num_tx_queues != tx_dev->num_rx_queues)
77 		return;
78 
79 	/* rq is the queue on the receive side */
80 	netif_subqueue_try_stop(tx_dev, idx,
81 				NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue),
82 				NSIM_RING_SIZE / 2);
83 }
84 
85 static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev,
86 			struct nsim_rq *rq, struct sk_buff *skb)
87 {
88 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
89 		dev_kfree_skb_any(skb);
90 		return NET_RX_DROP;
91 	}
92 
93 	skb_queue_tail(&rq->skb_queue, skb);
94 
95 	/* Stop the peer TX queue avoiding dropping packets later */
96 	if (skb_queue_len(&rq->skb_queue) >= NSIM_RING_SIZE)
97 		nsim_stop_tx_queue(tx_dev, rx_dev, rq,
98 				   skb_get_queue_mapping(skb));
99 
100 	return NET_RX_SUCCESS;
101 }
102 
/* Forward @skb from @tx_dev to @rx_dev and queue it on @rq.
 *
 * Returns the non-zero result of __dev_forward_skb() if forwarding fails,
 * otherwise whatever nsim_napi_rx() returns.
 */
static int nsim_forward_skb(struct net_device *tx_dev,
			    struct net_device *rx_dev,
			    struct sk_buff *skb,
			    struct nsim_rq *rq)
{
	int ret;

	ret = __dev_forward_skb(rx_dev, skb);
	if (ret)
		return ret;

	return nsim_napi_rx(tx_dev, rx_dev, rq, skb);
}
111 
112 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
113 {
114 	struct netdevsim *ns = netdev_priv(dev);
115 	struct net_device *peer_dev;
116 	unsigned int len = skb->len;
117 	struct netdevsim *peer_ns;
118 	struct netdev_config *cfg;
119 	struct nsim_rq *rq;
120 	int rxq;
121 
122 	rcu_read_lock();
123 	if (!nsim_ipsec_tx(ns, skb))
124 		goto out_drop_free;
125 
126 	peer_ns = rcu_dereference(ns->peer);
127 	if (!peer_ns)
128 		goto out_drop_free;
129 
130 	peer_dev = peer_ns->netdev;
131 	rxq = skb_get_queue_mapping(skb);
132 	if (rxq >= peer_dev->num_rx_queues)
133 		rxq = rxq % peer_dev->num_rx_queues;
134 	rq = peer_ns->rq[rxq];
135 
136 	cfg = peer_dev->cfg;
137 	if (skb_is_nonlinear(skb) &&
138 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
139 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
140 	      cfg->hds_thresh > len)))
141 		skb_linearize(skb);
142 
143 	skb_tx_timestamp(skb);
144 	if (unlikely(nsim_forward_skb(dev, peer_dev, skb, rq) == NET_RX_DROP))
145 		goto out_drop_cnt;
146 
147 	if (!hrtimer_active(&rq->napi_timer))
148 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
149 
150 	rcu_read_unlock();
151 	dev_dstats_tx_add(dev, len);
152 	return NETDEV_TX_OK;
153 
154 out_drop_free:
155 	dev_kfree_skb(skb);
156 out_drop_cnt:
157 	rcu_read_unlock();
158 	dev_dstats_tx_dropped(dev);
159 	return NETDEV_TX_OK;
160 }
161 
/* ndo_set_rx_mode: intentionally a no-op for the simulated device. */
static void nsim_set_rx_mode(struct net_device *dev)
{
	/* nothing to program */
}
165 
166 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
167 {
168 	struct netdevsim *ns = netdev_priv(dev);
169 
170 	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
171 	    new_mtu > NSIM_XDP_MAX_MTU)
172 		return -EBUSY;
173 
174 	WRITE_ONCE(dev->mtu, new_mtu);
175 
176 	return 0;
177 }
178 
179 static int
180 nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
181 {
182 	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
183 }
184 
185 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
186 {
187 	struct netdevsim *ns = netdev_priv(dev);
188 	struct nsim_dev *nsim_dev = ns->nsim_dev;
189 
190 	/* Only refuse multicast addresses, zero address can mean unset/any. */
191 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
192 		return -EINVAL;
193 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
194 
195 	return 0;
196 }
197 
198 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
199 			    u16 vlan, u8 qos, __be16 vlan_proto)
200 {
201 	struct netdevsim *ns = netdev_priv(dev);
202 	struct nsim_dev *nsim_dev = ns->nsim_dev;
203 
204 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
205 		return -EINVAL;
206 
207 	nsim_dev->vfconfigs[vf].vlan = vlan;
208 	nsim_dev->vfconfigs[vf].qos = qos;
209 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
210 
211 	return 0;
212 }
213 
214 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
215 {
216 	struct netdevsim *ns = netdev_priv(dev);
217 	struct nsim_dev *nsim_dev = ns->nsim_dev;
218 
219 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
220 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
221 		return -EOPNOTSUPP;
222 	}
223 
224 	if (vf >= nsim_dev_get_vfs(nsim_dev))
225 		return -EINVAL;
226 
227 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
228 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
229 
230 	return 0;
231 }
232 
233 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
234 {
235 	struct netdevsim *ns = netdev_priv(dev);
236 	struct nsim_dev *nsim_dev = ns->nsim_dev;
237 
238 	if (vf >= nsim_dev_get_vfs(nsim_dev))
239 		return -EINVAL;
240 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
241 
242 	return 0;
243 }
244 
245 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
246 {
247 	struct netdevsim *ns = netdev_priv(dev);
248 	struct nsim_dev *nsim_dev = ns->nsim_dev;
249 
250 	if (vf >= nsim_dev_get_vfs(nsim_dev))
251 		return -EINVAL;
252 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
253 
254 	return 0;
255 }
256 
257 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
258 {
259 	struct netdevsim *ns = netdev_priv(dev);
260 	struct nsim_dev *nsim_dev = ns->nsim_dev;
261 
262 	if (vf >= nsim_dev_get_vfs(nsim_dev))
263 		return -EINVAL;
264 	nsim_dev->vfconfigs[vf].trusted = val;
265 
266 	return 0;
267 }
268 
269 static int
270 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
271 {
272 	struct netdevsim *ns = netdev_priv(dev);
273 	struct nsim_dev *nsim_dev = ns->nsim_dev;
274 
275 	if (vf >= nsim_dev_get_vfs(nsim_dev))
276 		return -EINVAL;
277 
278 	ivi->vf = vf;
279 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
280 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
281 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
282 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
283 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
284 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
285 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
286 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
287 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
288 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
289 
290 	return 0;
291 }
292 
293 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
294 {
295 	struct netdevsim *ns = netdev_priv(dev);
296 	struct nsim_dev *nsim_dev = ns->nsim_dev;
297 
298 	if (vf >= nsim_dev_get_vfs(nsim_dev))
299 		return -EINVAL;
300 
301 	switch (state) {
302 	case IFLA_VF_LINK_STATE_AUTO:
303 	case IFLA_VF_LINK_STATE_ENABLE:
304 	case IFLA_VF_LINK_STATE_DISABLE:
305 		break;
306 	default:
307 		return -EINVAL;
308 	}
309 
310 	nsim_dev->vfconfigs[vf].link_state = state;
311 
312 	return 0;
313 }
314 
315 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
316 {
317 	stats->window_drops = 0;
318 	stats->tx_overruns = 0;
319 }
320 
321 static int nsim_setup_tc_taprio(struct net_device *dev,
322 				struct tc_taprio_qopt_offload *offload)
323 {
324 	int err = 0;
325 
326 	switch (offload->cmd) {
327 	case TAPRIO_CMD_REPLACE:
328 	case TAPRIO_CMD_DESTROY:
329 		break;
330 	case TAPRIO_CMD_STATS:
331 		nsim_taprio_stats(&offload->stats);
332 		break;
333 	default:
334 		err = -EOPNOTSUPP;
335 	}
336 
337 	return err;
338 }
339 
340 static LIST_HEAD(nsim_block_cb_list);
341 
342 static int
343 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
344 {
345 	struct netdevsim *ns = netdev_priv(dev);
346 
347 	switch (type) {
348 	case TC_SETUP_QDISC_TAPRIO:
349 		return nsim_setup_tc_taprio(dev, type_data);
350 	case TC_SETUP_BLOCK:
351 		return flow_block_cb_setup_simple(type_data,
352 						  &nsim_block_cb_list,
353 						  nsim_setup_tc_block_cb,
354 						  ns, ns, true);
355 	default:
356 		return -EOPNOTSUPP;
357 	}
358 }
359 
360 static int
361 nsim_set_features(struct net_device *dev, netdev_features_t features)
362 {
363 	struct netdevsim *ns = netdev_priv(dev);
364 
365 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
366 		return nsim_bpf_disable_tc(ns);
367 
368 	return 0;
369 }
370 
371 static int nsim_get_iflink(const struct net_device *dev)
372 {
373 	struct netdevsim *nsim, *peer;
374 	int iflink;
375 
376 	nsim = netdev_priv(dev);
377 
378 	rcu_read_lock();
379 	peer = rcu_dereference(nsim->peer);
380 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
381 			READ_ONCE(dev->ifindex);
382 	rcu_read_unlock();
383 
384 	return iflink;
385 }
386 
387 static int nsim_rcv(struct nsim_rq *rq, int budget)
388 {
389 	struct net_device *dev = rq->napi.dev;
390 	struct sk_buff *skb;
391 	unsigned int skblen;
392 	int i, ret;
393 
394 	for (i = 0; i < budget; i++) {
395 		if (skb_queue_empty(&rq->skb_queue))
396 			break;
397 
398 		skb = skb_dequeue(&rq->skb_queue);
399 		/* skb might be discard at netif_receive_skb, save the len */
400 		skblen = skb->len;
401 		skb_mark_napi_id(skb, &rq->napi);
402 		ret = netif_receive_skb(skb);
403 		if (ret == NET_RX_SUCCESS)
404 			dev_dstats_rx_add(dev, skblen);
405 		else
406 			dev_dstats_rx_dropped(dev);
407 	}
408 
409 	nsim_start_peer_tx_queue(dev, rq);
410 	return i;
411 }
412 
413 static int nsim_poll(struct napi_struct *napi, int budget)
414 {
415 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
416 	int done;
417 
418 	done = nsim_rcv(rq, budget);
419 	if (done < budget)
420 		napi_complete_done(napi, done);
421 
422 	return done;
423 }
424 
425 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
426 {
427 	struct page_pool_params params = {
428 		.order = 0,
429 		.pool_size = NSIM_RING_SIZE,
430 		.nid = NUMA_NO_NODE,
431 		.dev = &napi->dev->dev,
432 		.napi = napi,
433 		.dma_dir = DMA_BIDIRECTIONAL,
434 		.netdev = napi->dev,
435 	};
436 	struct page_pool *pool;
437 
438 	pool = page_pool_create(&params);
439 	if (IS_ERR(pool))
440 		return PTR_ERR(pool);
441 
442 	*p = pool;
443 	return 0;
444 }
445 
446 static int nsim_init_napi(struct netdevsim *ns)
447 {
448 	struct net_device *dev = ns->netdev;
449 	struct nsim_rq *rq;
450 	int err, i;
451 
452 	for (i = 0; i < dev->num_rx_queues; i++) {
453 		rq = ns->rq[i];
454 
455 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
456 	}
457 
458 	for (i = 0; i < dev->num_rx_queues; i++) {
459 		rq = ns->rq[i];
460 
461 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
462 		if (err)
463 			goto err_pp_destroy;
464 	}
465 
466 	return 0;
467 
468 err_pp_destroy:
469 	while (i--) {
470 		page_pool_destroy(ns->rq[i]->page_pool);
471 		ns->rq[i]->page_pool = NULL;
472 	}
473 
474 	for (i = 0; i < dev->num_rx_queues; i++)
475 		__netif_napi_del_locked(&ns->rq[i]->napi);
476 
477 	return err;
478 }
479 
480 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
481 {
482 	struct nsim_rq *rq;
483 
484 	rq = container_of(timer, struct nsim_rq, napi_timer);
485 	napi_schedule(&rq->napi);
486 
487 	return HRTIMER_NORESTART;
488 }
489 
490 static void nsim_rq_timer_init(struct nsim_rq *rq)
491 {
492 	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
493 		      HRTIMER_MODE_REL);
494 }
495 
496 static void nsim_enable_napi(struct netdevsim *ns)
497 {
498 	struct net_device *dev = ns->netdev;
499 	int i;
500 
501 	for (i = 0; i < dev->num_rx_queues; i++) {
502 		struct nsim_rq *rq = ns->rq[i];
503 
504 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
505 		napi_enable_locked(&rq->napi);
506 	}
507 }
508 
/* ndo_open: set up and enable NAPI for all RX queues.
 *
 * Asserts that the netdev instance lock is held. Returns 0 or the error from
 * nsim_init_napi().
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	int err;

	netdev_assert_locked(dev);

	err = nsim_init_napi(ns);
	if (!err)
		nsim_enable_napi(ns);

	return err;
}
524 
525 static void nsim_del_napi(struct netdevsim *ns)
526 {
527 	struct net_device *dev = ns->netdev;
528 	int i;
529 
530 	for (i = 0; i < dev->num_rx_queues; i++) {
531 		struct nsim_rq *rq = ns->rq[i];
532 
533 		napi_disable_locked(&rq->napi);
534 		__netif_napi_del_locked(&rq->napi);
535 	}
536 	synchronize_net();
537 
538 	for (i = 0; i < dev->num_rx_queues; i++) {
539 		page_pool_destroy(ns->rq[i]->page_pool);
540 		ns->rq[i]->page_pool = NULL;
541 	}
542 }
543 
544 static int nsim_stop(struct net_device *dev)
545 {
546 	struct netdevsim *ns = netdev_priv(dev);
547 	struct netdevsim *peer;
548 
549 	netdev_assert_locked(dev);
550 
551 	netif_carrier_off(dev);
552 	peer = rtnl_dereference(ns->peer);
553 	if (peer)
554 		netif_carrier_off(peer->netdev);
555 
556 	nsim_del_napi(ns);
557 
558 	return 0;
559 }
560 
/* net_shaper_ops.set: accept every shaper configuration without acting. */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	/* nothing to program on the simulated device */
	return 0;
}
567 
/* net_shaper_ops.delete: accept every shaper removal without acting. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	/* nothing to undo on the simulated device */
	return 0;
}
574 
/* net_shaper_ops.group: accept every grouping request without acting. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	/* nothing to program on the simulated device */
	return 0;
}
583 
584 static void nsim_shaper_cap(struct net_shaper_binding *binding,
585 			    enum net_shaper_scope scope,
586 			    unsigned long *flags)
587 {
588 	*flags = ULONG_MAX;
589 }
590 
591 static const struct net_shaper_ops nsim_shaper_ops = {
592 	.set			= nsim_shaper_set,
593 	.delete			= nsim_shaper_del,
594 	.group			= nsim_shaper_group,
595 	.capabilities		= nsim_shaper_cap,
596 };
597 
598 static const struct net_device_ops nsim_netdev_ops = {
599 	.ndo_start_xmit		= nsim_start_xmit,
600 	.ndo_set_rx_mode	= nsim_set_rx_mode,
601 	.ndo_set_mac_address	= eth_mac_addr,
602 	.ndo_validate_addr	= eth_validate_addr,
603 	.ndo_change_mtu		= nsim_change_mtu,
604 	.ndo_set_vf_mac		= nsim_set_vf_mac,
605 	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
606 	.ndo_set_vf_rate	= nsim_set_vf_rate,
607 	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
608 	.ndo_set_vf_trust	= nsim_set_vf_trust,
609 	.ndo_get_vf_config	= nsim_get_vf_config,
610 	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
611 	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
612 	.ndo_setup_tc		= nsim_setup_tc,
613 	.ndo_set_features	= nsim_set_features,
614 	.ndo_get_iflink		= nsim_get_iflink,
615 	.ndo_bpf		= nsim_bpf,
616 	.ndo_open		= nsim_open,
617 	.ndo_stop		= nsim_stop,
618 	.net_shaper_ops		= &nsim_shaper_ops,
619 };
620 
621 static const struct net_device_ops nsim_vf_netdev_ops = {
622 	.ndo_start_xmit		= nsim_start_xmit,
623 	.ndo_set_rx_mode	= nsim_set_rx_mode,
624 	.ndo_set_mac_address	= eth_mac_addr,
625 	.ndo_validate_addr	= eth_validate_addr,
626 	.ndo_change_mtu		= nsim_change_mtu,
627 	.ndo_setup_tc		= nsim_setup_tc,
628 	.ndo_set_features	= nsim_set_features,
629 };
630 
631 /* We don't have true per-queue stats, yet, so do some random fakery here.
632  * Only report stuff for queue 0.
633  */
634 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
635 				    struct netdev_queue_stats_rx *stats)
636 {
637 	struct rtnl_link_stats64 rtstats = {};
638 
639 	if (!idx)
640 		dev_get_stats(dev, &rtstats);
641 
642 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
643 	stats->bytes = rtstats.rx_bytes;
644 }
645 
646 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
647 				    struct netdev_queue_stats_tx *stats)
648 {
649 	struct rtnl_link_stats64 rtstats = {};
650 
651 	if (!idx)
652 		dev_get_stats(dev, &rtstats);
653 
654 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
655 	stats->bytes = rtstats.tx_bytes;
656 }
657 
658 static void nsim_get_base_stats(struct net_device *dev,
659 				struct netdev_queue_stats_rx *rx,
660 				struct netdev_queue_stats_tx *tx)
661 {
662 	struct rtnl_link_stats64 rtstats = {};
663 
664 	dev_get_stats(dev, &rtstats);
665 
666 	rx->packets = !!rtstats.rx_packets;
667 	rx->bytes = 0;
668 	tx->packets = !!rtstats.tx_packets;
669 	tx->bytes = 0;
670 }
671 
672 static const struct netdev_stat_ops nsim_stat_ops = {
673 	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
674 	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
675 	.get_base_stats		= nsim_get_base_stats,
676 };
677 
678 static struct nsim_rq *nsim_queue_alloc(void)
679 {
680 	struct nsim_rq *rq;
681 
682 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
683 	if (!rq)
684 		return NULL;
685 
686 	skb_queue_head_init(&rq->skb_queue);
687 	nsim_rq_timer_init(rq);
688 	return rq;
689 }
690 
691 static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq)
692 {
693 	hrtimer_cancel(&rq->napi_timer);
694 	local_bh_disable();
695 	dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen);
696 	local_bh_enable();
697 	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
698 	kfree(rq);
699 }
700 
/* Per-queue memory handed around during a queue restart.
 *
 * Which member is used depends on ns->rq_reset_mode:
 * - 0 (normal) - new NAPI, new pool (old NAPI enabled when new one is added)
 * - 1          - new pool only (NAPI is just disabled / enabled)
 * - 2          - new NAPI, new pool (old NAPI removed before new one is added)
 * - 3          - new NAPI, new pool (old NAPI disabled when new one is added)
 *
 * Mode 1 uses @pp; all other modes use @rq.
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;
	struct page_pool *pp;
};
711 
712 static int
713 nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
714 {
715 	struct nsim_queue_mem *qmem = per_queue_mem;
716 	struct netdevsim *ns = netdev_priv(dev);
717 	int err;
718 
719 	if (ns->rq_reset_mode > 3)
720 		return -EINVAL;
721 
722 	if (ns->rq_reset_mode == 1) {
723 		if (!netif_running(ns->netdev))
724 			return -ENETDOWN;
725 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
726 	}
727 
728 	qmem->rq = nsim_queue_alloc();
729 	if (!qmem->rq)
730 		return -ENOMEM;
731 
732 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
733 	if (err)
734 		goto err_free;
735 
736 	if (!ns->rq_reset_mode)
737 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
738 					     idx);
739 
740 	return 0;
741 
742 err_free:
743 	nsim_queue_free(dev, qmem->rq);
744 	return err;
745 }
746 
747 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
748 {
749 	struct nsim_queue_mem *qmem = per_queue_mem;
750 	struct netdevsim *ns = netdev_priv(dev);
751 
752 	page_pool_destroy(qmem->pp);
753 	if (qmem->rq) {
754 		if (!ns->rq_reset_mode)
755 			netif_napi_del_locked(&qmem->rq->napi);
756 		page_pool_destroy(qmem->rq->page_pool);
757 		nsim_queue_free(dev, qmem->rq);
758 	}
759 }
760 
761 static int
762 nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
763 {
764 	struct nsim_queue_mem *qmem = per_queue_mem;
765 	struct netdevsim *ns = netdev_priv(dev);
766 
767 	netdev_assert_locked(dev);
768 
769 	if (ns->rq_reset_mode == 1) {
770 		ns->rq[idx]->page_pool = qmem->pp;
771 		napi_enable_locked(&ns->rq[idx]->napi);
772 		return 0;
773 	}
774 
775 	/* netif_napi_add()/_del() should normally be called from alloc/free,
776 	 * here we want to test various call orders.
777 	 */
778 	if (ns->rq_reset_mode == 2) {
779 		netif_napi_del_locked(&ns->rq[idx]->napi);
780 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
781 					     idx);
782 	} else if (ns->rq_reset_mode == 3) {
783 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
784 					     idx);
785 		netif_napi_del_locked(&ns->rq[idx]->napi);
786 	}
787 
788 	ns->rq[idx] = qmem->rq;
789 	napi_enable_locked(&ns->rq[idx]->napi);
790 
791 	return 0;
792 }
793 
794 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
795 {
796 	struct nsim_queue_mem *qmem = per_queue_mem;
797 	struct netdevsim *ns = netdev_priv(dev);
798 
799 	netdev_assert_locked(dev);
800 
801 	napi_disable_locked(&ns->rq[idx]->napi);
802 
803 	if (ns->rq_reset_mode == 1) {
804 		qmem->pp = ns->rq[idx]->page_pool;
805 		page_pool_disable_direct_recycling(qmem->pp);
806 	} else {
807 		qmem->rq = ns->rq[idx];
808 	}
809 
810 	return 0;
811 }
812 
813 static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
814 	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
815 	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
816 	.ndo_queue_mem_free	= nsim_queue_mem_free,
817 	.ndo_queue_start	= nsim_queue_start,
818 	.ndo_queue_stop		= nsim_queue_stop,
819 };
820 
821 static ssize_t
822 nsim_qreset_write(struct file *file, const char __user *data,
823 		  size_t count, loff_t *ppos)
824 {
825 	struct netdevsim *ns = file->private_data;
826 	unsigned int queue, mode;
827 	char buf[32];
828 	ssize_t ret;
829 
830 	if (count >= sizeof(buf))
831 		return -EINVAL;
832 	if (copy_from_user(buf, data, count))
833 		return -EFAULT;
834 	buf[count] = '\0';
835 
836 	ret = sscanf(buf, "%u %u", &queue, &mode);
837 	if (ret != 2)
838 		return -EINVAL;
839 
840 	netdev_lock(ns->netdev);
841 	if (queue >= ns->netdev->real_num_rx_queues) {
842 		ret = -EINVAL;
843 		goto exit_unlock;
844 	}
845 
846 	ns->rq_reset_mode = mode;
847 	ret = netdev_rx_queue_restart(ns->netdev, queue);
848 	ns->rq_reset_mode = 0;
849 	if (ret)
850 		goto exit_unlock;
851 
852 	ret = count;
853 exit_unlock:
854 	netdev_unlock(ns->netdev);
855 	return ret;
856 }
857 
858 static const struct file_operations nsim_qreset_fops = {
859 	.open = simple_open,
860 	.write = nsim_qreset_write,
861 	.owner = THIS_MODULE,
862 };
863 
864 static ssize_t
865 nsim_pp_hold_read(struct file *file, char __user *data,
866 		  size_t count, loff_t *ppos)
867 {
868 	struct netdevsim *ns = file->private_data;
869 	char buf[3] = "n\n";
870 
871 	if (ns->page)
872 		buf[0] = 'y';
873 
874 	return simple_read_from_buffer(data, count, ppos, buf, 2);
875 }
876 
877 static ssize_t
878 nsim_pp_hold_write(struct file *file, const char __user *data,
879 		   size_t count, loff_t *ppos)
880 {
881 	struct netdevsim *ns = file->private_data;
882 	ssize_t ret;
883 	bool val;
884 
885 	ret = kstrtobool_from_user(data, count, &val);
886 	if (ret)
887 		return ret;
888 
889 	rtnl_lock();
890 	ret = count;
891 	if (val == !!ns->page)
892 		goto exit;
893 
894 	if (!netif_running(ns->netdev) && val) {
895 		ret = -ENETDOWN;
896 	} else if (val) {
897 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
898 		if (!ns->page)
899 			ret = -ENOMEM;
900 	} else {
901 		page_pool_put_full_page(ns->page->pp, ns->page, false);
902 		ns->page = NULL;
903 	}
904 
905 exit:
906 	rtnl_unlock();
907 	return ret;
908 }
909 
910 static const struct file_operations nsim_pp_hold_fops = {
911 	.open = simple_open,
912 	.read = nsim_pp_hold_read,
913 	.write = nsim_pp_hold_write,
914 	.llseek = generic_file_llseek,
915 	.owner = THIS_MODULE,
916 };
917 
918 static void nsim_setup(struct net_device *dev)
919 {
920 	ether_setup(dev);
921 	eth_hw_addr_random(dev);
922 
923 	dev->flags &= ~IFF_MULTICAST;
924 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
925 	dev->features |= NETIF_F_HIGHDMA |
926 			 NETIF_F_SG |
927 			 NETIF_F_FRAGLIST |
928 			 NETIF_F_HW_CSUM |
929 			 NETIF_F_LRO |
930 			 NETIF_F_TSO;
931 	dev->hw_features |= NETIF_F_HW_TC |
932 			    NETIF_F_SG |
933 			    NETIF_F_FRAGLIST |
934 			    NETIF_F_HW_CSUM |
935 			    NETIF_F_LRO |
936 			    NETIF_F_TSO;
937 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
938 	dev->max_mtu = ETH_MAX_MTU;
939 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
940 }
941 
942 static int nsim_queue_init(struct netdevsim *ns)
943 {
944 	struct net_device *dev = ns->netdev;
945 	int i;
946 
947 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
948 			 GFP_KERNEL_ACCOUNT);
949 	if (!ns->rq)
950 		return -ENOMEM;
951 
952 	for (i = 0; i < dev->num_rx_queues; i++) {
953 		ns->rq[i] = nsim_queue_alloc();
954 		if (!ns->rq[i])
955 			goto err_free_prev;
956 	}
957 
958 	return 0;
959 
960 err_free_prev:
961 	while (i--)
962 		kfree(ns->rq[i]);
963 	kfree(ns->rq);
964 	return -ENOMEM;
965 }
966 
967 static void nsim_queue_uninit(struct netdevsim *ns)
968 {
969 	struct net_device *dev = ns->netdev;
970 	int i;
971 
972 	for (i = 0; i < dev->num_rx_queues; i++)
973 		nsim_queue_free(dev, ns->rq[i]);
974 
975 	kfree(ns->rq);
976 	ns->rq = NULL;
977 }
978 
979 static int nsim_init_netdevsim(struct netdevsim *ns)
980 {
981 	struct mock_phc *phc;
982 	int err;
983 
984 	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
985 	if (IS_ERR(phc))
986 		return PTR_ERR(phc);
987 
988 	ns->phc = phc;
989 	ns->netdev->netdev_ops = &nsim_netdev_ops;
990 	ns->netdev->stat_ops = &nsim_stat_ops;
991 	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
992 	netdev_lockdep_set_classes(ns->netdev);
993 
994 	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
995 	if (err)
996 		goto err_phc_destroy;
997 
998 	rtnl_lock();
999 	err = nsim_queue_init(ns);
1000 	if (err)
1001 		goto err_utn_destroy;
1002 
1003 	err = nsim_bpf_init(ns);
1004 	if (err)
1005 		goto err_rq_destroy;
1006 
1007 	nsim_macsec_init(ns);
1008 	nsim_ipsec_init(ns);
1009 
1010 	err = register_netdevice(ns->netdev);
1011 	if (err)
1012 		goto err_ipsec_teardown;
1013 	rtnl_unlock();
1014 
1015 	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
1016 		ns->nb.notifier_call = netdev_debug_event;
1017 		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
1018 							&ns->nn))
1019 			ns->nb.notifier_call = NULL;
1020 	}
1021 
1022 	return 0;
1023 
1024 err_ipsec_teardown:
1025 	nsim_ipsec_teardown(ns);
1026 	nsim_macsec_teardown(ns);
1027 	nsim_bpf_uninit(ns);
1028 err_rq_destroy:
1029 	nsim_queue_uninit(ns);
1030 err_utn_destroy:
1031 	rtnl_unlock();
1032 	nsim_udp_tunnels_info_destroy(ns->netdev);
1033 err_phc_destroy:
1034 	mock_phc_destroy(ns->phc);
1035 	return err;
1036 }
1037 
1038 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
1039 {
1040 	int err;
1041 
1042 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
1043 	rtnl_lock();
1044 	err = register_netdevice(ns->netdev);
1045 	rtnl_unlock();
1046 	return err;
1047 }
1048 
1049 static void nsim_exit_netdevsim(struct netdevsim *ns)
1050 {
1051 	nsim_udp_tunnels_info_destroy(ns->netdev);
1052 	mock_phc_destroy(ns->phc);
1053 }
1054 
1055 struct netdevsim *nsim_create(struct nsim_dev *nsim_dev,
1056 			      struct nsim_dev_port *nsim_dev_port,
1057 			      u8 perm_addr[ETH_ALEN])
1058 {
1059 	struct net_device *dev;
1060 	struct netdevsim *ns;
1061 	int err;
1062 
1063 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1064 			      nsim_dev->nsim_bus_dev->num_queues);
1065 	if (!dev)
1066 		return ERR_PTR(-ENOMEM);
1067 
1068 	if (perm_addr)
1069 		memcpy(dev->perm_addr, perm_addr, ETH_ALEN);
1070 
1071 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1072 	ns = netdev_priv(dev);
1073 	ns->netdev = dev;
1074 	ns->nsim_dev = nsim_dev;
1075 	ns->nsim_dev_port = nsim_dev_port;
1076 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1077 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1078 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1079 	nsim_ethtool_init(ns);
1080 	if (nsim_dev_port_is_pf(nsim_dev_port))
1081 		err = nsim_init_netdevsim(ns);
1082 	else
1083 		err = nsim_init_netdevsim_vf(ns);
1084 	if (err)
1085 		goto err_free_netdev;
1086 
1087 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1088 					 ns, &nsim_pp_hold_fops);
1089 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1090 					 nsim_dev_port->ddir, ns,
1091 					 &nsim_qreset_fops);
1092 	return ns;
1093 
1094 err_free_netdev:
1095 	free_netdev(dev);
1096 	return ERR_PTR(err);
1097 }
1098 
1099 void nsim_destroy(struct netdevsim *ns)
1100 {
1101 	struct net_device *dev = ns->netdev;
1102 	struct netdevsim *peer;
1103 
1104 	debugfs_remove(ns->qr_dfs);
1105 	debugfs_remove(ns->pp_dfs);
1106 
1107 	if (ns->nb.notifier_call)
1108 		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
1109 						      &ns->nn);
1110 
1111 	rtnl_lock();
1112 	peer = rtnl_dereference(ns->peer);
1113 	if (peer)
1114 		RCU_INIT_POINTER(peer->peer, NULL);
1115 	RCU_INIT_POINTER(ns->peer, NULL);
1116 	unregister_netdevice(dev);
1117 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
1118 		nsim_macsec_teardown(ns);
1119 		nsim_ipsec_teardown(ns);
1120 		nsim_bpf_uninit(ns);
1121 		nsim_queue_uninit(ns);
1122 	}
1123 	rtnl_unlock();
1124 	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
1125 		nsim_exit_netdevsim(ns);
1126 
1127 	/* Put this intentionally late to exercise the orphaning path */
1128 	if (ns->page) {
1129 		page_pool_put_full_page(ns->page->pp, ns->page, false);
1130 		ns->page = NULL;
1131 	}
1132 
1133 	free_netdev(dev);
1134 }
1135 
1136 bool netdev_is_nsim(struct net_device *dev)
1137 {
1138 	return dev->netdev_ops == &nsim_netdev_ops;
1139 }
1140 
1141 static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
1142 			 struct netlink_ext_ack *extack)
1143 {
1144 	NL_SET_ERR_MSG_MOD(extack,
1145 			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
1146 	return -EOPNOTSUPP;
1147 }
1148 
1149 static struct rtnl_link_ops nsim_link_ops __read_mostly = {
1150 	.kind		= DRV_NAME,
1151 	.validate	= nsim_validate,
1152 };
1153 
1154 static int __init nsim_module_init(void)
1155 {
1156 	int err;
1157 
1158 	err = nsim_dev_init();
1159 	if (err)
1160 		return err;
1161 
1162 	err = nsim_bus_init();
1163 	if (err)
1164 		goto err_dev_exit;
1165 
1166 	err = rtnl_link_register(&nsim_link_ops);
1167 	if (err)
1168 		goto err_bus_exit;
1169 
1170 	return 0;
1171 
1172 err_bus_exit:
1173 	nsim_bus_exit();
1174 err_dev_exit:
1175 	nsim_dev_exit();
1176 	return err;
1177 }
1178 
1179 static void __exit nsim_module_exit(void)
1180 {
1181 	rtnl_link_unregister(&nsim_link_ops);
1182 	nsim_bus_exit();
1183 	nsim_dev_exit();
1184 }
1185 
1186 module_init(nsim_module_init);
1187 module_exit(nsim_module_exit);
1188 MODULE_LICENSE("GPL");
1189 MODULE_DESCRIPTION("Simulated networking device for testing");
1190 MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1191