xref: /linux/drivers/net/netdevsim/netdev.c (revision 2151003e773c7e7dba4d64bed4bfc483681b5f6a)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/pkt_cls.h>
29 #include <net/rtnetlink.h>
30 #include <net/udp_tunnel.h>
31 
32 #include "netdevsim.h"
33 
/* Import the NETDEV_INTERNAL symbol namespace for this module. */
MODULE_IMPORT_NS("NETDEV_INTERNAL");

/* Shared sizing constant: the Rx backlog threshold checked in
 * nsim_napi_rx() and the pool_size handed to page_pool_create().
 */
#define NSIM_RING_SIZE		256
37 
38 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
39 {
40 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
41 		dev_kfree_skb_any(skb);
42 		return NET_RX_DROP;
43 	}
44 
45 	skb_queue_tail(&rq->skb_queue, skb);
46 	return NET_RX_SUCCESS;
47 }
48 
/* Run @skb through __dev_forward_skb() towards @dev and, if that returns
 * zero, queue it on @rq. A non-zero result from the forwarding step is
 * returned as is; otherwise the result of nsim_napi_rx() is returned.
 */
static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct nsim_rq *rq)
{
	int ret;

	ret = __dev_forward_skb(dev, skb);
	if (ret)
		return ret;

	return nsim_napi_rx(rq, skb);
}
54 
55 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
56 {
57 	struct netdevsim *ns = netdev_priv(dev);
58 	struct net_device *peer_dev;
59 	unsigned int len = skb->len;
60 	struct netdevsim *peer_ns;
61 	struct netdev_config *cfg;
62 	struct nsim_rq *rq;
63 	int rxq;
64 
65 	rcu_read_lock();
66 	if (!nsim_ipsec_tx(ns, skb))
67 		goto out_drop_free;
68 
69 	peer_ns = rcu_dereference(ns->peer);
70 	if (!peer_ns)
71 		goto out_drop_free;
72 
73 	peer_dev = peer_ns->netdev;
74 	rxq = skb_get_queue_mapping(skb);
75 	if (rxq >= peer_dev->num_rx_queues)
76 		rxq = rxq % peer_dev->num_rx_queues;
77 	rq = peer_ns->rq[rxq];
78 
79 	cfg = peer_dev->cfg;
80 	if (skb_is_nonlinear(skb) &&
81 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
82 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
83 	      cfg->hds_thresh > len)))
84 		skb_linearize(skb);
85 
86 	skb_tx_timestamp(skb);
87 	if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
88 		goto out_drop_cnt;
89 
90 	napi_schedule(&rq->napi);
91 
92 	rcu_read_unlock();
93 	u64_stats_update_begin(&ns->syncp);
94 	ns->tx_packets++;
95 	ns->tx_bytes += len;
96 	u64_stats_update_end(&ns->syncp);
97 	return NETDEV_TX_OK;
98 
99 out_drop_free:
100 	dev_kfree_skb(skb);
101 out_drop_cnt:
102 	rcu_read_unlock();
103 	u64_stats_update_begin(&ns->syncp);
104 	ns->tx_dropped++;
105 	u64_stats_update_end(&ns->syncp);
106 	return NETDEV_TX_OK;
107 }
108 
/* ndo_set_rx_mode handler (wired up in both netdev ops tables).
 * The body is empty, so the call is a no-op.
 */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
112 
113 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
114 {
115 	struct netdevsim *ns = netdev_priv(dev);
116 
117 	if (ns->xdp.prog && new_mtu > NSIM_XDP_MAX_MTU)
118 		return -EBUSY;
119 
120 	WRITE_ONCE(dev->mtu, new_mtu);
121 
122 	return 0;
123 }
124 
125 static void
126 nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
127 {
128 	struct netdevsim *ns = netdev_priv(dev);
129 	unsigned int start;
130 
131 	do {
132 		start = u64_stats_fetch_begin(&ns->syncp);
133 		stats->tx_bytes = ns->tx_bytes;
134 		stats->tx_packets = ns->tx_packets;
135 		stats->tx_dropped = ns->tx_dropped;
136 	} while (u64_stats_fetch_retry(&ns->syncp, start));
137 }
138 
/* Flow block callback installed by nsim_setup_tc().
 * Forwards all three arguments unchanged to nsim_bpf_setup_tc_block_cb()
 * and returns its result.
 */
static int
nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
}
144 
145 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
146 {
147 	struct netdevsim *ns = netdev_priv(dev);
148 	struct nsim_dev *nsim_dev = ns->nsim_dev;
149 
150 	/* Only refuse multicast addresses, zero address can mean unset/any. */
151 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
152 		return -EINVAL;
153 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
154 
155 	return 0;
156 }
157 
158 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
159 			    u16 vlan, u8 qos, __be16 vlan_proto)
160 {
161 	struct netdevsim *ns = netdev_priv(dev);
162 	struct nsim_dev *nsim_dev = ns->nsim_dev;
163 
164 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
165 		return -EINVAL;
166 
167 	nsim_dev->vfconfigs[vf].vlan = vlan;
168 	nsim_dev->vfconfigs[vf].qos = qos;
169 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
170 
171 	return 0;
172 }
173 
174 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
175 {
176 	struct netdevsim *ns = netdev_priv(dev);
177 	struct nsim_dev *nsim_dev = ns->nsim_dev;
178 
179 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
180 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
181 		return -EOPNOTSUPP;
182 	}
183 
184 	if (vf >= nsim_dev_get_vfs(nsim_dev))
185 		return -EINVAL;
186 
187 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
188 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
189 
190 	return 0;
191 }
192 
193 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
194 {
195 	struct netdevsim *ns = netdev_priv(dev);
196 	struct nsim_dev *nsim_dev = ns->nsim_dev;
197 
198 	if (vf >= nsim_dev_get_vfs(nsim_dev))
199 		return -EINVAL;
200 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
201 
202 	return 0;
203 }
204 
205 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
206 {
207 	struct netdevsim *ns = netdev_priv(dev);
208 	struct nsim_dev *nsim_dev = ns->nsim_dev;
209 
210 	if (vf >= nsim_dev_get_vfs(nsim_dev))
211 		return -EINVAL;
212 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
213 
214 	return 0;
215 }
216 
217 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
218 {
219 	struct netdevsim *ns = netdev_priv(dev);
220 	struct nsim_dev *nsim_dev = ns->nsim_dev;
221 
222 	if (vf >= nsim_dev_get_vfs(nsim_dev))
223 		return -EINVAL;
224 	nsim_dev->vfconfigs[vf].trusted = val;
225 
226 	return 0;
227 }
228 
229 static int
230 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
231 {
232 	struct netdevsim *ns = netdev_priv(dev);
233 	struct nsim_dev *nsim_dev = ns->nsim_dev;
234 
235 	if (vf >= nsim_dev_get_vfs(nsim_dev))
236 		return -EINVAL;
237 
238 	ivi->vf = vf;
239 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
240 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
241 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
242 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
243 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
244 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
245 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
246 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
247 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
248 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
249 
250 	return 0;
251 }
252 
253 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
254 {
255 	struct netdevsim *ns = netdev_priv(dev);
256 	struct nsim_dev *nsim_dev = ns->nsim_dev;
257 
258 	if (vf >= nsim_dev_get_vfs(nsim_dev))
259 		return -EINVAL;
260 
261 	switch (state) {
262 	case IFLA_VF_LINK_STATE_AUTO:
263 	case IFLA_VF_LINK_STATE_ENABLE:
264 	case IFLA_VF_LINK_STATE_DISABLE:
265 		break;
266 	default:
267 		return -EINVAL;
268 	}
269 
270 	nsim_dev->vfconfigs[vf].link_state = state;
271 
272 	return 0;
273 }
274 
275 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
276 {
277 	stats->window_drops = 0;
278 	stats->tx_overruns = 0;
279 }
280 
281 static int nsim_setup_tc_taprio(struct net_device *dev,
282 				struct tc_taprio_qopt_offload *offload)
283 {
284 	int err = 0;
285 
286 	switch (offload->cmd) {
287 	case TAPRIO_CMD_REPLACE:
288 	case TAPRIO_CMD_DESTROY:
289 		break;
290 	case TAPRIO_CMD_STATS:
291 		nsim_taprio_stats(&offload->stats);
292 		break;
293 	default:
294 		err = -EOPNOTSUPP;
295 	}
296 
297 	return err;
298 }
299 
300 static LIST_HEAD(nsim_block_cb_list);
301 
302 static int
303 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
304 {
305 	struct netdevsim *ns = netdev_priv(dev);
306 
307 	switch (type) {
308 	case TC_SETUP_QDISC_TAPRIO:
309 		return nsim_setup_tc_taprio(dev, type_data);
310 	case TC_SETUP_BLOCK:
311 		return flow_block_cb_setup_simple(type_data,
312 						  &nsim_block_cb_list,
313 						  nsim_setup_tc_block_cb,
314 						  ns, ns, true);
315 	default:
316 		return -EOPNOTSUPP;
317 	}
318 }
319 
320 static int
321 nsim_set_features(struct net_device *dev, netdev_features_t features)
322 {
323 	struct netdevsim *ns = netdev_priv(dev);
324 
325 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
326 		return nsim_bpf_disable_tc(ns);
327 
328 	return 0;
329 }
330 
331 static int nsim_get_iflink(const struct net_device *dev)
332 {
333 	struct netdevsim *nsim, *peer;
334 	int iflink;
335 
336 	nsim = netdev_priv(dev);
337 
338 	rcu_read_lock();
339 	peer = rcu_dereference(nsim->peer);
340 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
341 			READ_ONCE(dev->ifindex);
342 	rcu_read_unlock();
343 
344 	return iflink;
345 }
346 
347 static int nsim_rcv(struct nsim_rq *rq, int budget)
348 {
349 	struct sk_buff *skb;
350 	int i;
351 
352 	for (i = 0; i < budget; i++) {
353 		if (skb_queue_empty(&rq->skb_queue))
354 			break;
355 
356 		skb = skb_dequeue(&rq->skb_queue);
357 		netif_receive_skb(skb);
358 	}
359 
360 	return i;
361 }
362 
363 static int nsim_poll(struct napi_struct *napi, int budget)
364 {
365 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
366 	int done;
367 
368 	done = nsim_rcv(rq, budget);
369 	napi_complete(napi);
370 
371 	return done;
372 }
373 
374 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
375 {
376 	struct page_pool_params params = {
377 		.order = 0,
378 		.pool_size = NSIM_RING_SIZE,
379 		.nid = NUMA_NO_NODE,
380 		.dev = &napi->dev->dev,
381 		.napi = napi,
382 		.dma_dir = DMA_BIDIRECTIONAL,
383 		.netdev = napi->dev,
384 	};
385 	struct page_pool *pool;
386 
387 	pool = page_pool_create(&params);
388 	if (IS_ERR(pool))
389 		return PTR_ERR(pool);
390 
391 	*p = pool;
392 	return 0;
393 }
394 
395 static int nsim_init_napi(struct netdevsim *ns)
396 {
397 	struct net_device *dev = ns->netdev;
398 	struct nsim_rq *rq;
399 	int err, i;
400 
401 	for (i = 0; i < dev->num_rx_queues; i++) {
402 		rq = ns->rq[i];
403 
404 		netif_napi_add_config(dev, &rq->napi, nsim_poll, i);
405 	}
406 
407 	for (i = 0; i < dev->num_rx_queues; i++) {
408 		rq = ns->rq[i];
409 
410 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
411 		if (err)
412 			goto err_pp_destroy;
413 	}
414 
415 	return 0;
416 
417 err_pp_destroy:
418 	while (i--) {
419 		page_pool_destroy(ns->rq[i]->page_pool);
420 		ns->rq[i]->page_pool = NULL;
421 	}
422 
423 	for (i = 0; i < dev->num_rx_queues; i++)
424 		__netif_napi_del(&ns->rq[i]->napi);
425 
426 	return err;
427 }
428 
429 static void nsim_enable_napi(struct netdevsim *ns)
430 {
431 	struct net_device *dev = ns->netdev;
432 	int i;
433 
434 	for (i = 0; i < dev->num_rx_queues; i++) {
435 		struct nsim_rq *rq = ns->rq[i];
436 
437 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
438 		napi_enable(&rq->napi);
439 	}
440 }
441 
/* ndo_open: set up NAPI and page pools for all Rx queues, then enable
 * NAPI. Returns the error from nsim_init_napi() if setup fails, else 0.
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	int err = nsim_init_napi(ns);

	if (!err)
		nsim_enable_napi(ns);

	return err;
}
455 
456 static void nsim_del_napi(struct netdevsim *ns)
457 {
458 	struct net_device *dev = ns->netdev;
459 	int i;
460 
461 	for (i = 0; i < dev->num_rx_queues; i++) {
462 		struct nsim_rq *rq = ns->rq[i];
463 
464 		napi_disable(&rq->napi);
465 		__netif_napi_del(&rq->napi);
466 	}
467 	synchronize_net();
468 
469 	for (i = 0; i < dev->num_rx_queues; i++) {
470 		page_pool_destroy(ns->rq[i]->page_pool);
471 		ns->rq[i]->page_pool = NULL;
472 	}
473 }
474 
475 static int nsim_stop(struct net_device *dev)
476 {
477 	struct netdevsim *ns = netdev_priv(dev);
478 	struct netdevsim *peer;
479 
480 	netif_carrier_off(dev);
481 	peer = rtnl_dereference(ns->peer);
482 	if (peer)
483 		netif_carrier_off(peer->netdev);
484 
485 	nsim_del_napi(ns);
486 
487 	return 0;
488 }
489 
/* net_shaper_ops .set handler: every request is accepted; nothing is
 * stored and 0 is returned.
 */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
496 
/* net_shaper_ops .delete handler: every request is accepted; nothing is
 * changed and 0 is returned.
 */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
503 
/* net_shaper_ops .group handler: every request is accepted; nothing is
 * changed and 0 is returned.
 */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
512 
/* net_shaper_ops .capabilities handler: sets every bit of *@flags
 * (ULONG_MAX), independent of @binding and @scope.
 */
static void nsim_shaper_cap(struct net_shaper_binding *binding,
			    enum net_shaper_scope scope,
			    unsigned long *flags)
{
	*flags = ULONG_MAX;
}
519 
520 static const struct net_shaper_ops nsim_shaper_ops = {
521 	.set			= nsim_shaper_set,
522 	.delete			= nsim_shaper_del,
523 	.group			= nsim_shaper_group,
524 	.capabilities		= nsim_shaper_cap,
525 };
526 
527 static const struct net_device_ops nsim_netdev_ops = {
528 	.ndo_start_xmit		= nsim_start_xmit,
529 	.ndo_set_rx_mode	= nsim_set_rx_mode,
530 	.ndo_set_mac_address	= eth_mac_addr,
531 	.ndo_validate_addr	= eth_validate_addr,
532 	.ndo_change_mtu		= nsim_change_mtu,
533 	.ndo_get_stats64	= nsim_get_stats64,
534 	.ndo_set_vf_mac		= nsim_set_vf_mac,
535 	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
536 	.ndo_set_vf_rate	= nsim_set_vf_rate,
537 	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
538 	.ndo_set_vf_trust	= nsim_set_vf_trust,
539 	.ndo_get_vf_config	= nsim_get_vf_config,
540 	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
541 	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
542 	.ndo_setup_tc		= nsim_setup_tc,
543 	.ndo_set_features	= nsim_set_features,
544 	.ndo_get_iflink		= nsim_get_iflink,
545 	.ndo_bpf		= nsim_bpf,
546 	.ndo_open		= nsim_open,
547 	.ndo_stop		= nsim_stop,
548 	.net_shaper_ops		= &nsim_shaper_ops,
549 };
550 
551 static const struct net_device_ops nsim_vf_netdev_ops = {
552 	.ndo_start_xmit		= nsim_start_xmit,
553 	.ndo_set_rx_mode	= nsim_set_rx_mode,
554 	.ndo_set_mac_address	= eth_mac_addr,
555 	.ndo_validate_addr	= eth_validate_addr,
556 	.ndo_change_mtu		= nsim_change_mtu,
557 	.ndo_get_stats64	= nsim_get_stats64,
558 	.ndo_setup_tc		= nsim_setup_tc,
559 	.ndo_set_features	= nsim_set_features,
560 };
561 
562 /* We don't have true per-queue stats, yet, so do some random fakery here.
563  * Only report stuff for queue 0.
564  */
565 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
566 				    struct netdev_queue_stats_rx *stats)
567 {
568 	struct rtnl_link_stats64 rtstats = {};
569 
570 	if (!idx)
571 		nsim_get_stats64(dev, &rtstats);
572 
573 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
574 	stats->bytes = rtstats.rx_bytes;
575 }
576 
577 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
578 				    struct netdev_queue_stats_tx *stats)
579 {
580 	struct rtnl_link_stats64 rtstats = {};
581 
582 	if (!idx)
583 		nsim_get_stats64(dev, &rtstats);
584 
585 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
586 	stats->bytes = rtstats.tx_bytes;
587 }
588 
589 static void nsim_get_base_stats(struct net_device *dev,
590 				struct netdev_queue_stats_rx *rx,
591 				struct netdev_queue_stats_tx *tx)
592 {
593 	struct rtnl_link_stats64 rtstats = {};
594 
595 	nsim_get_stats64(dev, &rtstats);
596 
597 	rx->packets = !!rtstats.rx_packets;
598 	rx->bytes = 0;
599 	tx->packets = !!rtstats.tx_packets;
600 	tx->bytes = 0;
601 }
602 
603 static const struct netdev_stat_ops nsim_stat_ops = {
604 	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
605 	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
606 	.get_base_stats		= nsim_get_base_stats,
607 };
608 
609 static struct nsim_rq *nsim_queue_alloc(void)
610 {
611 	struct nsim_rq *rq;
612 
613 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
614 	if (!rq)
615 		return NULL;
616 
617 	skb_queue_head_init(&rq->skb_queue);
618 	return rq;
619 }
620 
/* Release an Rx queue object: purge any skbs still on its backlog
 * (drop reason SKB_DROP_REASON_QUEUE_PURGE), then free the object.
 */
static void nsim_queue_free(struct nsim_rq *rq)
{
	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
	kfree(rq);
}
626 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 *
 * Per-queue scratch memory for the queue management ops below; its size
 * is advertised through ndo_queue_mem_size.
 */
struct nsim_queue_mem {
	/* Whole queue object; used in every mode except mode 1 */
	struct nsim_rq *rq;
	/* Bare page pool; used in mode 1 only */
	struct page_pool *pp;
};
637 
638 static int
639 nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
640 {
641 	struct nsim_queue_mem *qmem = per_queue_mem;
642 	struct netdevsim *ns = netdev_priv(dev);
643 	int err;
644 
645 	if (ns->rq_reset_mode > 3)
646 		return -EINVAL;
647 
648 	if (ns->rq_reset_mode == 1) {
649 		if (!netif_running(ns->netdev))
650 			return -ENETDOWN;
651 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
652 	}
653 
654 	qmem->rq = nsim_queue_alloc();
655 	if (!qmem->rq)
656 		return -ENOMEM;
657 
658 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
659 	if (err)
660 		goto err_free;
661 
662 	if (!ns->rq_reset_mode)
663 		netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
664 
665 	return 0;
666 
667 err_free:
668 	nsim_queue_free(qmem->rq);
669 	return err;
670 }
671 
672 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
673 {
674 	struct nsim_queue_mem *qmem = per_queue_mem;
675 	struct netdevsim *ns = netdev_priv(dev);
676 
677 	page_pool_destroy(qmem->pp);
678 	if (qmem->rq) {
679 		if (!ns->rq_reset_mode)
680 			netif_napi_del(&qmem->rq->napi);
681 		page_pool_destroy(qmem->rq->page_pool);
682 		nsim_queue_free(qmem->rq);
683 	}
684 }
685 
686 static int
687 nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
688 {
689 	struct nsim_queue_mem *qmem = per_queue_mem;
690 	struct netdevsim *ns = netdev_priv(dev);
691 
692 	if (ns->rq_reset_mode == 1) {
693 		ns->rq[idx]->page_pool = qmem->pp;
694 		napi_enable(&ns->rq[idx]->napi);
695 		return 0;
696 	}
697 
698 	/* netif_napi_add()/_del() should normally be called from alloc/free,
699 	 * here we want to test various call orders.
700 	 */
701 	if (ns->rq_reset_mode == 2) {
702 		netif_napi_del(&ns->rq[idx]->napi);
703 		netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
704 	} else if (ns->rq_reset_mode == 3) {
705 		netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
706 		netif_napi_del(&ns->rq[idx]->napi);
707 	}
708 
709 	ns->rq[idx] = qmem->rq;
710 	napi_enable(&ns->rq[idx]->napi);
711 
712 	return 0;
713 }
714 
715 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
716 {
717 	struct nsim_queue_mem *qmem = per_queue_mem;
718 	struct netdevsim *ns = netdev_priv(dev);
719 
720 	napi_disable(&ns->rq[idx]->napi);
721 
722 	if (ns->rq_reset_mode == 1) {
723 		qmem->pp = ns->rq[idx]->page_pool;
724 		page_pool_disable_direct_recycling(qmem->pp);
725 	} else {
726 		qmem->rq = ns->rq[idx];
727 	}
728 
729 	return 0;
730 }
731 
732 static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
733 	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
734 	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
735 	.ndo_queue_mem_free	= nsim_queue_mem_free,
736 	.ndo_queue_start	= nsim_queue_start,
737 	.ndo_queue_stop		= nsim_queue_stop,
738 };
739 
740 static ssize_t
741 nsim_qreset_write(struct file *file, const char __user *data,
742 		  size_t count, loff_t *ppos)
743 {
744 	struct netdevsim *ns = file->private_data;
745 	unsigned int queue, mode;
746 	char buf[32];
747 	ssize_t ret;
748 
749 	if (count >= sizeof(buf))
750 		return -EINVAL;
751 	if (copy_from_user(buf, data, count))
752 		return -EFAULT;
753 	buf[count] = '\0';
754 
755 	ret = sscanf(buf, "%u %u", &queue, &mode);
756 	if (ret != 2)
757 		return -EINVAL;
758 
759 	rtnl_lock();
760 	if (queue >= ns->netdev->real_num_rx_queues) {
761 		ret = -EINVAL;
762 		goto exit_unlock;
763 	}
764 
765 	ns->rq_reset_mode = mode;
766 	ret = netdev_rx_queue_restart(ns->netdev, queue);
767 	ns->rq_reset_mode = 0;
768 	if (ret)
769 		goto exit_unlock;
770 
771 	ret = count;
772 exit_unlock:
773 	rtnl_unlock();
774 	return ret;
775 }
776 
777 static const struct file_operations nsim_qreset_fops = {
778 	.open = simple_open,
779 	.write = nsim_qreset_write,
780 	.owner = THIS_MODULE,
781 };
782 
783 static ssize_t
784 nsim_pp_hold_read(struct file *file, char __user *data,
785 		  size_t count, loff_t *ppos)
786 {
787 	struct netdevsim *ns = file->private_data;
788 	char buf[3] = "n\n";
789 
790 	if (ns->page)
791 		buf[0] = 'y';
792 
793 	return simple_read_from_buffer(data, count, ppos, buf, 2);
794 }
795 
796 static ssize_t
797 nsim_pp_hold_write(struct file *file, const char __user *data,
798 		   size_t count, loff_t *ppos)
799 {
800 	struct netdevsim *ns = file->private_data;
801 	ssize_t ret;
802 	bool val;
803 
804 	ret = kstrtobool_from_user(data, count, &val);
805 	if (ret)
806 		return ret;
807 
808 	rtnl_lock();
809 	ret = count;
810 	if (val == !!ns->page)
811 		goto exit;
812 
813 	if (!netif_running(ns->netdev) && val) {
814 		ret = -ENETDOWN;
815 	} else if (val) {
816 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
817 		if (!ns->page)
818 			ret = -ENOMEM;
819 	} else {
820 		page_pool_put_full_page(ns->page->pp, ns->page, false);
821 		ns->page = NULL;
822 	}
823 
824 exit:
825 	rtnl_unlock();
826 	return ret;
827 }
828 
829 static const struct file_operations nsim_pp_hold_fops = {
830 	.open = simple_open,
831 	.read = nsim_pp_hold_read,
832 	.write = nsim_pp_hold_write,
833 	.llseek = generic_file_llseek,
834 	.owner = THIS_MODULE,
835 };
836 
837 static void nsim_setup(struct net_device *dev)
838 {
839 	ether_setup(dev);
840 	eth_hw_addr_random(dev);
841 
842 	dev->tx_queue_len = 0;
843 	dev->flags &= ~IFF_MULTICAST;
844 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
845 			   IFF_NO_QUEUE;
846 	dev->features |= NETIF_F_HIGHDMA |
847 			 NETIF_F_SG |
848 			 NETIF_F_FRAGLIST |
849 			 NETIF_F_HW_CSUM |
850 			 NETIF_F_TSO;
851 	dev->hw_features |= NETIF_F_HW_TC |
852 			    NETIF_F_SG |
853 			    NETIF_F_FRAGLIST |
854 			    NETIF_F_HW_CSUM |
855 			    NETIF_F_TSO;
856 	dev->max_mtu = ETH_MAX_MTU;
857 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
858 }
859 
860 static int nsim_queue_init(struct netdevsim *ns)
861 {
862 	struct net_device *dev = ns->netdev;
863 	int i;
864 
865 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
866 			 GFP_KERNEL_ACCOUNT);
867 	if (!ns->rq)
868 		return -ENOMEM;
869 
870 	for (i = 0; i < dev->num_rx_queues; i++) {
871 		ns->rq[i] = nsim_queue_alloc();
872 		if (!ns->rq[i])
873 			goto err_free_prev;
874 	}
875 
876 	return 0;
877 
878 err_free_prev:
879 	while (i--)
880 		kfree(ns->rq[i]);
881 	kfree(ns->rq);
882 	return -ENOMEM;
883 }
884 
885 static void nsim_queue_uninit(struct netdevsim *ns)
886 {
887 	struct net_device *dev = ns->netdev;
888 	int i;
889 
890 	for (i = 0; i < dev->num_rx_queues; i++)
891 		nsim_queue_free(ns->rq[i]);
892 
893 	kfree(ns->rq);
894 	ns->rq = NULL;
895 }
896 
/* Initialise and register a PF netdevsim.
 *
 * Creates the mock PHC, installs the netdev / stat / queue-management
 * ops, sets up UDP tunnel info, then - under RTNL - allocates the Rx
 * queues, initialises BPF, MACsec and IPsec state and registers the
 * netdev. On any failure the steps completed so far are undone in
 * reverse order and the error is returned.
 */
static int nsim_init_netdevsim(struct netdevsim *ns)
{
	struct mock_phc *phc;
	int err;

	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
	if (IS_ERR(phc))
		return PTR_ERR(phc);

	ns->phc = phc;
	ns->netdev->netdev_ops = &nsim_netdev_ops;
	ns->netdev->stat_ops = &nsim_stat_ops;
	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;

	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
	if (err)
		goto err_phc_destroy;

	/* Everything from here to registration runs under RTNL. */
	rtnl_lock();
	err = nsim_queue_init(ns);
	if (err)
		goto err_utn_destroy;

	err = nsim_bpf_init(ns);
	if (err)
		goto err_rq_destroy;

	nsim_macsec_init(ns);
	nsim_ipsec_init(ns);

	err = register_netdevice(ns->netdev);
	if (err)
		goto err_ipsec_teardown;
	rtnl_unlock();
	return 0;

err_ipsec_teardown:
	nsim_ipsec_teardown(ns);
	nsim_macsec_teardown(ns);
	nsim_bpf_uninit(ns);
err_rq_destroy:
	nsim_queue_uninit(ns);
err_utn_destroy:
	/* RTNL is dropped before the UDP tunnel info and PHC are released. */
	rtnl_unlock();
	nsim_udp_tunnels_info_destroy(ns->netdev);
err_phc_destroy:
	mock_phc_destroy(ns->phc);
	return err;
}
946 
/* Initialise and register a VF netdevsim: install the reduced VF ops
 * table and register the netdev under RTNL. No PHC, UDP tunnel info,
 * Rx queues, BPF, MACsec or IPsec state is set up on this path.
 * Returns the result of register_netdevice().
 */
static int nsim_init_netdevsim_vf(struct netdevsim *ns)
{
	int err;

	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
	rtnl_lock();
	err = register_netdevice(ns->netdev);
	rtnl_unlock();
	return err;
}
957 
/* Release the PF-only resources that nsim_init_netdevsim() creates
 * outside RTNL: the UDP tunnel info and the mock PHC.
 */
static void nsim_exit_netdevsim(struct netdevsim *ns)
{
	nsim_udp_tunnels_info_destroy(ns->netdev);
	mock_phc_destroy(ns->phc);
}
963 
964 struct netdevsim *
965 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
966 {
967 	struct net_device *dev;
968 	struct netdevsim *ns;
969 	int err;
970 
971 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
972 			      nsim_dev->nsim_bus_dev->num_queues);
973 	if (!dev)
974 		return ERR_PTR(-ENOMEM);
975 
976 	dev_net_set(dev, nsim_dev_net(nsim_dev));
977 	ns = netdev_priv(dev);
978 	ns->netdev = dev;
979 	u64_stats_init(&ns->syncp);
980 	ns->nsim_dev = nsim_dev;
981 	ns->nsim_dev_port = nsim_dev_port;
982 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
983 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
984 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
985 	nsim_ethtool_init(ns);
986 	if (nsim_dev_port_is_pf(nsim_dev_port))
987 		err = nsim_init_netdevsim(ns);
988 	else
989 		err = nsim_init_netdevsim_vf(ns);
990 	if (err)
991 		goto err_free_netdev;
992 
993 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
994 					 ns, &nsim_pp_hold_fops);
995 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
996 					 nsim_dev_port->ddir, ns,
997 					 &nsim_qreset_fops);
998 
999 	return ns;
1000 
1001 err_free_netdev:
1002 	free_netdev(dev);
1003 	return ERR_PTR(err);
1004 }
1005 
/* Unregister and free a netdevsim created by nsim_create().
 *
 * The debugfs files go first. Under RTNL the peer link is cleared in
 * both directions, the netdev is unregistered and, for PF ports, the
 * MACsec / IPsec / BPF state and the Rx queues are torn down. The
 * remaining PF resources are released after RTNL is dropped, and the
 * netdev memory is freed last.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	rtnl_lock();
	/* Unlink from the peer so neither side points at the other. */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(ns->page->pp, ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1038 
/* Return true when @dev uses nsim_netdev_ops. Devices set up with
 * nsim_vf_netdev_ops do not match.
 */
bool netdev_is_nsim(struct net_device *dev)
{
	return dev->netdev_ops == &nsim_netdev_ops;
}
1043 
/* rtnl_link_ops .validate handler: always fails with -EOPNOTSUPP and an
 * extack message pointing the user at the sysfs new_device interface.
 */
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG_MOD(extack,
			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
	return -EOPNOTSUPP;
}
1051 
/* rtnl link ops for kind DRV_NAME. The only callback is .validate, which
 * unconditionally rejects the request (see nsim_validate()).
 */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1056 
1057 static int __init nsim_module_init(void)
1058 {
1059 	int err;
1060 
1061 	err = nsim_dev_init();
1062 	if (err)
1063 		return err;
1064 
1065 	err = nsim_bus_init();
1066 	if (err)
1067 		goto err_dev_exit;
1068 
1069 	err = rtnl_link_register(&nsim_link_ops);
1070 	if (err)
1071 		goto err_bus_exit;
1072 
1073 	return 0;
1074 
1075 err_bus_exit:
1076 	nsim_bus_exit();
1077 err_dev_exit:
1078 	nsim_dev_exit();
1079 	return err;
1080 }
1081 
/* Module exit: undo nsim_module_init() in reverse order. */
static void __exit nsim_module_exit(void)
{
	rtnl_link_unregister(&nsim_link_ops);
	nsim_bus_exit();
	nsim_dev_exit();
}
1088 
/* Module entry / exit points and metadata. */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1094