xref: /linux/drivers/net/netdevsim/netdev.c (revision 25489a4f556414445d342951615178368ee45cde)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
36 MODULE_IMPORT_NS("NETDEV_INTERNAL");
37 
38 #define NSIM_RING_SIZE		256
39 
40 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
41 {
42 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
43 		dev_kfree_skb_any(skb);
44 		return NET_RX_DROP;
45 	}
46 
47 	skb_queue_tail(&rq->skb_queue, skb);
48 	return NET_RX_SUCCESS;
49 }
50 
/* Run @skb through __dev_forward_skb() for @dev and, if that returns zero,
 * queue it on @rq. A non-zero result from __dev_forward_skb() is returned
 * unchanged and the skb is not queued.
 */
static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct nsim_rq *rq)
{
	int ret;

	ret = __dev_forward_skb(dev, skb);
	if (ret)
		return ret;

	return nsim_napi_rx(rq, skb);
}
56 
57 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
58 {
59 	struct netdevsim *ns = netdev_priv(dev);
60 	struct net_device *peer_dev;
61 	unsigned int len = skb->len;
62 	struct netdevsim *peer_ns;
63 	struct netdev_config *cfg;
64 	struct nsim_rq *rq;
65 	int rxq;
66 
67 	rcu_read_lock();
68 	if (!nsim_ipsec_tx(ns, skb))
69 		goto out_drop_free;
70 
71 	peer_ns = rcu_dereference(ns->peer);
72 	if (!peer_ns)
73 		goto out_drop_free;
74 
75 	peer_dev = peer_ns->netdev;
76 	rxq = skb_get_queue_mapping(skb);
77 	if (rxq >= peer_dev->num_rx_queues)
78 		rxq = rxq % peer_dev->num_rx_queues;
79 	rq = peer_ns->rq[rxq];
80 
81 	cfg = peer_dev->cfg;
82 	if (skb_is_nonlinear(skb) &&
83 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
84 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
85 	      cfg->hds_thresh > len)))
86 		skb_linearize(skb);
87 
88 	skb_tx_timestamp(skb);
89 	if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
90 		goto out_drop_cnt;
91 
92 	if (!hrtimer_active(&rq->napi_timer))
93 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
94 
95 	rcu_read_unlock();
96 	dev_dstats_tx_add(dev, len);
97 	return NETDEV_TX_OK;
98 
99 out_drop_free:
100 	dev_kfree_skb(skb);
101 out_drop_cnt:
102 	rcu_read_unlock();
103 	dev_dstats_tx_dropped(dev);
104 	return NETDEV_TX_OK;
105 }
106 
/* .ndo_set_rx_mode callback: intentionally does nothing. */
static void nsim_set_rx_mode(struct net_device *dev)
{
	/* Nothing to program. */
}
110 
111 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
112 {
113 	struct netdevsim *ns = netdev_priv(dev);
114 
115 	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
116 	    new_mtu > NSIM_XDP_MAX_MTU)
117 		return -EBUSY;
118 
119 	WRITE_ONCE(dev->mtu, new_mtu);
120 
121 	return 0;
122 }
123 
124 static int
125 nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
126 {
127 	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
128 }
129 
130 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
131 {
132 	struct netdevsim *ns = netdev_priv(dev);
133 	struct nsim_dev *nsim_dev = ns->nsim_dev;
134 
135 	/* Only refuse multicast addresses, zero address can mean unset/any. */
136 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
137 		return -EINVAL;
138 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
139 
140 	return 0;
141 }
142 
143 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
144 			    u16 vlan, u8 qos, __be16 vlan_proto)
145 {
146 	struct netdevsim *ns = netdev_priv(dev);
147 	struct nsim_dev *nsim_dev = ns->nsim_dev;
148 
149 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
150 		return -EINVAL;
151 
152 	nsim_dev->vfconfigs[vf].vlan = vlan;
153 	nsim_dev->vfconfigs[vf].qos = qos;
154 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
155 
156 	return 0;
157 }
158 
159 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
160 {
161 	struct netdevsim *ns = netdev_priv(dev);
162 	struct nsim_dev *nsim_dev = ns->nsim_dev;
163 
164 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
165 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
166 		return -EOPNOTSUPP;
167 	}
168 
169 	if (vf >= nsim_dev_get_vfs(nsim_dev))
170 		return -EINVAL;
171 
172 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
173 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
174 
175 	return 0;
176 }
177 
178 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
179 {
180 	struct netdevsim *ns = netdev_priv(dev);
181 	struct nsim_dev *nsim_dev = ns->nsim_dev;
182 
183 	if (vf >= nsim_dev_get_vfs(nsim_dev))
184 		return -EINVAL;
185 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
186 
187 	return 0;
188 }
189 
190 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
191 {
192 	struct netdevsim *ns = netdev_priv(dev);
193 	struct nsim_dev *nsim_dev = ns->nsim_dev;
194 
195 	if (vf >= nsim_dev_get_vfs(nsim_dev))
196 		return -EINVAL;
197 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
198 
199 	return 0;
200 }
201 
202 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
203 {
204 	struct netdevsim *ns = netdev_priv(dev);
205 	struct nsim_dev *nsim_dev = ns->nsim_dev;
206 
207 	if (vf >= nsim_dev_get_vfs(nsim_dev))
208 		return -EINVAL;
209 	nsim_dev->vfconfigs[vf].trusted = val;
210 
211 	return 0;
212 }
213 
214 static int
215 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
216 {
217 	struct netdevsim *ns = netdev_priv(dev);
218 	struct nsim_dev *nsim_dev = ns->nsim_dev;
219 
220 	if (vf >= nsim_dev_get_vfs(nsim_dev))
221 		return -EINVAL;
222 
223 	ivi->vf = vf;
224 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
225 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
226 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
227 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
228 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
229 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
230 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
231 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
232 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
233 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
234 
235 	return 0;
236 }
237 
238 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
239 {
240 	struct netdevsim *ns = netdev_priv(dev);
241 	struct nsim_dev *nsim_dev = ns->nsim_dev;
242 
243 	if (vf >= nsim_dev_get_vfs(nsim_dev))
244 		return -EINVAL;
245 
246 	switch (state) {
247 	case IFLA_VF_LINK_STATE_AUTO:
248 	case IFLA_VF_LINK_STATE_ENABLE:
249 	case IFLA_VF_LINK_STATE_DISABLE:
250 		break;
251 	default:
252 		return -EINVAL;
253 	}
254 
255 	nsim_dev->vfconfigs[vf].link_state = state;
256 
257 	return 0;
258 }
259 
260 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
261 {
262 	stats->window_drops = 0;
263 	stats->tx_overruns = 0;
264 }
265 
266 static int nsim_setup_tc_taprio(struct net_device *dev,
267 				struct tc_taprio_qopt_offload *offload)
268 {
269 	int err = 0;
270 
271 	switch (offload->cmd) {
272 	case TAPRIO_CMD_REPLACE:
273 	case TAPRIO_CMD_DESTROY:
274 		break;
275 	case TAPRIO_CMD_STATS:
276 		nsim_taprio_stats(&offload->stats);
277 		break;
278 	default:
279 		err = -EOPNOTSUPP;
280 	}
281 
282 	return err;
283 }
284 
285 static LIST_HEAD(nsim_block_cb_list);
286 
287 static int
288 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
289 {
290 	struct netdevsim *ns = netdev_priv(dev);
291 
292 	switch (type) {
293 	case TC_SETUP_QDISC_TAPRIO:
294 		return nsim_setup_tc_taprio(dev, type_data);
295 	case TC_SETUP_BLOCK:
296 		return flow_block_cb_setup_simple(type_data,
297 						  &nsim_block_cb_list,
298 						  nsim_setup_tc_block_cb,
299 						  ns, ns, true);
300 	default:
301 		return -EOPNOTSUPP;
302 	}
303 }
304 
305 static int
306 nsim_set_features(struct net_device *dev, netdev_features_t features)
307 {
308 	struct netdevsim *ns = netdev_priv(dev);
309 
310 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
311 		return nsim_bpf_disable_tc(ns);
312 
313 	return 0;
314 }
315 
316 static int nsim_get_iflink(const struct net_device *dev)
317 {
318 	struct netdevsim *nsim, *peer;
319 	int iflink;
320 
321 	nsim = netdev_priv(dev);
322 
323 	rcu_read_lock();
324 	peer = rcu_dereference(nsim->peer);
325 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
326 			READ_ONCE(dev->ifindex);
327 	rcu_read_unlock();
328 
329 	return iflink;
330 }
331 
332 static int nsim_rcv(struct nsim_rq *rq, int budget)
333 {
334 	struct net_device *dev = rq->napi.dev;
335 	struct sk_buff *skb;
336 	unsigned int skblen;
337 	int i, ret;
338 
339 	for (i = 0; i < budget; i++) {
340 		if (skb_queue_empty(&rq->skb_queue))
341 			break;
342 
343 		skb = skb_dequeue(&rq->skb_queue);
344 		/* skb might be discard at netif_receive_skb, save the len */
345 		skblen = skb->len;
346 		skb_mark_napi_id(skb, &rq->napi);
347 		ret = netif_receive_skb(skb);
348 		if (ret == NET_RX_SUCCESS)
349 			dev_dstats_rx_add(dev, skblen);
350 		else
351 			dev_dstats_rx_dropped(dev);
352 	}
353 
354 	return i;
355 }
356 
357 static int nsim_poll(struct napi_struct *napi, int budget)
358 {
359 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
360 	int done;
361 
362 	done = nsim_rcv(rq, budget);
363 	if (done < budget)
364 		napi_complete_done(napi, done);
365 
366 	return done;
367 }
368 
369 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
370 {
371 	struct page_pool_params params = {
372 		.order = 0,
373 		.pool_size = NSIM_RING_SIZE,
374 		.nid = NUMA_NO_NODE,
375 		.dev = &napi->dev->dev,
376 		.napi = napi,
377 		.dma_dir = DMA_BIDIRECTIONAL,
378 		.netdev = napi->dev,
379 	};
380 	struct page_pool *pool;
381 
382 	pool = page_pool_create(&params);
383 	if (IS_ERR(pool))
384 		return PTR_ERR(pool);
385 
386 	*p = pool;
387 	return 0;
388 }
389 
390 static int nsim_init_napi(struct netdevsim *ns)
391 {
392 	struct net_device *dev = ns->netdev;
393 	struct nsim_rq *rq;
394 	int err, i;
395 
396 	for (i = 0; i < dev->num_rx_queues; i++) {
397 		rq = ns->rq[i];
398 
399 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
400 	}
401 
402 	for (i = 0; i < dev->num_rx_queues; i++) {
403 		rq = ns->rq[i];
404 
405 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
406 		if (err)
407 			goto err_pp_destroy;
408 	}
409 
410 	return 0;
411 
412 err_pp_destroy:
413 	while (i--) {
414 		page_pool_destroy(ns->rq[i]->page_pool);
415 		ns->rq[i]->page_pool = NULL;
416 	}
417 
418 	for (i = 0; i < dev->num_rx_queues; i++)
419 		__netif_napi_del_locked(&ns->rq[i]->napi);
420 
421 	return err;
422 }
423 
424 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
425 {
426 	struct nsim_rq *rq;
427 
428 	rq = container_of(timer, struct nsim_rq, napi_timer);
429 	napi_schedule(&rq->napi);
430 
431 	return HRTIMER_NORESTART;
432 }
433 
434 static void nsim_rq_timer_init(struct nsim_rq *rq)
435 {
436 	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
437 		      HRTIMER_MODE_REL);
438 }
439 
440 static void nsim_enable_napi(struct netdevsim *ns)
441 {
442 	struct net_device *dev = ns->netdev;
443 	int i;
444 
445 	for (i = 0; i < dev->num_rx_queues; i++) {
446 		struct nsim_rq *rq = ns->rq[i];
447 
448 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
449 		napi_enable_locked(&rq->napi);
450 	}
451 }
452 
/* .ndo_open callback: set up NAPI and page pools for all Rx queues, then
 * enable NAPI. Asserts that the netdev instance lock is held.
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	int err;

	netdev_assert_locked(dev);

	err = nsim_init_napi(ns);
	if (!err)
		nsim_enable_napi(ns);

	return err;
}
468 
469 static void nsim_del_napi(struct netdevsim *ns)
470 {
471 	struct net_device *dev = ns->netdev;
472 	int i;
473 
474 	for (i = 0; i < dev->num_rx_queues; i++) {
475 		struct nsim_rq *rq = ns->rq[i];
476 
477 		napi_disable_locked(&rq->napi);
478 		__netif_napi_del_locked(&rq->napi);
479 	}
480 	synchronize_net();
481 
482 	for (i = 0; i < dev->num_rx_queues; i++) {
483 		page_pool_destroy(ns->rq[i]->page_pool);
484 		ns->rq[i]->page_pool = NULL;
485 	}
486 }
487 
488 static int nsim_stop(struct net_device *dev)
489 {
490 	struct netdevsim *ns = netdev_priv(dev);
491 	struct netdevsim *peer;
492 
493 	netdev_assert_locked(dev);
494 
495 	netif_carrier_off(dev);
496 	peer = rtnl_dereference(ns->peer);
497 	if (peer)
498 		netif_carrier_off(peer->netdev);
499 
500 	nsim_del_napi(ns);
501 
502 	return 0;
503 }
504 
/* Shaper "set" operation: accepts every request without doing anything. */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	/* No state is kept. */
	return 0;
}
511 
/* Shaper "delete" operation: accepts every request without doing anything. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	/* No state is kept. */
	return 0;
}
518 
/* Shaper "group" operation: accepts every request without doing anything. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	/* No state is kept. */
	return 0;
}
527 
528 static void nsim_shaper_cap(struct net_shaper_binding *binding,
529 			    enum net_shaper_scope scope,
530 			    unsigned long *flags)
531 {
532 	*flags = ULONG_MAX;
533 }
534 
535 static const struct net_shaper_ops nsim_shaper_ops = {
536 	.set			= nsim_shaper_set,
537 	.delete			= nsim_shaper_del,
538 	.group			= nsim_shaper_group,
539 	.capabilities		= nsim_shaper_cap,
540 };
541 
542 static const struct net_device_ops nsim_netdev_ops = {
543 	.ndo_start_xmit		= nsim_start_xmit,
544 	.ndo_set_rx_mode	= nsim_set_rx_mode,
545 	.ndo_set_mac_address	= eth_mac_addr,
546 	.ndo_validate_addr	= eth_validate_addr,
547 	.ndo_change_mtu		= nsim_change_mtu,
548 	.ndo_set_vf_mac		= nsim_set_vf_mac,
549 	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
550 	.ndo_set_vf_rate	= nsim_set_vf_rate,
551 	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
552 	.ndo_set_vf_trust	= nsim_set_vf_trust,
553 	.ndo_get_vf_config	= nsim_get_vf_config,
554 	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
555 	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
556 	.ndo_setup_tc		= nsim_setup_tc,
557 	.ndo_set_features	= nsim_set_features,
558 	.ndo_get_iflink		= nsim_get_iflink,
559 	.ndo_bpf		= nsim_bpf,
560 	.ndo_open		= nsim_open,
561 	.ndo_stop		= nsim_stop,
562 	.net_shaper_ops		= &nsim_shaper_ops,
563 };
564 
565 static const struct net_device_ops nsim_vf_netdev_ops = {
566 	.ndo_start_xmit		= nsim_start_xmit,
567 	.ndo_set_rx_mode	= nsim_set_rx_mode,
568 	.ndo_set_mac_address	= eth_mac_addr,
569 	.ndo_validate_addr	= eth_validate_addr,
570 	.ndo_change_mtu		= nsim_change_mtu,
571 	.ndo_setup_tc		= nsim_setup_tc,
572 	.ndo_set_features	= nsim_set_features,
573 };
574 
575 /* We don't have true per-queue stats, yet, so do some random fakery here.
576  * Only report stuff for queue 0.
577  */
578 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
579 				    struct netdev_queue_stats_rx *stats)
580 {
581 	struct rtnl_link_stats64 rtstats = {};
582 
583 	if (!idx)
584 		dev_get_stats(dev, &rtstats);
585 
586 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
587 	stats->bytes = rtstats.rx_bytes;
588 }
589 
590 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
591 				    struct netdev_queue_stats_tx *stats)
592 {
593 	struct rtnl_link_stats64 rtstats = {};
594 
595 	if (!idx)
596 		dev_get_stats(dev, &rtstats);
597 
598 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
599 	stats->bytes = rtstats.tx_bytes;
600 }
601 
602 static void nsim_get_base_stats(struct net_device *dev,
603 				struct netdev_queue_stats_rx *rx,
604 				struct netdev_queue_stats_tx *tx)
605 {
606 	struct rtnl_link_stats64 rtstats = {};
607 
608 	dev_get_stats(dev, &rtstats);
609 
610 	rx->packets = !!rtstats.rx_packets;
611 	rx->bytes = 0;
612 	tx->packets = !!rtstats.tx_packets;
613 	tx->bytes = 0;
614 }
615 
616 static const struct netdev_stat_ops nsim_stat_ops = {
617 	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
618 	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
619 	.get_base_stats		= nsim_get_base_stats,
620 };
621 
622 static struct nsim_rq *nsim_queue_alloc(void)
623 {
624 	struct nsim_rq *rq;
625 
626 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
627 	if (!rq)
628 		return NULL;
629 
630 	skb_queue_head_init(&rq->skb_queue);
631 	nsim_rq_timer_init(rq);
632 	return rq;
633 }
634 
635 static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq)
636 {
637 	hrtimer_cancel(&rq->napi_timer);
638 	local_bh_disable();
639 	dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen);
640 	local_bh_enable();
641 	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
642 	kfree(rq);
643 }
644 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 *
 * struct nsim_queue_mem is the per-queue memory exchanged through the queue
 * management callbacks: @pp carries a bare page pool (mode 1), @rq carries a
 * whole Rx queue (all other modes).
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;
	struct page_pool *pp;
};
655 
656 static int
657 nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
658 {
659 	struct nsim_queue_mem *qmem = per_queue_mem;
660 	struct netdevsim *ns = netdev_priv(dev);
661 	int err;
662 
663 	if (ns->rq_reset_mode > 3)
664 		return -EINVAL;
665 
666 	if (ns->rq_reset_mode == 1) {
667 		if (!netif_running(ns->netdev))
668 			return -ENETDOWN;
669 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
670 	}
671 
672 	qmem->rq = nsim_queue_alloc();
673 	if (!qmem->rq)
674 		return -ENOMEM;
675 
676 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
677 	if (err)
678 		goto err_free;
679 
680 	if (!ns->rq_reset_mode)
681 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
682 					     idx);
683 
684 	return 0;
685 
686 err_free:
687 	nsim_queue_free(dev, qmem->rq);
688 	return err;
689 }
690 
691 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
692 {
693 	struct nsim_queue_mem *qmem = per_queue_mem;
694 	struct netdevsim *ns = netdev_priv(dev);
695 
696 	page_pool_destroy(qmem->pp);
697 	if (qmem->rq) {
698 		if (!ns->rq_reset_mode)
699 			netif_napi_del_locked(&qmem->rq->napi);
700 		page_pool_destroy(qmem->rq->page_pool);
701 		nsim_queue_free(dev, qmem->rq);
702 	}
703 }
704 
705 static int
706 nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
707 {
708 	struct nsim_queue_mem *qmem = per_queue_mem;
709 	struct netdevsim *ns = netdev_priv(dev);
710 
711 	netdev_assert_locked(dev);
712 
713 	if (ns->rq_reset_mode == 1) {
714 		ns->rq[idx]->page_pool = qmem->pp;
715 		napi_enable_locked(&ns->rq[idx]->napi);
716 		return 0;
717 	}
718 
719 	/* netif_napi_add()/_del() should normally be called from alloc/free,
720 	 * here we want to test various call orders.
721 	 */
722 	if (ns->rq_reset_mode == 2) {
723 		netif_napi_del_locked(&ns->rq[idx]->napi);
724 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
725 					     idx);
726 	} else if (ns->rq_reset_mode == 3) {
727 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
728 					     idx);
729 		netif_napi_del_locked(&ns->rq[idx]->napi);
730 	}
731 
732 	ns->rq[idx] = qmem->rq;
733 	napi_enable_locked(&ns->rq[idx]->napi);
734 
735 	return 0;
736 }
737 
738 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
739 {
740 	struct nsim_queue_mem *qmem = per_queue_mem;
741 	struct netdevsim *ns = netdev_priv(dev);
742 
743 	netdev_assert_locked(dev);
744 
745 	napi_disable_locked(&ns->rq[idx]->napi);
746 
747 	if (ns->rq_reset_mode == 1) {
748 		qmem->pp = ns->rq[idx]->page_pool;
749 		page_pool_disable_direct_recycling(qmem->pp);
750 	} else {
751 		qmem->rq = ns->rq[idx];
752 	}
753 
754 	return 0;
755 }
756 
757 static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
758 	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
759 	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
760 	.ndo_queue_mem_free	= nsim_queue_mem_free,
761 	.ndo_queue_start	= nsim_queue_start,
762 	.ndo_queue_stop		= nsim_queue_stop,
763 };
764 
765 static ssize_t
766 nsim_qreset_write(struct file *file, const char __user *data,
767 		  size_t count, loff_t *ppos)
768 {
769 	struct netdevsim *ns = file->private_data;
770 	unsigned int queue, mode;
771 	char buf[32];
772 	ssize_t ret;
773 
774 	if (count >= sizeof(buf))
775 		return -EINVAL;
776 	if (copy_from_user(buf, data, count))
777 		return -EFAULT;
778 	buf[count] = '\0';
779 
780 	ret = sscanf(buf, "%u %u", &queue, &mode);
781 	if (ret != 2)
782 		return -EINVAL;
783 
784 	netdev_lock(ns->netdev);
785 	if (queue >= ns->netdev->real_num_rx_queues) {
786 		ret = -EINVAL;
787 		goto exit_unlock;
788 	}
789 
790 	ns->rq_reset_mode = mode;
791 	ret = netdev_rx_queue_restart(ns->netdev, queue);
792 	ns->rq_reset_mode = 0;
793 	if (ret)
794 		goto exit_unlock;
795 
796 	ret = count;
797 exit_unlock:
798 	netdev_unlock(ns->netdev);
799 	return ret;
800 }
801 
802 static const struct file_operations nsim_qreset_fops = {
803 	.open = simple_open,
804 	.write = nsim_qreset_write,
805 	.owner = THIS_MODULE,
806 };
807 
808 static ssize_t
809 nsim_pp_hold_read(struct file *file, char __user *data,
810 		  size_t count, loff_t *ppos)
811 {
812 	struct netdevsim *ns = file->private_data;
813 	char buf[3] = "n\n";
814 
815 	if (ns->page)
816 		buf[0] = 'y';
817 
818 	return simple_read_from_buffer(data, count, ppos, buf, 2);
819 }
820 
821 static ssize_t
822 nsim_pp_hold_write(struct file *file, const char __user *data,
823 		   size_t count, loff_t *ppos)
824 {
825 	struct netdevsim *ns = file->private_data;
826 	ssize_t ret;
827 	bool val;
828 
829 	ret = kstrtobool_from_user(data, count, &val);
830 	if (ret)
831 		return ret;
832 
833 	rtnl_lock();
834 	ret = count;
835 	if (val == !!ns->page)
836 		goto exit;
837 
838 	if (!netif_running(ns->netdev) && val) {
839 		ret = -ENETDOWN;
840 	} else if (val) {
841 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
842 		if (!ns->page)
843 			ret = -ENOMEM;
844 	} else {
845 		page_pool_put_full_page(ns->page->pp, ns->page, false);
846 		ns->page = NULL;
847 	}
848 
849 exit:
850 	rtnl_unlock();
851 	return ret;
852 }
853 
854 static const struct file_operations nsim_pp_hold_fops = {
855 	.open = simple_open,
856 	.read = nsim_pp_hold_read,
857 	.write = nsim_pp_hold_write,
858 	.llseek = generic_file_llseek,
859 	.owner = THIS_MODULE,
860 };
861 
862 static void nsim_setup(struct net_device *dev)
863 {
864 	ether_setup(dev);
865 	eth_hw_addr_random(dev);
866 
867 	dev->tx_queue_len = 0;
868 	dev->flags &= ~IFF_MULTICAST;
869 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
870 			   IFF_NO_QUEUE;
871 	dev->features |= NETIF_F_HIGHDMA |
872 			 NETIF_F_SG |
873 			 NETIF_F_FRAGLIST |
874 			 NETIF_F_HW_CSUM |
875 			 NETIF_F_LRO |
876 			 NETIF_F_TSO;
877 	dev->hw_features |= NETIF_F_HW_TC |
878 			    NETIF_F_SG |
879 			    NETIF_F_FRAGLIST |
880 			    NETIF_F_HW_CSUM |
881 			    NETIF_F_LRO |
882 			    NETIF_F_TSO;
883 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
884 	dev->max_mtu = ETH_MAX_MTU;
885 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
886 }
887 
888 static int nsim_queue_init(struct netdevsim *ns)
889 {
890 	struct net_device *dev = ns->netdev;
891 	int i;
892 
893 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
894 			 GFP_KERNEL_ACCOUNT);
895 	if (!ns->rq)
896 		return -ENOMEM;
897 
898 	for (i = 0; i < dev->num_rx_queues; i++) {
899 		ns->rq[i] = nsim_queue_alloc();
900 		if (!ns->rq[i])
901 			goto err_free_prev;
902 	}
903 
904 	return 0;
905 
906 err_free_prev:
907 	while (i--)
908 		kfree(ns->rq[i]);
909 	kfree(ns->rq);
910 	return -ENOMEM;
911 }
912 
913 static void nsim_queue_uninit(struct netdevsim *ns)
914 {
915 	struct net_device *dev = ns->netdev;
916 	int i;
917 
918 	for (i = 0; i < dev->num_rx_queues; i++)
919 		nsim_queue_free(dev, ns->rq[i]);
920 
921 	kfree(ns->rq);
922 	ns->rq = NULL;
923 }
924 
925 static int nsim_init_netdevsim(struct netdevsim *ns)
926 {
927 	struct mock_phc *phc;
928 	int err;
929 
930 	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
931 	if (IS_ERR(phc))
932 		return PTR_ERR(phc);
933 
934 	ns->phc = phc;
935 	ns->netdev->netdev_ops = &nsim_netdev_ops;
936 	ns->netdev->stat_ops = &nsim_stat_ops;
937 	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
938 	netdev_lockdep_set_classes(ns->netdev);
939 
940 	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
941 	if (err)
942 		goto err_phc_destroy;
943 
944 	rtnl_lock();
945 	err = nsim_queue_init(ns);
946 	if (err)
947 		goto err_utn_destroy;
948 
949 	err = nsim_bpf_init(ns);
950 	if (err)
951 		goto err_rq_destroy;
952 
953 	nsim_macsec_init(ns);
954 	nsim_ipsec_init(ns);
955 
956 	err = register_netdevice(ns->netdev);
957 	if (err)
958 		goto err_ipsec_teardown;
959 	rtnl_unlock();
960 
961 	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
962 		ns->nb.notifier_call = netdev_debug_event;
963 		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
964 							&ns->nn))
965 			ns->nb.notifier_call = NULL;
966 	}
967 
968 	return 0;
969 
970 err_ipsec_teardown:
971 	nsim_ipsec_teardown(ns);
972 	nsim_macsec_teardown(ns);
973 	nsim_bpf_uninit(ns);
974 err_rq_destroy:
975 	nsim_queue_uninit(ns);
976 err_utn_destroy:
977 	rtnl_unlock();
978 	nsim_udp_tunnels_info_destroy(ns->netdev);
979 err_phc_destroy:
980 	mock_phc_destroy(ns->phc);
981 	return err;
982 }
983 
984 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
985 {
986 	int err;
987 
988 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
989 	rtnl_lock();
990 	err = register_netdevice(ns->netdev);
991 	rtnl_unlock();
992 	return err;
993 }
994 
995 static void nsim_exit_netdevsim(struct netdevsim *ns)
996 {
997 	nsim_udp_tunnels_info_destroy(ns->netdev);
998 	mock_phc_destroy(ns->phc);
999 }
1000 
1001 struct netdevsim *
1002 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
1003 {
1004 	struct net_device *dev;
1005 	struct netdevsim *ns;
1006 	int err;
1007 
1008 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1009 			      nsim_dev->nsim_bus_dev->num_queues);
1010 	if (!dev)
1011 		return ERR_PTR(-ENOMEM);
1012 
1013 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1014 	ns = netdev_priv(dev);
1015 	ns->netdev = dev;
1016 	ns->nsim_dev = nsim_dev;
1017 	ns->nsim_dev_port = nsim_dev_port;
1018 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1019 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1020 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1021 	nsim_ethtool_init(ns);
1022 	if (nsim_dev_port_is_pf(nsim_dev_port))
1023 		err = nsim_init_netdevsim(ns);
1024 	else
1025 		err = nsim_init_netdevsim_vf(ns);
1026 	if (err)
1027 		goto err_free_netdev;
1028 
1029 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1030 					 ns, &nsim_pp_hold_fops);
1031 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1032 					 nsim_dev_port->ddir, ns,
1033 					 &nsim_qreset_fops);
1034 
1035 	return ns;
1036 
1037 err_free_netdev:
1038 	free_netdev(dev);
1039 	return ERR_PTR(err);
1040 }
1041 
1042 void nsim_destroy(struct netdevsim *ns)
1043 {
1044 	struct net_device *dev = ns->netdev;
1045 	struct netdevsim *peer;
1046 
1047 	debugfs_remove(ns->qr_dfs);
1048 	debugfs_remove(ns->pp_dfs);
1049 
1050 	if (ns->nb.notifier_call)
1051 		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
1052 						      &ns->nn);
1053 
1054 	rtnl_lock();
1055 	peer = rtnl_dereference(ns->peer);
1056 	if (peer)
1057 		RCU_INIT_POINTER(peer->peer, NULL);
1058 	RCU_INIT_POINTER(ns->peer, NULL);
1059 	unregister_netdevice(dev);
1060 	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
1061 		nsim_macsec_teardown(ns);
1062 		nsim_ipsec_teardown(ns);
1063 		nsim_bpf_uninit(ns);
1064 		nsim_queue_uninit(ns);
1065 	}
1066 	rtnl_unlock();
1067 	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
1068 		nsim_exit_netdevsim(ns);
1069 
1070 	/* Put this intentionally late to exercise the orphaning path */
1071 	if (ns->page) {
1072 		page_pool_put_full_page(ns->page->pp, ns->page, false);
1073 		ns->page = NULL;
1074 	}
1075 
1076 	free_netdev(dev);
1077 }
1078 
1079 bool netdev_is_nsim(struct net_device *dev)
1080 {
1081 	return dev->netdev_ops == &nsim_netdev_ops;
1082 }
1083 
1084 static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
1085 			 struct netlink_ext_ack *extack)
1086 {
1087 	NL_SET_ERR_MSG_MOD(extack,
1088 			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
1089 	return -EOPNOTSUPP;
1090 }
1091 
1092 static struct rtnl_link_ops nsim_link_ops __read_mostly = {
1093 	.kind		= DRV_NAME,
1094 	.validate	= nsim_validate,
1095 };
1096 
1097 static int __init nsim_module_init(void)
1098 {
1099 	int err;
1100 
1101 	err = nsim_dev_init();
1102 	if (err)
1103 		return err;
1104 
1105 	err = nsim_bus_init();
1106 	if (err)
1107 		goto err_dev_exit;
1108 
1109 	err = rtnl_link_register(&nsim_link_ops);
1110 	if (err)
1111 		goto err_bus_exit;
1112 
1113 	return 0;
1114 
1115 err_bus_exit:
1116 	nsim_bus_exit();
1117 err_dev_exit:
1118 	nsim_dev_exit();
1119 	return err;
1120 }
1121 
1122 static void __exit nsim_module_exit(void)
1123 {
1124 	rtnl_link_unregister(&nsim_link_ops);
1125 	nsim_bus_exit();
1126 	nsim_dev_exit();
1127 }
1128 
/* Module entry/exit points and metadata. */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1134