xref: /linux/drivers/net/netdevsim/netdev.c (revision b74710eaff314d6afe4fb0bbe9bc7657bf226fd4)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
MODULE_IMPORT_NS("NETDEV_INTERNAL");

/* Threshold for the per-queue skb backlog check in nsim_napi_rx() and the
 * pool_size handed to page_pool_create().
 */
#define NSIM_RING_SIZE		256
39 
40 static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
41 {
42 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
43 		dev_kfree_skb_any(skb);
44 		return NET_RX_DROP;
45 	}
46 
47 	skb_queue_tail(&rq->skb_queue, skb);
48 	return NET_RX_SUCCESS;
49 }
50 
/* Run @skb through __dev_forward_skb() towards @dev and, when that returns 0,
 * queue it on @rq. A non-zero forwarding result is returned unchanged.
 */
static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct nsim_rq *rq)
{
	int ret = __dev_forward_skb(dev, skb);

	if (ret)
		return ret;

	return nsim_napi_rx(rq, skb);
}
56 
57 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
58 {
59 	struct netdevsim *ns = netdev_priv(dev);
60 	struct net_device *peer_dev;
61 	unsigned int len = skb->len;
62 	struct netdevsim *peer_ns;
63 	struct netdev_config *cfg;
64 	struct nsim_rq *rq;
65 	int rxq;
66 
67 	rcu_read_lock();
68 	if (!nsim_ipsec_tx(ns, skb))
69 		goto out_drop_free;
70 
71 	peer_ns = rcu_dereference(ns->peer);
72 	if (!peer_ns)
73 		goto out_drop_free;
74 
75 	peer_dev = peer_ns->netdev;
76 	rxq = skb_get_queue_mapping(skb);
77 	if (rxq >= peer_dev->num_rx_queues)
78 		rxq = rxq % peer_dev->num_rx_queues;
79 	rq = peer_ns->rq[rxq];
80 
81 	cfg = peer_dev->cfg;
82 	if (skb_is_nonlinear(skb) &&
83 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
84 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
85 	      cfg->hds_thresh > len)))
86 		skb_linearize(skb);
87 
88 	skb_tx_timestamp(skb);
89 	if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
90 		goto out_drop_cnt;
91 
92 	if (!hrtimer_active(&rq->napi_timer))
93 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
94 
95 	rcu_read_unlock();
96 	u64_stats_update_begin(&ns->syncp);
97 	ns->tx_packets++;
98 	ns->tx_bytes += len;
99 	u64_stats_update_end(&ns->syncp);
100 	return NETDEV_TX_OK;
101 
102 out_drop_free:
103 	dev_kfree_skb(skb);
104 out_drop_cnt:
105 	rcu_read_unlock();
106 	u64_stats_update_begin(&ns->syncp);
107 	ns->tx_dropped++;
108 	u64_stats_update_end(&ns->syncp);
109 	return NETDEV_TX_OK;
110 }
111 
/* Empty ndo_set_rx_mode callback; no work is done here. */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
115 
116 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
117 {
118 	struct netdevsim *ns = netdev_priv(dev);
119 
120 	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
121 	    new_mtu > NSIM_XDP_MAX_MTU)
122 		return -EBUSY;
123 
124 	WRITE_ONCE(dev->mtu, new_mtu);
125 
126 	return 0;
127 }
128 
129 static void
130 nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
131 {
132 	struct netdevsim *ns = netdev_priv(dev);
133 	unsigned int start;
134 
135 	do {
136 		start = u64_stats_fetch_begin(&ns->syncp);
137 		stats->tx_bytes = ns->tx_bytes;
138 		stats->tx_packets = ns->tx_packets;
139 		stats->tx_dropped = ns->tx_dropped;
140 	} while (u64_stats_fetch_retry(&ns->syncp, start));
141 }
142 
/* TC block callback; forwards all arguments to nsim_bpf_setup_tc_block_cb()
 * and returns its result.
 */
static int
nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
}
148 
149 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
150 {
151 	struct netdevsim *ns = netdev_priv(dev);
152 	struct nsim_dev *nsim_dev = ns->nsim_dev;
153 
154 	/* Only refuse multicast addresses, zero address can mean unset/any. */
155 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
156 		return -EINVAL;
157 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
158 
159 	return 0;
160 }
161 
162 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
163 			    u16 vlan, u8 qos, __be16 vlan_proto)
164 {
165 	struct netdevsim *ns = netdev_priv(dev);
166 	struct nsim_dev *nsim_dev = ns->nsim_dev;
167 
168 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
169 		return -EINVAL;
170 
171 	nsim_dev->vfconfigs[vf].vlan = vlan;
172 	nsim_dev->vfconfigs[vf].qos = qos;
173 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
174 
175 	return 0;
176 }
177 
178 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
179 {
180 	struct netdevsim *ns = netdev_priv(dev);
181 	struct nsim_dev *nsim_dev = ns->nsim_dev;
182 
183 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
184 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
185 		return -EOPNOTSUPP;
186 	}
187 
188 	if (vf >= nsim_dev_get_vfs(nsim_dev))
189 		return -EINVAL;
190 
191 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
192 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
193 
194 	return 0;
195 }
196 
197 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
198 {
199 	struct netdevsim *ns = netdev_priv(dev);
200 	struct nsim_dev *nsim_dev = ns->nsim_dev;
201 
202 	if (vf >= nsim_dev_get_vfs(nsim_dev))
203 		return -EINVAL;
204 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
205 
206 	return 0;
207 }
208 
209 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
210 {
211 	struct netdevsim *ns = netdev_priv(dev);
212 	struct nsim_dev *nsim_dev = ns->nsim_dev;
213 
214 	if (vf >= nsim_dev_get_vfs(nsim_dev))
215 		return -EINVAL;
216 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
217 
218 	return 0;
219 }
220 
221 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
222 {
223 	struct netdevsim *ns = netdev_priv(dev);
224 	struct nsim_dev *nsim_dev = ns->nsim_dev;
225 
226 	if (vf >= nsim_dev_get_vfs(nsim_dev))
227 		return -EINVAL;
228 	nsim_dev->vfconfigs[vf].trusted = val;
229 
230 	return 0;
231 }
232 
233 static int
234 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
235 {
236 	struct netdevsim *ns = netdev_priv(dev);
237 	struct nsim_dev *nsim_dev = ns->nsim_dev;
238 
239 	if (vf >= nsim_dev_get_vfs(nsim_dev))
240 		return -EINVAL;
241 
242 	ivi->vf = vf;
243 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
244 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
245 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
246 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
247 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
248 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
249 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
250 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
251 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
252 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
253 
254 	return 0;
255 }
256 
257 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
258 {
259 	struct netdevsim *ns = netdev_priv(dev);
260 	struct nsim_dev *nsim_dev = ns->nsim_dev;
261 
262 	if (vf >= nsim_dev_get_vfs(nsim_dev))
263 		return -EINVAL;
264 
265 	switch (state) {
266 	case IFLA_VF_LINK_STATE_AUTO:
267 	case IFLA_VF_LINK_STATE_ENABLE:
268 	case IFLA_VF_LINK_STATE_DISABLE:
269 		break;
270 	default:
271 		return -EINVAL;
272 	}
273 
274 	nsim_dev->vfconfigs[vf].link_state = state;
275 
276 	return 0;
277 }
278 
279 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
280 {
281 	stats->window_drops = 0;
282 	stats->tx_overruns = 0;
283 }
284 
285 static int nsim_setup_tc_taprio(struct net_device *dev,
286 				struct tc_taprio_qopt_offload *offload)
287 {
288 	int err = 0;
289 
290 	switch (offload->cmd) {
291 	case TAPRIO_CMD_REPLACE:
292 	case TAPRIO_CMD_DESTROY:
293 		break;
294 	case TAPRIO_CMD_STATS:
295 		nsim_taprio_stats(&offload->stats);
296 		break;
297 	default:
298 		err = -EOPNOTSUPP;
299 	}
300 
301 	return err;
302 }
303 
304 static LIST_HEAD(nsim_block_cb_list);
305 
306 static int
307 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
308 {
309 	struct netdevsim *ns = netdev_priv(dev);
310 
311 	switch (type) {
312 	case TC_SETUP_QDISC_TAPRIO:
313 		return nsim_setup_tc_taprio(dev, type_data);
314 	case TC_SETUP_BLOCK:
315 		return flow_block_cb_setup_simple(type_data,
316 						  &nsim_block_cb_list,
317 						  nsim_setup_tc_block_cb,
318 						  ns, ns, true);
319 	default:
320 		return -EOPNOTSUPP;
321 	}
322 }
323 
324 static int
325 nsim_set_features(struct net_device *dev, netdev_features_t features)
326 {
327 	struct netdevsim *ns = netdev_priv(dev);
328 
329 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
330 		return nsim_bpf_disable_tc(ns);
331 
332 	return 0;
333 }
334 
335 static int nsim_get_iflink(const struct net_device *dev)
336 {
337 	struct netdevsim *nsim, *peer;
338 	int iflink;
339 
340 	nsim = netdev_priv(dev);
341 
342 	rcu_read_lock();
343 	peer = rcu_dereference(nsim->peer);
344 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
345 			READ_ONCE(dev->ifindex);
346 	rcu_read_unlock();
347 
348 	return iflink;
349 }
350 
351 static int nsim_rcv(struct nsim_rq *rq, int budget)
352 {
353 	struct sk_buff *skb;
354 	int i;
355 
356 	for (i = 0; i < budget; i++) {
357 		if (skb_queue_empty(&rq->skb_queue))
358 			break;
359 
360 		skb = skb_dequeue(&rq->skb_queue);
361 		skb_mark_napi_id(skb, &rq->napi);
362 		netif_receive_skb(skb);
363 	}
364 
365 	return i;
366 }
367 
368 static int nsim_poll(struct napi_struct *napi, int budget)
369 {
370 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
371 	int done;
372 
373 	done = nsim_rcv(rq, budget);
374 	napi_complete(napi);
375 
376 	return done;
377 }
378 
379 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
380 {
381 	struct page_pool_params params = {
382 		.order = 0,
383 		.pool_size = NSIM_RING_SIZE,
384 		.nid = NUMA_NO_NODE,
385 		.dev = &napi->dev->dev,
386 		.napi = napi,
387 		.dma_dir = DMA_BIDIRECTIONAL,
388 		.netdev = napi->dev,
389 	};
390 	struct page_pool *pool;
391 
392 	pool = page_pool_create(&params);
393 	if (IS_ERR(pool))
394 		return PTR_ERR(pool);
395 
396 	*p = pool;
397 	return 0;
398 }
399 
400 static int nsim_init_napi(struct netdevsim *ns)
401 {
402 	struct net_device *dev = ns->netdev;
403 	struct nsim_rq *rq;
404 	int err, i;
405 
406 	for (i = 0; i < dev->num_rx_queues; i++) {
407 		rq = ns->rq[i];
408 
409 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
410 	}
411 
412 	for (i = 0; i < dev->num_rx_queues; i++) {
413 		rq = ns->rq[i];
414 
415 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
416 		if (err)
417 			goto err_pp_destroy;
418 	}
419 
420 	return 0;
421 
422 err_pp_destroy:
423 	while (i--) {
424 		page_pool_destroy(ns->rq[i]->page_pool);
425 		ns->rq[i]->page_pool = NULL;
426 	}
427 
428 	for (i = 0; i < dev->num_rx_queues; i++)
429 		__netif_napi_del_locked(&ns->rq[i]->napi);
430 
431 	return err;
432 }
433 
434 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
435 {
436 	struct nsim_rq *rq;
437 
438 	rq = container_of(timer, struct nsim_rq, napi_timer);
439 	napi_schedule(&rq->napi);
440 
441 	return HRTIMER_NORESTART;
442 }
443 
/* Set up @rq's NAPI timer as a relative CLOCK_MONOTONIC hrtimer that runs
 * nsim_napi_schedule() on expiry.
 */
static void nsim_rq_timer_init(struct nsim_rq *rq)
{
	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
}
449 
450 static void nsim_enable_napi(struct netdevsim *ns)
451 {
452 	struct net_device *dev = ns->netdev;
453 	int i;
454 
455 	for (i = 0; i < dev->num_rx_queues; i++) {
456 		struct nsim_rq *rq = ns->rq[i];
457 
458 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
459 		napi_enable_locked(&rq->napi);
460 	}
461 }
462 
/* ndo_open: set up NAPI and page pools for all Rx queues, then enable NAPI.
 *
 * Must be called with the netdev instance lock held. Returns 0 or the error
 * from nsim_init_napi().
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	int err;

	netdev_assert_locked(dev);

	err = nsim_init_napi(ns);
	if (!err)
		nsim_enable_napi(ns);

	return err;
}
478 
479 static void nsim_del_napi(struct netdevsim *ns)
480 {
481 	struct net_device *dev = ns->netdev;
482 	int i;
483 
484 	for (i = 0; i < dev->num_rx_queues; i++) {
485 		struct nsim_rq *rq = ns->rq[i];
486 
487 		napi_disable_locked(&rq->napi);
488 		__netif_napi_del_locked(&rq->napi);
489 	}
490 	synchronize_net();
491 
492 	for (i = 0; i < dev->num_rx_queues; i++) {
493 		page_pool_destroy(ns->rq[i]->page_pool);
494 		ns->rq[i]->page_pool = NULL;
495 	}
496 }
497 
498 static int nsim_stop(struct net_device *dev)
499 {
500 	struct netdevsim *ns = netdev_priv(dev);
501 	struct netdevsim *peer;
502 
503 	netdev_assert_locked(dev);
504 
505 	netif_carrier_off(dev);
506 	peer = rtnl_dereference(ns->peer);
507 	if (peer)
508 		netif_carrier_off(peer->netdev);
509 
510 	nsim_del_napi(ns);
511 
512 	return 0;
513 }
514 
/* net_shaper_ops.set stub: accepts every request and returns 0. */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
521 
/* net_shaper_ops.delete stub: accepts every request and returns 0. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
528 
/* net_shaper_ops.group stub: accepts every request and returns 0. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
537 
/* net_shaper_ops.capabilities: set every bit in *@flags regardless of
 * @scope.
 */
static void nsim_shaper_cap(struct net_shaper_binding *binding,
			    enum net_shaper_scope scope,
			    unsigned long *flags)
{
	*flags = ULONG_MAX;
}
544 
/* Shaper callbacks referenced from nsim_netdev_ops. */
static const struct net_shaper_ops nsim_shaper_ops = {
	.set			= nsim_shaper_set,
	.delete			= nsim_shaper_del,
	.group			= nsim_shaper_group,
	.capabilities		= nsim_shaper_cap,
};
551 
/* net_device_ops installed by nsim_init_netdevsim(); netdev_is_nsim() uses
 * this table's address to recognise such devices.
 */
static const struct net_device_ops nsim_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_get_stats64	= nsim_get_stats64,
	.ndo_set_vf_mac		= nsim_set_vf_mac,
	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
	.ndo_set_vf_rate	= nsim_set_vf_rate,
	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
	.ndo_set_vf_trust	= nsim_set_vf_trust,
	.ndo_get_vf_config	= nsim_get_vf_config,
	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
	.ndo_get_iflink		= nsim_get_iflink,
	.ndo_bpf		= nsim_bpf,
	.ndo_open		= nsim_open,
	.ndo_stop		= nsim_stop,
	.net_shaper_ops		= &nsim_shaper_ops,
};
575 
/* Reduced net_device_ops installed by nsim_init_netdevsim_vf(): no VF
 * management, open/stop, BPF, iflink or shaper callbacks.
 */
static const struct net_device_ops nsim_vf_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_get_stats64	= nsim_get_stats64,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
};
586 
587 /* We don't have true per-queue stats, yet, so do some random fakery here.
588  * Only report stuff for queue 0.
589  */
590 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
591 				    struct netdev_queue_stats_rx *stats)
592 {
593 	struct rtnl_link_stats64 rtstats = {};
594 
595 	if (!idx)
596 		nsim_get_stats64(dev, &rtstats);
597 
598 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
599 	stats->bytes = rtstats.rx_bytes;
600 }
601 
602 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
603 				    struct netdev_queue_stats_tx *stats)
604 {
605 	struct rtnl_link_stats64 rtstats = {};
606 
607 	if (!idx)
608 		nsim_get_stats64(dev, &rtstats);
609 
610 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
611 	stats->bytes = rtstats.tx_bytes;
612 }
613 
614 static void nsim_get_base_stats(struct net_device *dev,
615 				struct netdev_queue_stats_rx *rx,
616 				struct netdev_queue_stats_tx *tx)
617 {
618 	struct rtnl_link_stats64 rtstats = {};
619 
620 	nsim_get_stats64(dev, &rtstats);
621 
622 	rx->packets = !!rtstats.rx_packets;
623 	rx->bytes = 0;
624 	tx->packets = !!rtstats.tx_packets;
625 	tx->bytes = 0;
626 }
627 
/* Queue statistics callbacks installed by nsim_init_netdevsim(). */
static const struct netdev_stat_ops nsim_stat_ops = {
	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
	.get_base_stats		= nsim_get_base_stats,
};
633 
634 static struct nsim_rq *nsim_queue_alloc(void)
635 {
636 	struct nsim_rq *rq;
637 
638 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
639 	if (!rq)
640 		return NULL;
641 
642 	skb_queue_head_init(&rq->skb_queue);
643 	nsim_rq_timer_init(rq);
644 	return rq;
645 }
646 
/* Release a queue obtained from nsim_queue_alloc(): cancel its NAPI timer,
 * purge any skbs still sitting in the backlog, then free the structure.
 */
static void nsim_queue_free(struct nsim_rq *rq)
{
	hrtimer_cancel(&rq->napi_timer);
	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
	kfree(rq);
}
653 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 *
 * Per-queue memory exchanged through the queue management callbacks: mode 1
 * uses only @pp, every other mode uses only @rq.
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;	/* whole replacement / retired queue */
	struct page_pool *pp;	/* replacement / retired page pool (mode 1) */
};
664 
665 static int
666 nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
667 {
668 	struct nsim_queue_mem *qmem = per_queue_mem;
669 	struct netdevsim *ns = netdev_priv(dev);
670 	int err;
671 
672 	if (ns->rq_reset_mode > 3)
673 		return -EINVAL;
674 
675 	if (ns->rq_reset_mode == 1) {
676 		if (!netif_running(ns->netdev))
677 			return -ENETDOWN;
678 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
679 	}
680 
681 	qmem->rq = nsim_queue_alloc();
682 	if (!qmem->rq)
683 		return -ENOMEM;
684 
685 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
686 	if (err)
687 		goto err_free;
688 
689 	if (!ns->rq_reset_mode)
690 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
691 					     idx);
692 
693 	return 0;
694 
695 err_free:
696 	nsim_queue_free(qmem->rq);
697 	return err;
698 }
699 
700 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
701 {
702 	struct nsim_queue_mem *qmem = per_queue_mem;
703 	struct netdevsim *ns = netdev_priv(dev);
704 
705 	page_pool_destroy(qmem->pp);
706 	if (qmem->rq) {
707 		if (!ns->rq_reset_mode)
708 			netif_napi_del_locked(&qmem->rq->napi);
709 		page_pool_destroy(qmem->rq->page_pool);
710 		nsim_queue_free(qmem->rq);
711 	}
712 }
713 
714 static int
715 nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
716 {
717 	struct nsim_queue_mem *qmem = per_queue_mem;
718 	struct netdevsim *ns = netdev_priv(dev);
719 
720 	netdev_assert_locked(dev);
721 
722 	if (ns->rq_reset_mode == 1) {
723 		ns->rq[idx]->page_pool = qmem->pp;
724 		napi_enable_locked(&ns->rq[idx]->napi);
725 		return 0;
726 	}
727 
728 	/* netif_napi_add()/_del() should normally be called from alloc/free,
729 	 * here we want to test various call orders.
730 	 */
731 	if (ns->rq_reset_mode == 2) {
732 		netif_napi_del_locked(&ns->rq[idx]->napi);
733 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
734 					     idx);
735 	} else if (ns->rq_reset_mode == 3) {
736 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
737 					     idx);
738 		netif_napi_del_locked(&ns->rq[idx]->napi);
739 	}
740 
741 	ns->rq[idx] = qmem->rq;
742 	napi_enable_locked(&ns->rq[idx]->napi);
743 
744 	return 0;
745 }
746 
747 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
748 {
749 	struct nsim_queue_mem *qmem = per_queue_mem;
750 	struct netdevsim *ns = netdev_priv(dev);
751 
752 	netdev_assert_locked(dev);
753 
754 	napi_disable_locked(&ns->rq[idx]->napi);
755 
756 	if (ns->rq_reset_mode == 1) {
757 		qmem->pp = ns->rq[idx]->page_pool;
758 		page_pool_disable_direct_recycling(qmem->pp);
759 	} else {
760 		qmem->rq = ns->rq[idx];
761 	}
762 
763 	return 0;
764 }
765 
/* Queue management callbacks installed by nsim_init_netdevsim(); the
 * per-queue memory blob is a struct nsim_queue_mem.
 */
static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
	.ndo_queue_mem_free	= nsim_queue_mem_free,
	.ndo_queue_start	= nsim_queue_start,
	.ndo_queue_stop		= nsim_queue_stop,
};
773 
774 static ssize_t
775 nsim_qreset_write(struct file *file, const char __user *data,
776 		  size_t count, loff_t *ppos)
777 {
778 	struct netdevsim *ns = file->private_data;
779 	unsigned int queue, mode;
780 	char buf[32];
781 	ssize_t ret;
782 
783 	if (count >= sizeof(buf))
784 		return -EINVAL;
785 	if (copy_from_user(buf, data, count))
786 		return -EFAULT;
787 	buf[count] = '\0';
788 
789 	ret = sscanf(buf, "%u %u", &queue, &mode);
790 	if (ret != 2)
791 		return -EINVAL;
792 
793 	netdev_lock(ns->netdev);
794 	if (queue >= ns->netdev->real_num_rx_queues) {
795 		ret = -EINVAL;
796 		goto exit_unlock;
797 	}
798 
799 	ns->rq_reset_mode = mode;
800 	ret = netdev_rx_queue_restart(ns->netdev, queue);
801 	ns->rq_reset_mode = 0;
802 	if (ret)
803 		goto exit_unlock;
804 
805 	ret = count;
806 exit_unlock:
807 	netdev_unlock(ns->netdev);
808 	return ret;
809 }
810 
/* Write-only file operations for the "queue_reset" debugfs file. */
static const struct file_operations nsim_qreset_fops = {
	.open = simple_open,
	.write = nsim_qreset_write,
	.owner = THIS_MODULE,
};
816 
817 static ssize_t
818 nsim_pp_hold_read(struct file *file, char __user *data,
819 		  size_t count, loff_t *ppos)
820 {
821 	struct netdevsim *ns = file->private_data;
822 	char buf[3] = "n\n";
823 
824 	if (ns->page)
825 		buf[0] = 'y';
826 
827 	return simple_read_from_buffer(data, count, ppos, buf, 2);
828 }
829 
830 static ssize_t
831 nsim_pp_hold_write(struct file *file, const char __user *data,
832 		   size_t count, loff_t *ppos)
833 {
834 	struct netdevsim *ns = file->private_data;
835 	ssize_t ret;
836 	bool val;
837 
838 	ret = kstrtobool_from_user(data, count, &val);
839 	if (ret)
840 		return ret;
841 
842 	rtnl_lock();
843 	ret = count;
844 	if (val == !!ns->page)
845 		goto exit;
846 
847 	if (!netif_running(ns->netdev) && val) {
848 		ret = -ENETDOWN;
849 	} else if (val) {
850 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
851 		if (!ns->page)
852 			ret = -ENOMEM;
853 	} else {
854 		page_pool_put_full_page(ns->page->pp, ns->page, false);
855 		ns->page = NULL;
856 	}
857 
858 exit:
859 	rtnl_unlock();
860 	return ret;
861 }
862 
/* File operations for the "pp_hold" debugfs file (readable and writable). */
static const struct file_operations nsim_pp_hold_fops = {
	.open = simple_open,
	.read = nsim_pp_hold_read,
	.write = nsim_pp_hold_write,
	.llseek = generic_file_llseek,
	.owner = THIS_MODULE,
};
870 
871 static void nsim_setup(struct net_device *dev)
872 {
873 	ether_setup(dev);
874 	eth_hw_addr_random(dev);
875 
876 	dev->tx_queue_len = 0;
877 	dev->flags &= ~IFF_MULTICAST;
878 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE |
879 			   IFF_NO_QUEUE;
880 	dev->features |= NETIF_F_HIGHDMA |
881 			 NETIF_F_SG |
882 			 NETIF_F_FRAGLIST |
883 			 NETIF_F_HW_CSUM |
884 			 NETIF_F_LRO |
885 			 NETIF_F_TSO;
886 	dev->hw_features |= NETIF_F_HW_TC |
887 			    NETIF_F_SG |
888 			    NETIF_F_FRAGLIST |
889 			    NETIF_F_HW_CSUM |
890 			    NETIF_F_LRO |
891 			    NETIF_F_TSO;
892 	dev->max_mtu = ETH_MAX_MTU;
893 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
894 }
895 
896 static int nsim_queue_init(struct netdevsim *ns)
897 {
898 	struct net_device *dev = ns->netdev;
899 	int i;
900 
901 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
902 			 GFP_KERNEL_ACCOUNT);
903 	if (!ns->rq)
904 		return -ENOMEM;
905 
906 	for (i = 0; i < dev->num_rx_queues; i++) {
907 		ns->rq[i] = nsim_queue_alloc();
908 		if (!ns->rq[i])
909 			goto err_free_prev;
910 	}
911 
912 	return 0;
913 
914 err_free_prev:
915 	while (i--)
916 		kfree(ns->rq[i]);
917 	kfree(ns->rq);
918 	return -ENOMEM;
919 }
920 
921 static void nsim_queue_uninit(struct netdevsim *ns)
922 {
923 	struct net_device *dev = ns->netdev;
924 	int i;
925 
926 	for (i = 0; i < dev->num_rx_queues; i++)
927 		nsim_queue_free(ns->rq[i]);
928 
929 	kfree(ns->rq);
930 	ns->rq = NULL;
931 }
932 
/* Initialise and register a PF netdevsim device.
 *
 * Creates the mock PHC, installs the full ops tables, sets up UDP tunnel
 * info, Rx queues, BPF, MACsec and IPsec state, and registers the netdev.
 * Returns 0 on success; on failure everything set up so far is undone in
 * reverse order and the error is returned.
 */
static int nsim_init_netdevsim(struct netdevsim *ns)
{
	struct mock_phc *phc;
	int err;

	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
	if (IS_ERR(phc))
		return PTR_ERR(phc);

	ns->phc = phc;
	ns->netdev->netdev_ops = &nsim_netdev_ops;
	ns->netdev->stat_ops = &nsim_stat_ops;
	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
	netdev_lockdep_set_classes(ns->netdev);

	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
	if (err)
		goto err_phc_destroy;

	/* rtnl is held from queue setup through register_netdevice(). */
	rtnl_lock();
	err = nsim_queue_init(ns);
	if (err)
		goto err_utn_destroy;

	err = nsim_bpf_init(ns);
	if (err)
		goto err_rq_destroy;

	nsim_macsec_init(ns);
	nsim_ipsec_init(ns);

	err = register_netdevice(ns->netdev);
	if (err)
		goto err_ipsec_teardown;
	rtnl_unlock();

	/* The debug notifier is optional: if it cannot be registered,
	 * notifier_call is cleared so that nsim_destroy() does not try to
	 * unregister it.
	 */
	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
		ns->nb.notifier_call = netdev_debug_event;
		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
							&ns->nn))
			ns->nb.notifier_call = NULL;
	}

	return 0;

	/* All labels down to err_utn_destroy are entered with rtnl held. */
err_ipsec_teardown:
	nsim_ipsec_teardown(ns);
	nsim_macsec_teardown(ns);
	nsim_bpf_uninit(ns);
err_rq_destroy:
	nsim_queue_uninit(ns);
err_utn_destroy:
	rtnl_unlock();
	nsim_udp_tunnels_info_destroy(ns->netdev);
err_phc_destroy:
	mock_phc_destroy(ns->phc);
	return err;
}
991 
992 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
993 {
994 	int err;
995 
996 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
997 	rtnl_lock();
998 	err = register_netdevice(ns->netdev);
999 	rtnl_unlock();
1000 	return err;
1001 }
1002 
/* Release the UDP tunnel info and the mock PHC; nsim_destroy() calls this
 * for PF ports only.
 */
static void nsim_exit_netdevsim(struct netdevsim *ns)
{
	nsim_udp_tunnels_info_destroy(ns->netdev);
	mock_phc_destroy(ns->phc);
}
1008 
1009 struct netdevsim *
1010 nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
1011 {
1012 	struct net_device *dev;
1013 	struct netdevsim *ns;
1014 	int err;
1015 
1016 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1017 			      nsim_dev->nsim_bus_dev->num_queues);
1018 	if (!dev)
1019 		return ERR_PTR(-ENOMEM);
1020 
1021 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1022 	ns = netdev_priv(dev);
1023 	ns->netdev = dev;
1024 	u64_stats_init(&ns->syncp);
1025 	ns->nsim_dev = nsim_dev;
1026 	ns->nsim_dev_port = nsim_dev_port;
1027 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1028 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1029 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1030 	nsim_ethtool_init(ns);
1031 	if (nsim_dev_port_is_pf(nsim_dev_port))
1032 		err = nsim_init_netdevsim(ns);
1033 	else
1034 		err = nsim_init_netdevsim_vf(ns);
1035 	if (err)
1036 		goto err_free_netdev;
1037 
1038 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1039 					 ns, &nsim_pp_hold_fops);
1040 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1041 					 nsim_dev_port->ddir, ns,
1042 					 &nsim_qreset_fops);
1043 
1044 	return ns;
1045 
1046 err_free_netdev:
1047 	free_netdev(dev);
1048 	return ERR_PTR(err);
1049 }
1050 
/* Tear down and free a netdevsim created by nsim_create().
 *
 * Removes the debugfs files and the debug notifier (if one was registered),
 * unlinks the device from its peer, unregisters the netdev and releases the
 * PF-only state, then frees the netdev.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	/* notifier_call is only left set when registration succeeded. */
	if (ns->nb.notifier_call)
		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
						      &ns->nn);

	rtnl_lock();
	/* Clear the link in both directions before unregistering. */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(ns->page->pp, ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1087 
/* Return true if @dev uses nsim_netdev_ops. Devices set up with
 * nsim_vf_netdev_ops do not match.
 */
bool netdev_is_nsim(struct net_device *dev)
{
	return dev->netdev_ops == &nsim_netdev_ops;
}
1092 
/* rtnl_link_ops.validate: always fails with -EOPNOTSUPP and an extack
 * message pointing the user at the sysfs new_device interface.
 */
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG_MOD(extack,
			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
	return -EOPNOTSUPP;
}
1100 
/* rtnl link kind for DRV_NAME; its only callback is nsim_validate(), which
 * rejects every request.
 */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1105 
1106 static int __init nsim_module_init(void)
1107 {
1108 	int err;
1109 
1110 	err = nsim_dev_init();
1111 	if (err)
1112 		return err;
1113 
1114 	err = nsim_bus_init();
1115 	if (err)
1116 		goto err_dev_exit;
1117 
1118 	err = rtnl_link_register(&nsim_link_ops);
1119 	if (err)
1120 		goto err_bus_exit;
1121 
1122 	return 0;
1123 
1124 err_bus_exit:
1125 	nsim_bus_exit();
1126 err_dev_exit:
1127 	nsim_dev_exit();
1128 	return err;
1129 }
1130 
/* Module exit: undo nsim_module_init() in reverse order. */
static void __exit nsim_module_exit(void)
{
	rtnl_link_unregister(&nsim_link_ops);
	nsim_bus_exit();
	nsim_dev_exit();
}
1137 
/* Module entry points and metadata. */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1143