xref: /linux/drivers/net/netdevsim/netdev.c (revision dfecb0c5af3b07ebfa84be63a7a21bfc9e29a872)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
36 MODULE_IMPORT_NS("NETDEV_INTERNAL");
37 
38 #define NSIM_RING_SIZE		256
39 
static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct net_device *peer_dev;
	struct netdevsim *peer_ns;
	struct netdev_queue *txq;
	u16 idx;

	/* Wake the peer TX queue feeding this RX queue, in case it was
	 * stopped for flow control by nsim_stop_tx_queue().
	 */
	idx = rq->napi.index;
	rcu_read_lock();
	peer_ns = rcu_dereference(ns->peer);
	if (!peer_ns)
		goto out;

	/* TX device */
	peer_dev = peer_ns->netdev;
	/* Without a 1:1 TX->RX queue mapping we cannot tell which TX
	 * queue corresponds to this RX queue, so nothing was stopped.
	 */
	if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
		goto out;

	txq = netdev_get_tx_queue(peer_dev, idx);
	if (!netif_tx_queue_stopped(txq))
		goto out;

	netif_tx_wake_queue(txq);
out:
	rcu_read_unlock();
}
67 
/* Stop the TX queue on @tx_dev that maps to RX queue @idx/@rq on @rx_dev
 * once the RX skb queue is (nearly) full; it is restarted by
 * nsim_start_peer_tx_queue() when the receiver drains the queue.
 */
static void nsim_stop_tx_queue(struct net_device *tx_dev,
			       struct net_device *rx_dev,
			       struct nsim_rq *rq,
			       u16 idx)
{
	/* If different queues size, do not stop, since it is not
	 * easy to find which TX queue is mapped here
	 */
	if (rx_dev->real_num_tx_queues != tx_dev->num_rx_queues)
		return;

	/* rq is the queue on the receive side */
	netif_subqueue_try_stop(tx_dev, idx,
				NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue),
				NSIM_RING_SIZE / 2);
}
84 
/* Enqueue @skb on the receive side's ring (drained by NAPI poll).
 * Returns NET_RX_SUCCESS, or NET_RX_DROP if the ring overflowed.
 */
static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev,
			struct nsim_rq *rq, struct sk_buff *skb)
{
	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
		dev_kfree_skb_any(skb);
		return NET_RX_DROP;
	}

	skb_queue_tail(&rq->skb_queue, skb);

	/* Stop the peer TX queue avoiding dropping packets later */
	if (skb_queue_len(&rq->skb_queue) >= NSIM_RING_SIZE)
		nsim_stop_tx_queue(tx_dev, rx_dev, rq,
				   skb_get_queue_mapping(skb));

	return NET_RX_SUCCESS;
}
102 
/* Hand @skb over from @tx_dev to @rx_dev's RX queue @rq.
 * On forwarding failure the PSP extension (if any) is released here;
 * on success ownership of @psp_ext passes to nsim_psp_handle_ext().
 */
static int nsim_forward_skb(struct net_device *tx_dev,
			    struct net_device *rx_dev,
			    struct sk_buff *skb,
			    struct nsim_rq *rq,
			    struct skb_ext *psp_ext)
{
	int ret;

	ret = __dev_forward_skb(rx_dev, skb);
	if (ret) {
		if (psp_ext)
			__skb_ext_put(psp_ext);
		return ret;
	}

	nsim_psp_handle_ext(skb, psp_ext);

	return nsim_napi_rx(tx_dev, rx_dev, rq, skb);
}
122 
123 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
124 {
125 	struct netdevsim *ns = netdev_priv(dev);
126 	struct skb_ext *psp_ext = NULL;
127 	struct net_device *peer_dev;
128 	unsigned int len = skb->len;
129 	struct netdevsim *peer_ns;
130 	struct netdev_config *cfg;
131 	struct nsim_rq *rq;
132 	int rxq;
133 	int dr;
134 
135 	rcu_read_lock();
136 	if (!nsim_ipsec_tx(ns, skb))
137 		goto out_drop_any;
138 
139 	/* Check if loopback mode is enabled */
140 	if (dev->features & NETIF_F_LOOPBACK) {
141 		peer_ns = ns;
142 		peer_dev = dev;
143 	} else {
144 		peer_ns = rcu_dereference(ns->peer);
145 		if (!peer_ns)
146 			goto out_drop_any;
147 		peer_dev = peer_ns->netdev;
148 	}
149 
150 	dr = nsim_do_psp(skb, ns, peer_ns, &psp_ext);
151 	if (dr)
152 		goto out_drop_free;
153 
154 	rxq = skb_get_queue_mapping(skb);
155 	if (rxq >= peer_dev->num_rx_queues)
156 		rxq = rxq % peer_dev->num_rx_queues;
157 	rq = peer_ns->rq[rxq];
158 
159 	cfg = peer_dev->cfg;
160 	if (skb_is_nonlinear(skb) &&
161 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
162 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
163 	      cfg->hds_thresh > len)))
164 		skb_linearize(skb);
165 
166 	skb_tx_timestamp(skb);
167 	if (unlikely(nsim_forward_skb(dev, peer_dev,
168 				      skb, rq, psp_ext) == NET_RX_DROP))
169 		goto out_drop_cnt;
170 
171 	if (!hrtimer_active(&rq->napi_timer))
172 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
173 
174 	rcu_read_unlock();
175 	dev_dstats_tx_add(dev, len);
176 	return NETDEV_TX_OK;
177 
178 out_drop_any:
179 	dr = SKB_DROP_REASON_NOT_SPECIFIED;
180 out_drop_free:
181 	kfree_skb_reason(skb, dr);
182 out_drop_cnt:
183 	rcu_read_unlock();
184 	dev_dstats_tx_dropped(dev);
185 	return NETDEV_TX_OK;
186 }
187 
/* ndo_set_rx_mode stub: netdevsim has no real RX filtering to program. */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
191 
/* ndo_change_mtu: refuse MTUs a loaded single-buffer XDP program
 * cannot handle; otherwise just store the new MTU.
 */
static int nsim_change_mtu(struct net_device *dev, int new_mtu)
{
	struct netdevsim *ns = netdev_priv(dev);

	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
	    new_mtu > NSIM_XDP_MAX_MTU)
		return -EBUSY;

	WRITE_ONCE(dev->mtu, new_mtu);

	return 0;
}
204 
/* ndo_set_vf_mac: record the MAC for VF @vf in the fake VF config. */
static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct nsim_dev *nsim_dev = ns->nsim_dev;

	/* Only refuse multicast addresses, zero address can mean unset/any. */
	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
		return -EINVAL;
	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);

	return 0;
}
217 
218 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
219 			    u16 vlan, u8 qos, __be16 vlan_proto)
220 {
221 	struct netdevsim *ns = netdev_priv(dev);
222 	struct nsim_dev *nsim_dev = ns->nsim_dev;
223 
224 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
225 		return -EINVAL;
226 
227 	nsim_dev->vfconfigs[vf].vlan = vlan;
228 	nsim_dev->vfconfigs[vf].qos = qos;
229 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
230 
231 	return 0;
232 }
233 
/* ndo_set_vf_rate: record min/max TX rates for VF @vf (legacy mode only;
 * in switchdev mode rates are managed through the devlink rate API).
 */
static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct nsim_dev *nsim_dev = ns->nsim_dev;

	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
		return -EOPNOTSUPP;
	}

	if (vf >= nsim_dev_get_vfs(nsim_dev))
		return -EINVAL;

	nsim_dev->vfconfigs[vf].min_tx_rate = min;
	nsim_dev->vfconfigs[vf].max_tx_rate = max;

	return 0;
}
252 
253 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
254 {
255 	struct netdevsim *ns = netdev_priv(dev);
256 	struct nsim_dev *nsim_dev = ns->nsim_dev;
257 
258 	if (vf >= nsim_dev_get_vfs(nsim_dev))
259 		return -EINVAL;
260 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
261 
262 	return 0;
263 }
264 
265 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
266 {
267 	struct netdevsim *ns = netdev_priv(dev);
268 	struct nsim_dev *nsim_dev = ns->nsim_dev;
269 
270 	if (vf >= nsim_dev_get_vfs(nsim_dev))
271 		return -EINVAL;
272 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
273 
274 	return 0;
275 }
276 
277 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
278 {
279 	struct netdevsim *ns = netdev_priv(dev);
280 	struct nsim_dev *nsim_dev = ns->nsim_dev;
281 
282 	if (vf >= nsim_dev_get_vfs(nsim_dev))
283 		return -EINVAL;
284 	nsim_dev->vfconfigs[vf].trusted = val;
285 
286 	return 0;
287 }
288 
289 static int
290 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
291 {
292 	struct netdevsim *ns = netdev_priv(dev);
293 	struct nsim_dev *nsim_dev = ns->nsim_dev;
294 
295 	if (vf >= nsim_dev_get_vfs(nsim_dev))
296 		return -EINVAL;
297 
298 	ivi->vf = vf;
299 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
300 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
301 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
302 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
303 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
304 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
305 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
306 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
307 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
308 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
309 
310 	return 0;
311 }
312 
313 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
314 {
315 	struct netdevsim *ns = netdev_priv(dev);
316 	struct nsim_dev *nsim_dev = ns->nsim_dev;
317 
318 	if (vf >= nsim_dev_get_vfs(nsim_dev))
319 		return -EINVAL;
320 
321 	switch (state) {
322 	case IFLA_VF_LINK_STATE_AUTO:
323 	case IFLA_VF_LINK_STATE_ENABLE:
324 	case IFLA_VF_LINK_STATE_DISABLE:
325 		break;
326 	default:
327 		return -EINVAL;
328 	}
329 
330 	nsim_dev->vfconfigs[vf].link_state = state;
331 
332 	return 0;
333 }
334 
/* ndo_set_features: the only feature with side effects is NETIF_F_HW_TC;
 * when it is being turned off, detach any offloaded TC programs.
 */
static int
nsim_set_features(struct net_device *dev, netdev_features_t features)
{
	struct netdevsim *ns = netdev_priv(dev);

	/* True only when HW_TC is currently set and being cleared. */
	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
		return nsim_bpf_disable_tc(ns);

	return 0;
}
345 
/* ndo_get_iflink: report the peer's ifindex, or our own when unpaired. */
static int nsim_get_iflink(const struct net_device *dev)
{
	struct netdevsim *nsim, *peer;
	int iflink;

	nsim = netdev_priv(dev);

	rcu_read_lock();
	peer = rcu_dereference(nsim->peer);
	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
			READ_ONCE(dev->ifindex);
	rcu_read_unlock();

	return iflink;
}
361 
/* Drain up to @budget skbs from @rq, run the generic XDP hook on each
 * (if a program is attached) and feed survivors to GRO.
 * Returns the number of skbs processed.
 */
static int nsim_rcv(struct nsim_rq *rq, int budget)
{
	struct net_device *dev = rq->napi.dev;
	struct bpf_prog *xdp_prog;
	struct netdevsim *ns;
	struct sk_buff *skb;
	unsigned int skblen;
	int i, ret;

	ns = netdev_priv(dev);
	xdp_prog = READ_ONCE(ns->xdp.prog);

	for (i = 0; i < budget; i++) {
		if (skb_queue_empty(&rq->skb_queue))
			break;

		skb = skb_dequeue(&rq->skb_queue);

		if (xdp_prog) {
			/* skb might be freed directly by XDP, save the len */
			skblen = skb->len;

			/* Generic XDP needs a fully resolved checksum. */
			if (skb->ip_summed == CHECKSUM_PARTIAL)
				skb_checksum_help(skb);
			ret = do_xdp_generic(xdp_prog, &skb);
			if (ret != XDP_PASS) {
				dev_dstats_rx_add(dev, skblen);
				continue;
			}
		}

		/* skb might be discard at netif_receive_skb, save the len */
		dev_dstats_rx_add(dev, skb->len);
		napi_gro_receive(&rq->napi, skb);
	}

	/* We freed ring space; let the peer transmit again. */
	nsim_start_peer_tx_queue(dev, rq);
	return i;
}
401 
402 static int nsim_poll(struct napi_struct *napi, int budget)
403 {
404 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
405 	int done;
406 
407 	done = nsim_rcv(rq, budget);
408 	if (done < budget)
409 		napi_complete_done(napi, done);
410 
411 	return done;
412 }
413 
/* Create a page pool bound to @napi's device and store it in *@p.
 * Returns 0 or a negative errno from page_pool_create().
 */
static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
{
	struct page_pool_params params = {
		.order = 0,
		.pool_size = NSIM_RING_SIZE,
		.nid = NUMA_NO_NODE,
		.dev = &napi->dev->dev,
		.napi = napi,
		.dma_dir = DMA_BIDIRECTIONAL,
		.netdev = napi->dev,
	};
	struct page_pool *pool;

	pool = page_pool_create(&params);
	if (IS_ERR(pool))
		return PTR_ERR(pool);

	*p = pool;
	return 0;
}
434 
/* Register a NAPI instance and create a page pool for every RX queue.
 * On page-pool failure, already-created pools and all NAPIs are undone.
 */
static int nsim_init_napi(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct nsim_rq *rq;
	int err, i;

	for (i = 0; i < dev->num_rx_queues; i++) {
		rq = ns->rq[i];

		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
	}

	for (i = 0; i < dev->num_rx_queues; i++) {
		rq = ns->rq[i];

		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
		if (err)
			goto err_pp_destroy;
	}

	return 0;

err_pp_destroy:
	/* Destroy only the pools created so far (indices below i). */
	while (i--) {
		page_pool_destroy(ns->rq[i]->page_pool);
		ns->rq[i]->page_pool = NULL;
	}

	/* All NAPIs were added in the first loop; remove every one. */
	for (i = 0; i < dev->num_rx_queues; i++)
		__netif_napi_del_locked(&ns->rq[i]->napi);

	return err;
}
468 
469 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
470 {
471 	struct nsim_rq *rq;
472 
473 	rq = container_of(timer, struct nsim_rq, napi_timer);
474 	napi_schedule(&rq->napi);
475 
476 	return HRTIMER_NORESTART;
477 }
478 
/* Set up (but do not arm) the per-queue NAPI kick timer. */
static void nsim_rq_timer_init(struct nsim_rq *rq)
{
	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
}
484 
/* Link each RX queue to its NAPI instance and enable it. */
static void nsim_enable_napi(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	int i;

	for (i = 0; i < dev->num_rx_queues; i++) {
		struct nsim_rq *rq = ns->rq[i];

		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
		napi_enable_locked(&rq->napi);
	}
}
497 
/* ndo_open: set up NAPI/page pools, then raise carrier on both ends of
 * the pair once the peer is also up. Called with the instance lock held.
 */
static int nsim_open(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct netdevsim *peer;
	int err;

	netdev_assert_locked(dev);

	err = nsim_init_napi(ns);
	if (err)
		return err;

	nsim_enable_napi(ns);

	peer = rtnl_dereference(ns->peer);
	if (peer && netif_running(peer->netdev)) {
		netif_carrier_on(dev);
		netif_carrier_on(peer->netdev);
	}

	return 0;
}
520 
/* Disable and remove all NAPIs, then destroy the per-queue page pools.
 * synchronize_net() between the phases ensures no softirq still touches
 * a pool when it is destroyed.
 */
static void nsim_del_napi(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	int i;

	for (i = 0; i < dev->num_rx_queues; i++) {
		struct nsim_rq *rq = ns->rq[i];

		napi_disable_locked(&rq->napi);
		__netif_napi_del_locked(&rq->napi);
	}
	synchronize_net();

	for (i = 0; i < dev->num_rx_queues; i++) {
		page_pool_destroy(ns->rq[i]->page_pool);
		ns->rq[i]->page_pool = NULL;
	}
}
539 
/* ndo_stop: drop carrier on both ends of the pair and tear down NAPI.
 * Called with the instance lock held.
 */
static int nsim_stop(struct net_device *dev)
{
	struct netdevsim *ns = netdev_priv(dev);
	struct netdevsim *peer;

	netdev_assert_locked(dev);

	netif_carrier_off(dev);
	peer = rtnl_dereference(ns->peer);
	if (peer)
		netif_carrier_off(peer->netdev);

	nsim_del_napi(ns);

	return 0;
}
556 
/* net_shaper .set stub: accept any shaper configuration. */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
563 
/* net_shaper .delete stub: accept any shaper removal. */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
570 
/* net_shaper .group stub: accept any shaper grouping. */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
579 
/* net_shaper .capabilities: advertise every capability bit. */
static void nsim_shaper_cap(struct net_shaper_binding *binding,
			    enum net_shaper_scope scope,
			    unsigned long *flags)
{
	*flags = ULONG_MAX;
}
586 
/* Accept-everything shaper ops, for exercising the net_shaper API. */
static const struct net_shaper_ops nsim_shaper_ops = {
	.set			= nsim_shaper_set,
	.delete			= nsim_shaper_del,
	.group			= nsim_shaper_group,
	.capabilities		= nsim_shaper_cap,
};
593 
/* Full ops set used by PF ports (VF config, TC, BPF, shapers, ...). */
static const struct net_device_ops nsim_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_set_vf_mac		= nsim_set_vf_mac,
	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
	.ndo_set_vf_rate	= nsim_set_vf_rate,
	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
	.ndo_set_vf_trust	= nsim_set_vf_trust,
	.ndo_get_vf_config	= nsim_get_vf_config,
	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
	.ndo_get_iflink		= nsim_get_iflink,
	.ndo_bpf		= nsim_bpf,
	.ndo_open		= nsim_open,
	.ndo_stop		= nsim_stop,
	.net_shaper_ops		= &nsim_shaper_ops,
};
616 
/* Reduced ops set used by VF ports (no VF config, BPF, or open/stop). */
static const struct net_device_ops nsim_vf_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
};
626 
627 /* We don't have true per-queue stats, yet, so do some random fakery here.
628  * Only report stuff for queue 0.
629  */
static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
				    struct netdev_queue_stats_rx *stats)
{
	struct rtnl_link_stats64 rtstats = {};

	if (!idx)
		dev_get_stats(dev, &rtstats);

	/* Subtract one packet (if any) so base + queue stats stay
	 * consistent with nsim_get_base_stats() reporting !!rx_packets.
	 */
	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
	stats->bytes = rtstats.rx_bytes;
}
641 
642 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
643 				    struct netdev_queue_stats_tx *stats)
644 {
645 	struct rtnl_link_stats64 rtstats = {};
646 
647 	if (!idx)
648 		dev_get_stats(dev, &rtstats);
649 
650 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
651 	stats->bytes = rtstats.tx_bytes;
652 }
653 
static void nsim_get_base_stats(struct net_device *dev,
				struct netdev_queue_stats_rx *rx,
				struct netdev_queue_stats_tx *tx)
{
	struct rtnl_link_stats64 rtstats = {};

	dev_get_stats(dev, &rtstats);

	/* Claim at most one packet as "base" traffic; the per-queue
	 * callbacks above report the remainder.
	 */
	rx->packets = !!rtstats.rx_packets;
	rx->bytes = 0;
	tx->packets = !!rtstats.tx_packets;
	tx->bytes = 0;
}
667 
/* qstat callbacks implementing the fake per-queue stats above. */
static const struct netdev_stat_ops nsim_stat_ops = {
	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
	.get_base_stats		= nsim_get_base_stats,
};
673 
/* Allocate one RX queue with an initialized skb list and kick timer.
 * Returns NULL on allocation failure.
 */
static struct nsim_rq *nsim_queue_alloc(void)
{
	struct nsim_rq *rq;

	rq = kzalloc_obj(*rq, GFP_KERNEL_ACCOUNT);
	if (!rq)
		return NULL;

	skb_queue_head_init(&rq->skb_queue);
	nsim_rq_timer_init(rq);
	return rq;
}
686 
/* Free an RX queue: stop its timer, count still-queued skbs as RX drops,
 * then purge and free.
 */
static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq)
{
	hrtimer_cancel(&rq->napi_timer);

	if (rq->skb_queue.qlen) {
		/* dstats update requires BH context. */
		local_bh_disable();
		dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen);
		local_bh_enable();
	}

	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
	kfree(rq);
}
700 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI new pool (old NAPI enabled when new added)
 * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
 * - mode 2 - new NAPI new pool (old NAPI removed before new added)
 * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;	/* replacement queue (modes other than 1) */
	struct page_pool *pp;	/* replacement page pool (mode 1 only) */
};
711 
/* queue_mgmt .ndo_queue_mem_alloc: prepare replacement resources for RX
 * queue @idx according to ns->rq_reset_mode (see comment above).
 */
static int
nsim_queue_mem_alloc(struct net_device *dev,
		     struct netdev_queue_config *qcfg,
		     void *per_queue_mem, int idx)
{
	struct nsim_queue_mem *qmem = per_queue_mem;
	struct netdevsim *ns = netdev_priv(dev);
	int err;

	if (ns->rq_reset_mode > 3)
		return -EINVAL;

	/* Mode 1 reuses the existing queue/NAPI; only a fresh pool. */
	if (ns->rq_reset_mode == 1) {
		if (!netif_running(ns->netdev))
			return -ENETDOWN;
		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
	}

	qmem->rq = nsim_queue_alloc();
	if (!qmem->rq)
		return -ENOMEM;

	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
	if (err)
		goto err_free;

	/* Normal mode adds the new NAPI here; modes 2/3 do it in
	 * nsim_queue_start() to exercise other call orders.
	 */
	if (!ns->rq_reset_mode)
		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
					     idx);

	return 0;

err_free:
	nsim_queue_free(dev, qmem->rq);
	return err;
}
748 
/* queue_mgmt .ndo_queue_mem_free: release whatever
 * nsim_queue_mem_alloc()/nsim_queue_stop() left in @per_queue_mem.
 */
static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
{
	struct nsim_queue_mem *qmem = per_queue_mem;
	struct netdevsim *ns = netdev_priv(dev);

	page_pool_destroy(qmem->pp);
	if (qmem->rq) {
		if (!ns->rq_reset_mode)
			netif_napi_del_locked(&qmem->rq->napi);
		page_pool_destroy(qmem->rq->page_pool);
		nsim_queue_free(dev, qmem->rq);
	}
}
762 
/* queue_mgmt .ndo_queue_start: swap in the replacement queue/pool
 * prepared by nsim_queue_mem_alloc() and enable NAPI.
 */
static int
nsim_queue_start(struct net_device *dev, struct netdev_queue_config *qcfg,
		 void *per_queue_mem, int idx)
{
	struct nsim_queue_mem *qmem = per_queue_mem;
	struct netdevsim *ns = netdev_priv(dev);

	netdev_assert_locked(dev);

	/* Mode 1: keep the queue and NAPI, install only the new pool. */
	if (ns->rq_reset_mode == 1) {
		ns->rq[idx]->page_pool = qmem->pp;
		napi_enable_locked(&ns->rq[idx]->napi);
		return 0;
	}

	/* netif_napi_add()/_del() should normally be called from alloc/free,
	 * here we want to test various call orders.
	 */
	if (ns->rq_reset_mode == 2) {
		netif_napi_del_locked(&ns->rq[idx]->napi);
		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
					     idx);
	} else if (ns->rq_reset_mode == 3) {
		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
					     idx);
		netif_napi_del_locked(&ns->rq[idx]->napi);
	}

	ns->rq[idx] = qmem->rq;
	napi_enable_locked(&ns->rq[idx]->napi);

	return 0;
}
796 
/* queue_mgmt .ndo_queue_stop: quiesce RX queue @idx and hand the old
 * resources to @per_queue_mem for nsim_queue_mem_free() to release.
 */
static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
{
	struct nsim_queue_mem *qmem = per_queue_mem;
	struct netdevsim *ns = netdev_priv(dev);

	netdev_assert_locked(dev);

	napi_disable_locked(&ns->rq[idx]->napi);

	if (ns->rq_reset_mode == 1) {
		/* Only the pool is swapped out in mode 1. */
		qmem->pp = ns->rq[idx]->page_pool;
		page_pool_disable_direct_recycling(qmem->pp);
	} else {
		qmem->rq = ns->rq[idx];
	}

	return 0;
}
815 
/* Per-queue restart hooks, used by the "queue_reset" debugfs knob. */
static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
	.ndo_queue_mem_free	= nsim_queue_mem_free,
	.ndo_queue_start	= nsim_queue_start,
	.ndo_queue_stop		= nsim_queue_stop,
};
823 
/* debugfs "queue_reset" writer: expects "<queue> <mode>" and restarts
 * the given RX queue with ns->rq_reset_mode temporarily set to <mode>.
 */
static ssize_t
nsim_qreset_write(struct file *file, const char __user *data,
		  size_t count, loff_t *ppos)
{
	struct netdevsim *ns = file->private_data;
	unsigned int queue, mode;
	char buf[32];
	ssize_t ret;

	if (count >= sizeof(buf))
		return -EINVAL;
	if (copy_from_user(buf, data, count))
		return -EFAULT;
	buf[count] = '\0';

	ret = sscanf(buf, "%u %u", &queue, &mode);
	if (ret != 2)
		return -EINVAL;

	netdev_lock(ns->netdev);
	if (queue >= ns->netdev->real_num_rx_queues) {
		ret = -EINVAL;
		goto exit_unlock;
	}

	/* Mode is only meaningful during the restart below. */
	ns->rq_reset_mode = mode;
	ret = netdev_rx_queue_restart(ns->netdev, queue);
	ns->rq_reset_mode = 0;
	if (ret)
		goto exit_unlock;

	ret = count;
exit_unlock:
	netdev_unlock(ns->netdev);
	return ret;
}
860 
/* fops for the write-only "queue_reset" debugfs file. */
static const struct file_operations nsim_qreset_fops = {
	.open = simple_open,
	.write = nsim_qreset_write,
	.owner = THIS_MODULE,
};
866 
867 static ssize_t
868 nsim_pp_hold_read(struct file *file, char __user *data,
869 		  size_t count, loff_t *ppos)
870 {
871 	struct netdevsim *ns = file->private_data;
872 	char buf[3] = "n\n";
873 
874 	if (ns->page)
875 		buf[0] = 'y';
876 
877 	return simple_read_from_buffer(data, count, ppos, buf, 2);
878 }
879 
/* debugfs "pp_hold" writer: on "1" grab and hold one page from queue 0's
 * page pool, on "0" release it. Used to test page pool leak reporting.
 */
static ssize_t
nsim_pp_hold_write(struct file *file, const char __user *data,
		   size_t count, loff_t *ppos)
{
	struct netdevsim *ns = file->private_data;
	ssize_t ret;
	bool val;

	ret = kstrtobool_from_user(data, count, &val);
	if (ret)
		return ret;

	rtnl_lock();
	ret = count;
	/* No-op if already in the requested state. */
	if (val == !!ns->page)
		goto exit;

	/* Pools only exist while the device is up. */
	if (!netif_running(ns->netdev) && val) {
		ret = -ENETDOWN;
	} else if (val) {
		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
		if (!ns->page)
			ret = -ENOMEM;
	} else {
		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
					ns->page, false);
		ns->page = NULL;
	}

exit:
	rtnl_unlock();
	return ret;
}
913 
/* fops for the read/write "pp_hold" debugfs file. */
static const struct file_operations nsim_pp_hold_fops = {
	.open = simple_open,
	.read = nsim_pp_hold_read,
	.write = nsim_pp_hold_write,
	.llseek = generic_file_llseek,
	.owner = THIS_MODULE,
};
921 
/* alloc_netdev_mq() setup callback: ethernet defaults, random MAC,
 * offload feature flags, dstats accounting and XDP capabilities.
 */
static void nsim_setup(struct net_device *dev)
{
	ether_setup(dev);
	eth_hw_addr_random(dev);

	dev->flags &= ~IFF_MULTICAST;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->features |= NETIF_F_HIGHDMA |
			 NETIF_F_SG |
			 NETIF_F_FRAGLIST |
			 NETIF_F_HW_CSUM |
			 NETIF_F_LRO |
			 NETIF_F_TSO;
	dev->hw_features |= NETIF_F_HW_TC |
			    NETIF_F_SG |
			    NETIF_F_FRAGLIST |
			    NETIF_F_HW_CSUM |
			    NETIF_F_LRO |
			    NETIF_F_TSO |
			    NETIF_F_LOOPBACK;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
	dev->max_mtu = ETH_MAX_MTU;
	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD;
}
946 
/* Allocate the RX queue pointer array and one nsim_rq per RX queue.
 * On failure everything allocated so far is freed; returns -ENOMEM.
 */
static int nsim_queue_init(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	int i;

	ns->rq = kzalloc_objs(*ns->rq, dev->num_rx_queues, GFP_KERNEL_ACCOUNT);
	if (!ns->rq)
		return -ENOMEM;

	for (i = 0; i < dev->num_rx_queues; i++) {
		ns->rq[i] = nsim_queue_alloc();
		if (!ns->rq[i])
			goto err_free_prev;
	}

	return 0;

err_free_prev:
	/* Queues are still empty here, plain kfree() is sufficient. */
	while (i--)
		kfree(ns->rq[i]);
	kfree(ns->rq);
	return -ENOMEM;
}
970 
971 static void nsim_queue_uninit(struct netdevsim *ns)
972 {
973 	struct net_device *dev = ns->netdev;
974 	int i;
975 
976 	for (i = 0; i < dev->num_rx_queues; i++)
977 		nsim_queue_free(dev, ns->rq[i]);
978 
979 	kfree(ns->rq);
980 	ns->rq = NULL;
981 }
982 
/* Full (PF) port bring-up: PHC, ops, UDP tunnels, queues, BPF, macsec,
 * ipsec, netdev registration, PSP and (optionally) a debug notifier.
 * Error paths unwind in strict reverse order of the setup steps.
 */
static int nsim_init_netdevsim(struct netdevsim *ns)
{
	struct netdevsim *peer;
	struct mock_phc *phc;
	int err;

	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
	if (IS_ERR(phc))
		return PTR_ERR(phc);

	ns->phc = phc;
	ns->netdev->netdev_ops = &nsim_netdev_ops;
	ns->netdev->stat_ops = &nsim_stat_ops;
	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
	netdev_lockdep_set_classes(ns->netdev);

	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
	if (err)
		goto err_phc_destroy;

	rtnl_lock();
	err = nsim_queue_init(ns);
	if (err)
		goto err_utn_destroy;

	err = nsim_bpf_init(ns);
	if (err)
		goto err_rq_destroy;

	nsim_macsec_init(ns);
	nsim_ipsec_init(ns);

	err = register_netdevice(ns->netdev);
	if (err)
		goto err_ipsec_teardown;
	rtnl_unlock();

	err = nsim_psp_init(ns);
	if (err)
		goto err_unregister_netdev;

	/* Best effort: a failed notifier registration is not fatal. */
	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
		ns->nb.notifier_call = netdev_debug_event;
		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
							&ns->nn))
			ns->nb.notifier_call = NULL;
	}

	return 0;

err_unregister_netdev:
	rtnl_lock();
	/* Break the peer link before unregistering, as nsim_destroy() does. */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(ns->netdev);
err_ipsec_teardown:
	nsim_ipsec_teardown(ns);
	nsim_macsec_teardown(ns);
	nsim_bpf_uninit(ns);
err_rq_destroy:
	nsim_queue_uninit(ns);
err_utn_destroy:
	rtnl_unlock();
	nsim_udp_tunnels_info_destroy(ns->netdev);
err_phc_destroy:
	mock_phc_destroy(ns->phc);
	return err;
}
1053 
1054 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
1055 {
1056 	int err;
1057 
1058 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
1059 	rtnl_lock();
1060 	err = register_netdevice(ns->netdev);
1061 	rtnl_unlock();
1062 	return err;
1063 }
1064 
/* Undo the PF-only pieces of nsim_init_netdevsim() that outlive
 * unregister_netdevice(): UDP tunnel info and the mock PHC.
 */
static void nsim_exit_netdevsim(struct netdevsim *ns)
{
	nsim_udp_tunnels_info_destroy(ns->netdev);
	mock_phc_destroy(ns->phc);
}
1070 
/* Create one netdevsim port: allocate the netdev, wire it to the bus
 * device and devlink port, register it (PF or VF flavour) and expose
 * the debugfs knobs. Returns the new netdevsim or ERR_PTR().
 */
struct netdevsim *nsim_create(struct nsim_dev *nsim_dev,
			      struct nsim_dev_port *nsim_dev_port,
			      u8 perm_addr[ETH_ALEN])
{
	struct net_device *dev;
	struct netdevsim *ns;
	int err;

	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
			      nsim_dev->nsim_bus_dev->num_queues);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	if (perm_addr)
		memcpy(dev->perm_addr, perm_addr, ETH_ALEN);

	dev_net_set(dev, nsim_dev_net(nsim_dev));
	ns = netdev_priv(dev);
	ns->netdev = dev;
	ns->nsim_dev = nsim_dev;
	ns->nsim_dev_port = nsim_dev_port;
	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
	nsim_ethtool_init(ns);
	if (nsim_dev_port_is_pf(nsim_dev_port))
		err = nsim_init_netdevsim(ns);
	else
		err = nsim_init_netdevsim_vf(ns);
	if (err)
		goto err_free_netdev;

	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
					 ns, &nsim_pp_hold_fops);
	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
					 nsim_dev_port->ddir, ns,
					 &nsim_qreset_fops);
	return ns;

err_free_netdev:
	free_netdev(dev);
	return ERR_PTR(err);
}
1114 
/* Tear down a port created by nsim_create(): debugfs, notifier, PSP,
 * peer link, netdev unregistration and PF-only subsystems.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	/* Only set when registration succeeded in nsim_init_netdevsim(). */
	if (ns->nb.notifier_call)
		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
						      &ns->nn);

	nsim_psp_uninit(ns);

	rtnl_lock();
	/* Sever both directions of the peer link before unregistering. */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
					ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1154 
/* Identify a netdevsim PF port by its ops table (VF ports do not match). */
bool netdev_is_nsim(struct net_device *dev)
{
	return dev->netdev_ops == &nsim_netdev_ops;
}
1159 
/* rtnl_link_ops .validate: netdevsim devices cannot be created via
 * "ip link add"; point the user at the sysfs bus interface instead.
 */
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG_MOD(extack,
			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
	return -EOPNOTSUPP;
}
1167 
/* Registered only so the "netdevsim" kind exists; creation is rejected. */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1172 
/* Module init: dev subsystem, bus, then rtnl link kind — unwound in
 * reverse on failure.
 */
static int __init nsim_module_init(void)
{
	int err;

	err = nsim_dev_init();
	if (err)
		return err;

	err = nsim_bus_init();
	if (err)
		goto err_dev_exit;

	err = rtnl_link_register(&nsim_link_ops);
	if (err)
		goto err_bus_exit;

	return 0;

err_bus_exit:
	nsim_bus_exit();
err_dev_exit:
	nsim_dev_exit();
	return err;
}
1197 
/* Module exit: tear down in reverse order of nsim_module_init(). */
static void __exit nsim_module_exit(void)
{
	rtnl_link_unregister(&nsim_link_ops);
	nsim_bus_exit();
	nsim_dev_exit();
}
1204 
1205 module_init(nsim_module_init);
1206 module_exit(nsim_module_exit);
1207 MODULE_LICENSE("GPL");
1208 MODULE_DESCRIPTION("Simulated networking device for testing");
1209 MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1210