xref: /linux/drivers/net/netdevsim/netdev.c (revision efcb9a4d32d3d9b924642c086b868bfbb9a07c13)
1 /*
2  * Copyright (C) 2017 Netronome Systems, Inc.
3  *
4  * This software is licensed under the GNU General License Version 2,
5  * June 1991 as shown in the file COPYING in the top-level directory of this
6  * source tree.
7  *
8  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
9  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
10  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
11  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
12  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
13  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
14  */
15 
16 #include <linux/debugfs.h>
17 #include <linux/etherdevice.h>
18 #include <linux/ethtool_netlink.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/netdevice.h>
22 #include <linux/slab.h>
23 #include <net/netdev_queues.h>
24 #include <net/netdev_rx_queue.h>
25 #include <net/page_pool/helpers.h>
26 #include <net/netlink.h>
27 #include <net/net_shaper.h>
28 #include <net/netdev_lock.h>
29 #include <net/pkt_cls.h>
30 #include <net/rtnetlink.h>
31 #include <net/udp_tunnel.h>
32 #include <net/busy_poll.h>
33 
34 #include "netdevsim.h"
35 
MODULE_IMPORT_NS("NETDEV_INTERNAL");

/* Per-queue depth: used below both as the limit for the RX skb backlog
 * and as the page pool size.
 */
#define NSIM_RING_SIZE		256
39 
40 static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq)
41 {
42 	struct netdevsim *ns = netdev_priv(dev);
43 	struct net_device *peer_dev;
44 	struct netdevsim *peer_ns;
45 	struct netdev_queue *txq;
46 	u16 idx;
47 
48 	idx = rq->napi.index;
49 	rcu_read_lock();
50 	peer_ns = rcu_dereference(ns->peer);
51 	if (!peer_ns)
52 		goto out;
53 
54 	/* TX device */
55 	peer_dev = peer_ns->netdev;
56 	if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
57 		goto out;
58 
59 	txq = netdev_get_tx_queue(peer_dev, idx);
60 	if (!netif_tx_queue_stopped(txq))
61 		goto out;
62 
63 	netif_tx_wake_queue(txq);
64 out:
65 	rcu_read_unlock();
66 }
67 
68 static void nsim_stop_tx_queue(struct net_device *tx_dev,
69 			       struct net_device *rx_dev,
70 			       struct nsim_rq *rq,
71 			       u16 idx)
72 {
73 	/* If different queues size, do not stop, since it is not
74 	 * easy to find which TX queue is mapped here
75 	 */
76 	if (rx_dev->real_num_tx_queues != tx_dev->num_rx_queues)
77 		return;
78 
79 	/* rq is the queue on the receive side */
80 	netif_subqueue_try_stop(tx_dev, idx,
81 				NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue),
82 				NSIM_RING_SIZE / 2);
83 }
84 
85 static int nsim_napi_rx(struct net_device *tx_dev, struct net_device *rx_dev,
86 			struct nsim_rq *rq, struct sk_buff *skb)
87 {
88 	if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
89 		dev_kfree_skb_any(skb);
90 		return NET_RX_DROP;
91 	}
92 
93 	skb_queue_tail(&rq->skb_queue, skb);
94 
95 	/* Stop the peer TX queue avoiding dropping packets later */
96 	if (skb_queue_len(&rq->skb_queue) >= NSIM_RING_SIZE)
97 		nsim_stop_tx_queue(tx_dev, rx_dev, rq,
98 				   skb_get_queue_mapping(skb));
99 
100 	return NET_RX_SUCCESS;
101 }
102 
/* Forward @skb from @tx_dev to @rx_dev and queue it on @rq.
 *
 * Returns the result of __dev_forward_skb() if that is non-zero, otherwise
 * the result of nsim_napi_rx().
 */
static int nsim_forward_skb(struct net_device *tx_dev,
			    struct net_device *rx_dev,
			    struct sk_buff *skb,
			    struct nsim_rq *rq,
			    struct skb_ext *psp_ext)
{
	int err = __dev_forward_skb(rx_dev, skb);

	if (!err) {
		nsim_psp_handle_ext(skb, psp_ext);
		err = nsim_napi_rx(tx_dev, rx_dev, rq, skb);
	}

	return err;
}
119 
120 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
121 {
122 	struct netdevsim *ns = netdev_priv(dev);
123 	struct skb_ext *psp_ext = NULL;
124 	struct net_device *peer_dev;
125 	unsigned int len = skb->len;
126 	struct netdevsim *peer_ns;
127 	struct netdev_config *cfg;
128 	struct nsim_rq *rq;
129 	int rxq;
130 	int dr;
131 
132 	rcu_read_lock();
133 	if (!nsim_ipsec_tx(ns, skb))
134 		goto out_drop_any;
135 
136 	/* Check if loopback mode is enabled */
137 	if (dev->features & NETIF_F_LOOPBACK) {
138 		peer_ns = ns;
139 		peer_dev = dev;
140 	} else {
141 		peer_ns = rcu_dereference(ns->peer);
142 		if (!peer_ns)
143 			goto out_drop_any;
144 		peer_dev = peer_ns->netdev;
145 	}
146 
147 	dr = nsim_do_psp(skb, ns, peer_ns, &psp_ext);
148 	if (dr)
149 		goto out_drop_free;
150 
151 	rxq = skb_get_queue_mapping(skb);
152 	if (rxq >= peer_dev->num_rx_queues)
153 		rxq = rxq % peer_dev->num_rx_queues;
154 	rq = peer_ns->rq[rxq];
155 
156 	cfg = peer_dev->cfg;
157 	if (skb_is_nonlinear(skb) &&
158 	    (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED ||
159 	     (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED &&
160 	      cfg->hds_thresh > len)))
161 		skb_linearize(skb);
162 
163 	skb_tx_timestamp(skb);
164 	if (unlikely(nsim_forward_skb(dev, peer_dev,
165 				      skb, rq, psp_ext) == NET_RX_DROP))
166 		goto out_drop_cnt;
167 
168 	if (!hrtimer_active(&rq->napi_timer))
169 		hrtimer_start(&rq->napi_timer, us_to_ktime(5), HRTIMER_MODE_REL);
170 
171 	rcu_read_unlock();
172 	dev_dstats_tx_add(dev, len);
173 	return NETDEV_TX_OK;
174 
175 out_drop_any:
176 	dr = SKB_DROP_REASON_NOT_SPECIFIED;
177 out_drop_free:
178 	kfree_skb_reason(skb, dr);
179 out_drop_cnt:
180 	rcu_read_unlock();
181 	dev_dstats_tx_dropped(dev);
182 	return NETDEV_TX_OK;
183 }
184 
/* ndo_set_rx_mode handler: intentionally empty. */
static void nsim_set_rx_mode(struct net_device *dev)
{
}
188 
189 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
190 {
191 	struct netdevsim *ns = netdev_priv(dev);
192 
193 	if (ns->xdp.prog && !ns->xdp.prog->aux->xdp_has_frags &&
194 	    new_mtu > NSIM_XDP_MAX_MTU)
195 		return -EBUSY;
196 
197 	WRITE_ONCE(dev->mtu, new_mtu);
198 
199 	return 0;
200 }
201 
/* TC block callback: forwards all arguments to the BPF offload handler. */
static int
nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
{
	return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv);
}
207 
208 static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
209 {
210 	struct netdevsim *ns = netdev_priv(dev);
211 	struct nsim_dev *nsim_dev = ns->nsim_dev;
212 
213 	/* Only refuse multicast addresses, zero address can mean unset/any. */
214 	if (vf >= nsim_dev_get_vfs(nsim_dev) || is_multicast_ether_addr(mac))
215 		return -EINVAL;
216 	memcpy(nsim_dev->vfconfigs[vf].vf_mac, mac, ETH_ALEN);
217 
218 	return 0;
219 }
220 
221 static int nsim_set_vf_vlan(struct net_device *dev, int vf,
222 			    u16 vlan, u8 qos, __be16 vlan_proto)
223 {
224 	struct netdevsim *ns = netdev_priv(dev);
225 	struct nsim_dev *nsim_dev = ns->nsim_dev;
226 
227 	if (vf >= nsim_dev_get_vfs(nsim_dev) || vlan > 4095 || qos > 7)
228 		return -EINVAL;
229 
230 	nsim_dev->vfconfigs[vf].vlan = vlan;
231 	nsim_dev->vfconfigs[vf].qos = qos;
232 	nsim_dev->vfconfigs[vf].vlan_proto = vlan_proto;
233 
234 	return 0;
235 }
236 
237 static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max)
238 {
239 	struct netdevsim *ns = netdev_priv(dev);
240 	struct nsim_dev *nsim_dev = ns->nsim_dev;
241 
242 	if (nsim_esw_mode_is_switchdev(ns->nsim_dev)) {
243 		pr_err("Not supported in switchdev mode. Please use devlink API.\n");
244 		return -EOPNOTSUPP;
245 	}
246 
247 	if (vf >= nsim_dev_get_vfs(nsim_dev))
248 		return -EINVAL;
249 
250 	nsim_dev->vfconfigs[vf].min_tx_rate = min;
251 	nsim_dev->vfconfigs[vf].max_tx_rate = max;
252 
253 	return 0;
254 }
255 
256 static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val)
257 {
258 	struct netdevsim *ns = netdev_priv(dev);
259 	struct nsim_dev *nsim_dev = ns->nsim_dev;
260 
261 	if (vf >= nsim_dev_get_vfs(nsim_dev))
262 		return -EINVAL;
263 	nsim_dev->vfconfigs[vf].spoofchk_enabled = val;
264 
265 	return 0;
266 }
267 
268 static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val)
269 {
270 	struct netdevsim *ns = netdev_priv(dev);
271 	struct nsim_dev *nsim_dev = ns->nsim_dev;
272 
273 	if (vf >= nsim_dev_get_vfs(nsim_dev))
274 		return -EINVAL;
275 	nsim_dev->vfconfigs[vf].rss_query_enabled = val;
276 
277 	return 0;
278 }
279 
280 static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val)
281 {
282 	struct netdevsim *ns = netdev_priv(dev);
283 	struct nsim_dev *nsim_dev = ns->nsim_dev;
284 
285 	if (vf >= nsim_dev_get_vfs(nsim_dev))
286 		return -EINVAL;
287 	nsim_dev->vfconfigs[vf].trusted = val;
288 
289 	return 0;
290 }
291 
292 static int
293 nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
294 {
295 	struct netdevsim *ns = netdev_priv(dev);
296 	struct nsim_dev *nsim_dev = ns->nsim_dev;
297 
298 	if (vf >= nsim_dev_get_vfs(nsim_dev))
299 		return -EINVAL;
300 
301 	ivi->vf = vf;
302 	ivi->linkstate = nsim_dev->vfconfigs[vf].link_state;
303 	ivi->min_tx_rate = nsim_dev->vfconfigs[vf].min_tx_rate;
304 	ivi->max_tx_rate = nsim_dev->vfconfigs[vf].max_tx_rate;
305 	ivi->vlan = nsim_dev->vfconfigs[vf].vlan;
306 	ivi->vlan_proto = nsim_dev->vfconfigs[vf].vlan_proto;
307 	ivi->qos = nsim_dev->vfconfigs[vf].qos;
308 	memcpy(&ivi->mac, nsim_dev->vfconfigs[vf].vf_mac, ETH_ALEN);
309 	ivi->spoofchk = nsim_dev->vfconfigs[vf].spoofchk_enabled;
310 	ivi->trusted = nsim_dev->vfconfigs[vf].trusted;
311 	ivi->rss_query_en = nsim_dev->vfconfigs[vf].rss_query_enabled;
312 
313 	return 0;
314 }
315 
316 static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state)
317 {
318 	struct netdevsim *ns = netdev_priv(dev);
319 	struct nsim_dev *nsim_dev = ns->nsim_dev;
320 
321 	if (vf >= nsim_dev_get_vfs(nsim_dev))
322 		return -EINVAL;
323 
324 	switch (state) {
325 	case IFLA_VF_LINK_STATE_AUTO:
326 	case IFLA_VF_LINK_STATE_ENABLE:
327 	case IFLA_VF_LINK_STATE_DISABLE:
328 		break;
329 	default:
330 		return -EINVAL;
331 	}
332 
333 	nsim_dev->vfconfigs[vf].link_state = state;
334 
335 	return 0;
336 }
337 
338 static void nsim_taprio_stats(struct tc_taprio_qopt_stats *stats)
339 {
340 	stats->window_drops = 0;
341 	stats->tx_overruns = 0;
342 }
343 
344 static int nsim_setup_tc_taprio(struct net_device *dev,
345 				struct tc_taprio_qopt_offload *offload)
346 {
347 	int err = 0;
348 
349 	switch (offload->cmd) {
350 	case TAPRIO_CMD_REPLACE:
351 	case TAPRIO_CMD_DESTROY:
352 		break;
353 	case TAPRIO_CMD_STATS:
354 		nsim_taprio_stats(&offload->stats);
355 		break;
356 	default:
357 		err = -EOPNOTSUPP;
358 	}
359 
360 	return err;
361 }
362 
/* Driver-wide list handed to flow_block_cb_setup_simple() in nsim_setup_tc() */
static LIST_HEAD(nsim_block_cb_list);
364 
365 static int
366 nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data)
367 {
368 	struct netdevsim *ns = netdev_priv(dev);
369 
370 	switch (type) {
371 	case TC_SETUP_QDISC_TAPRIO:
372 		return nsim_setup_tc_taprio(dev, type_data);
373 	case TC_SETUP_BLOCK:
374 		return flow_block_cb_setup_simple(type_data,
375 						  &nsim_block_cb_list,
376 						  nsim_setup_tc_block_cb,
377 						  ns, ns, true);
378 	default:
379 		return -EOPNOTSUPP;
380 	}
381 }
382 
383 static int
384 nsim_set_features(struct net_device *dev, netdev_features_t features)
385 {
386 	struct netdevsim *ns = netdev_priv(dev);
387 
388 	if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC))
389 		return nsim_bpf_disable_tc(ns);
390 
391 	return 0;
392 }
393 
394 static int nsim_get_iflink(const struct net_device *dev)
395 {
396 	struct netdevsim *nsim, *peer;
397 	int iflink;
398 
399 	nsim = netdev_priv(dev);
400 
401 	rcu_read_lock();
402 	peer = rcu_dereference(nsim->peer);
403 	iflink = peer ? READ_ONCE(peer->netdev->ifindex) :
404 			READ_ONCE(dev->ifindex);
405 	rcu_read_unlock();
406 
407 	return iflink;
408 }
409 
410 static int nsim_rcv(struct nsim_rq *rq, int budget)
411 {
412 	struct net_device *dev = rq->napi.dev;
413 	struct bpf_prog *xdp_prog;
414 	struct netdevsim *ns;
415 	struct sk_buff *skb;
416 	unsigned int skblen;
417 	int i, ret;
418 
419 	ns = netdev_priv(dev);
420 	xdp_prog = READ_ONCE(ns->xdp.prog);
421 
422 	for (i = 0; i < budget; i++) {
423 		if (skb_queue_empty(&rq->skb_queue))
424 			break;
425 
426 		skb = skb_dequeue(&rq->skb_queue);
427 
428 		if (xdp_prog) {
429 			/* skb might be freed directly by XDP, save the len */
430 			skblen = skb->len;
431 
432 			if (skb->ip_summed == CHECKSUM_PARTIAL)
433 				skb_checksum_help(skb);
434 			ret = do_xdp_generic(xdp_prog, &skb);
435 			if (ret != XDP_PASS) {
436 				dev_dstats_rx_add(dev, skblen);
437 				continue;
438 			}
439 		}
440 
441 		/* skb might be discard at netif_receive_skb, save the len */
442 		dev_dstats_rx_add(dev, skb->len);
443 		napi_gro_receive(&rq->napi, skb);
444 	}
445 
446 	nsim_start_peer_tx_queue(dev, rq);
447 	return i;
448 }
449 
450 static int nsim_poll(struct napi_struct *napi, int budget)
451 {
452 	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
453 	int done;
454 
455 	done = nsim_rcv(rq, budget);
456 	if (done < budget)
457 		napi_complete_done(napi, done);
458 
459 	return done;
460 }
461 
462 static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
463 {
464 	struct page_pool_params params = {
465 		.order = 0,
466 		.pool_size = NSIM_RING_SIZE,
467 		.nid = NUMA_NO_NODE,
468 		.dev = &napi->dev->dev,
469 		.napi = napi,
470 		.dma_dir = DMA_BIDIRECTIONAL,
471 		.netdev = napi->dev,
472 	};
473 	struct page_pool *pool;
474 
475 	pool = page_pool_create(&params);
476 	if (IS_ERR(pool))
477 		return PTR_ERR(pool);
478 
479 	*p = pool;
480 	return 0;
481 }
482 
483 static int nsim_init_napi(struct netdevsim *ns)
484 {
485 	struct net_device *dev = ns->netdev;
486 	struct nsim_rq *rq;
487 	int err, i;
488 
489 	for (i = 0; i < dev->num_rx_queues; i++) {
490 		rq = ns->rq[i];
491 
492 		netif_napi_add_config_locked(dev, &rq->napi, nsim_poll, i);
493 	}
494 
495 	for (i = 0; i < dev->num_rx_queues; i++) {
496 		rq = ns->rq[i];
497 
498 		err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
499 		if (err)
500 			goto err_pp_destroy;
501 	}
502 
503 	return 0;
504 
505 err_pp_destroy:
506 	while (i--) {
507 		page_pool_destroy(ns->rq[i]->page_pool);
508 		ns->rq[i]->page_pool = NULL;
509 	}
510 
511 	for (i = 0; i < dev->num_rx_queues; i++)
512 		__netif_napi_del_locked(&ns->rq[i]->napi);
513 
514 	return err;
515 }
516 
517 static enum hrtimer_restart nsim_napi_schedule(struct hrtimer *timer)
518 {
519 	struct nsim_rq *rq;
520 
521 	rq = container_of(timer, struct nsim_rq, napi_timer);
522 	napi_schedule(&rq->napi);
523 
524 	return HRTIMER_NORESTART;
525 }
526 
/* Set up the per-queue timer that kicks NAPI via nsim_napi_schedule() */
static void nsim_rq_timer_init(struct nsim_rq *rq)
{
	hrtimer_setup(&rq->napi_timer, nsim_napi_schedule, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
}
532 
533 static void nsim_enable_napi(struct netdevsim *ns)
534 {
535 	struct net_device *dev = ns->netdev;
536 	int i;
537 
538 	for (i = 0; i < dev->num_rx_queues; i++) {
539 		struct nsim_rq *rq = ns->rq[i];
540 
541 		netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
542 		napi_enable_locked(&rq->napi);
543 	}
544 }
545 
546 static int nsim_open(struct net_device *dev)
547 {
548 	struct netdevsim *ns = netdev_priv(dev);
549 	struct netdevsim *peer;
550 	int err;
551 
552 	netdev_assert_locked(dev);
553 
554 	err = nsim_init_napi(ns);
555 	if (err)
556 		return err;
557 
558 	nsim_enable_napi(ns);
559 
560 	peer = rtnl_dereference(ns->peer);
561 	if (peer && netif_running(peer->netdev)) {
562 		netif_carrier_on(dev);
563 		netif_carrier_on(peer->netdev);
564 	}
565 
566 	return 0;
567 }
568 
569 static void nsim_del_napi(struct netdevsim *ns)
570 {
571 	struct net_device *dev = ns->netdev;
572 	int i;
573 
574 	for (i = 0; i < dev->num_rx_queues; i++) {
575 		struct nsim_rq *rq = ns->rq[i];
576 
577 		napi_disable_locked(&rq->napi);
578 		__netif_napi_del_locked(&rq->napi);
579 	}
580 	synchronize_net();
581 
582 	for (i = 0; i < dev->num_rx_queues; i++) {
583 		page_pool_destroy(ns->rq[i]->page_pool);
584 		ns->rq[i]->page_pool = NULL;
585 	}
586 }
587 
588 static int nsim_stop(struct net_device *dev)
589 {
590 	struct netdevsim *ns = netdev_priv(dev);
591 	struct netdevsim *peer;
592 
593 	netdev_assert_locked(dev);
594 
595 	netif_carrier_off(dev);
596 	peer = rtnl_dereference(ns->peer);
597 	if (peer)
598 		netif_carrier_off(peer->netdev);
599 
600 	nsim_del_napi(ns);
601 
602 	return 0;
603 }
604 
/* net_shaper .set callback: accepts every request without storing anything */
static int nsim_shaper_set(struct net_shaper_binding *binding,
			   const struct net_shaper *shaper,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
611 
/* net_shaper .delete callback: always reports success */
static int nsim_shaper_del(struct net_shaper_binding *binding,
			   const struct net_shaper_handle *handle,
			   struct netlink_ext_ack *extack)
{
	return 0;
}
618 
/* net_shaper .group callback: always reports success */
static int nsim_shaper_group(struct net_shaper_binding *binding,
			     int leaves_count,
			     const struct net_shaper *leaves,
			     const struct net_shaper *root,
			     struct netlink_ext_ack *extack)
{
	return 0;
}
627 
/* net_shaper .capabilities callback: sets every bit in @flags for any
 * @scope.
 */
static void nsim_shaper_cap(struct net_shaper_binding *binding,
			    enum net_shaper_scope scope,
			    unsigned long *flags)
{
	*flags = ULONG_MAX;
}
634 
/* Shaper callbacks; installed through nsim_netdev_ops.net_shaper_ops */
static const struct net_shaper_ops nsim_shaper_ops = {
	.set			= nsim_shaper_set,
	.delete			= nsim_shaper_del,
	.group			= nsim_shaper_group,
	.capabilities		= nsim_shaper_cap,
};
641 
/* Netdev callbacks for PF ports (assigned in nsim_init_netdevsim()).
 * netdev_is_nsim() identifies netdevsim devices by comparing against this
 * table.
 */
static const struct net_device_ops nsim_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_set_vf_mac		= nsim_set_vf_mac,
	.ndo_set_vf_vlan	= nsim_set_vf_vlan,
	.ndo_set_vf_rate	= nsim_set_vf_rate,
	.ndo_set_vf_spoofchk	= nsim_set_vf_spoofchk,
	.ndo_set_vf_trust	= nsim_set_vf_trust,
	.ndo_get_vf_config	= nsim_get_vf_config,
	.ndo_set_vf_link_state	= nsim_set_vf_link_state,
	.ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
	.ndo_get_iflink		= nsim_get_iflink,
	.ndo_bpf		= nsim_bpf,
	.ndo_open		= nsim_open,
	.ndo_stop		= nsim_stop,
	.net_shaper_ops		= &nsim_shaper_ops,
};
664 
/* Netdev callbacks for VF ports (assigned in nsim_init_netdevsim_vf()).
 * No open/stop, VF management, iflink, BPF or shaper callbacks here.
 */
static const struct net_device_ops nsim_vf_netdev_ops = {
	.ndo_start_xmit		= nsim_start_xmit,
	.ndo_set_rx_mode	= nsim_set_rx_mode,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= nsim_change_mtu,
	.ndo_setup_tc		= nsim_setup_tc,
	.ndo_set_features	= nsim_set_features,
};
674 
675 /* We don't have true per-queue stats, yet, so do some random fakery here.
676  * Only report stuff for queue 0.
677  */
678 static void nsim_get_queue_stats_rx(struct net_device *dev, int idx,
679 				    struct netdev_queue_stats_rx *stats)
680 {
681 	struct rtnl_link_stats64 rtstats = {};
682 
683 	if (!idx)
684 		dev_get_stats(dev, &rtstats);
685 
686 	stats->packets = rtstats.rx_packets - !!rtstats.rx_packets;
687 	stats->bytes = rtstats.rx_bytes;
688 }
689 
690 static void nsim_get_queue_stats_tx(struct net_device *dev, int idx,
691 				    struct netdev_queue_stats_tx *stats)
692 {
693 	struct rtnl_link_stats64 rtstats = {};
694 
695 	if (!idx)
696 		dev_get_stats(dev, &rtstats);
697 
698 	stats->packets = rtstats.tx_packets - !!rtstats.tx_packets;
699 	stats->bytes = rtstats.tx_bytes;
700 }
701 
702 static void nsim_get_base_stats(struct net_device *dev,
703 				struct netdev_queue_stats_rx *rx,
704 				struct netdev_queue_stats_tx *tx)
705 {
706 	struct rtnl_link_stats64 rtstats = {};
707 
708 	dev_get_stats(dev, &rtstats);
709 
710 	rx->packets = !!rtstats.rx_packets;
711 	rx->bytes = 0;
712 	tx->packets = !!rtstats.tx_packets;
713 	tx->bytes = 0;
714 }
715 
/* Queue statistics callbacks (assigned in nsim_init_netdevsim()) */
static const struct netdev_stat_ops nsim_stat_ops = {
	.get_queue_stats_tx	= nsim_get_queue_stats_tx,
	.get_queue_stats_rx	= nsim_get_queue_stats_rx,
	.get_base_stats		= nsim_get_base_stats,
};
721 
722 static struct nsim_rq *nsim_queue_alloc(void)
723 {
724 	struct nsim_rq *rq;
725 
726 	rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
727 	if (!rq)
728 		return NULL;
729 
730 	skb_queue_head_init(&rq->skb_queue);
731 	nsim_rq_timer_init(rq);
732 	return rq;
733 }
734 
735 static void nsim_queue_free(struct net_device *dev, struct nsim_rq *rq)
736 {
737 	hrtimer_cancel(&rq->napi_timer);
738 
739 	if (rq->skb_queue.qlen) {
740 		local_bh_disable();
741 		dev_dstats_rx_dropped_add(dev, rq->skb_queue.qlen);
742 		local_bh_enable();
743 	}
744 
745 	skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
746 	kfree(rq);
747 }
748 
/* Queue reset mode is controlled by ns->rq_reset_mode.
 * - normal - new NAPI, new pool (old NAPI still enabled when new one is added)
 * - mode 1 - only a new pool is allocated (NAPI is just disabled / enabled)
 * - mode 2 - new NAPI, new pool (old NAPI removed before new one is added)
 * - mode 3 - new NAPI, new pool (old NAPI disabled when new one is added)
 *
 * Per-queue scratch memory for the queue management ops: @pp is used in
 * mode 1, @rq in all other modes.
 */
struct nsim_queue_mem {
	struct nsim_rq *rq;
	struct page_pool *pp;
};
759 
760 static int
761 nsim_queue_mem_alloc(struct net_device *dev,
762 		     struct netdev_queue_config *qcfg,
763 		     void *per_queue_mem, int idx)
764 {
765 	struct nsim_queue_mem *qmem = per_queue_mem;
766 	struct netdevsim *ns = netdev_priv(dev);
767 	int err;
768 
769 	if (ns->rq_reset_mode > 3)
770 		return -EINVAL;
771 
772 	if (ns->rq_reset_mode == 1) {
773 		if (!netif_running(ns->netdev))
774 			return -ENETDOWN;
775 		return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
776 	}
777 
778 	qmem->rq = nsim_queue_alloc();
779 	if (!qmem->rq)
780 		return -ENOMEM;
781 
782 	err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
783 	if (err)
784 		goto err_free;
785 
786 	if (!ns->rq_reset_mode)
787 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
788 					     idx);
789 
790 	return 0;
791 
792 err_free:
793 	nsim_queue_free(dev, qmem->rq);
794 	return err;
795 }
796 
797 static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
798 {
799 	struct nsim_queue_mem *qmem = per_queue_mem;
800 	struct netdevsim *ns = netdev_priv(dev);
801 
802 	page_pool_destroy(qmem->pp);
803 	if (qmem->rq) {
804 		if (!ns->rq_reset_mode)
805 			netif_napi_del_locked(&qmem->rq->napi);
806 		page_pool_destroy(qmem->rq->page_pool);
807 		nsim_queue_free(dev, qmem->rq);
808 	}
809 }
810 
811 static int
812 nsim_queue_start(struct net_device *dev, struct netdev_queue_config *qcfg,
813 		 void *per_queue_mem, int idx)
814 {
815 	struct nsim_queue_mem *qmem = per_queue_mem;
816 	struct netdevsim *ns = netdev_priv(dev);
817 
818 	netdev_assert_locked(dev);
819 
820 	if (ns->rq_reset_mode == 1) {
821 		ns->rq[idx]->page_pool = qmem->pp;
822 		napi_enable_locked(&ns->rq[idx]->napi);
823 		return 0;
824 	}
825 
826 	/* netif_napi_add()/_del() should normally be called from alloc/free,
827 	 * here we want to test various call orders.
828 	 */
829 	if (ns->rq_reset_mode == 2) {
830 		netif_napi_del_locked(&ns->rq[idx]->napi);
831 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
832 					     idx);
833 	} else if (ns->rq_reset_mode == 3) {
834 		netif_napi_add_config_locked(dev, &qmem->rq->napi, nsim_poll,
835 					     idx);
836 		netif_napi_del_locked(&ns->rq[idx]->napi);
837 	}
838 
839 	ns->rq[idx] = qmem->rq;
840 	napi_enable_locked(&ns->rq[idx]->napi);
841 
842 	return 0;
843 }
844 
845 static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
846 {
847 	struct nsim_queue_mem *qmem = per_queue_mem;
848 	struct netdevsim *ns = netdev_priv(dev);
849 
850 	netdev_assert_locked(dev);
851 
852 	napi_disable_locked(&ns->rq[idx]->napi);
853 
854 	if (ns->rq_reset_mode == 1) {
855 		qmem->pp = ns->rq[idx]->page_pool;
856 		page_pool_disable_direct_recycling(qmem->pp);
857 	} else {
858 		qmem->rq = ns->rq[idx];
859 	}
860 
861 	return 0;
862 }
863 
/* Queue management callbacks (assigned in nsim_init_netdevsim()) */
static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
	.ndo_queue_mem_size	= sizeof(struct nsim_queue_mem),
	.ndo_queue_mem_alloc	= nsim_queue_mem_alloc,
	.ndo_queue_mem_free	= nsim_queue_mem_free,
	.ndo_queue_start	= nsim_queue_start,
	.ndo_queue_stop		= nsim_queue_stop,
};
871 
872 static ssize_t
873 nsim_qreset_write(struct file *file, const char __user *data,
874 		  size_t count, loff_t *ppos)
875 {
876 	struct netdevsim *ns = file->private_data;
877 	unsigned int queue, mode;
878 	char buf[32];
879 	ssize_t ret;
880 
881 	if (count >= sizeof(buf))
882 		return -EINVAL;
883 	if (copy_from_user(buf, data, count))
884 		return -EFAULT;
885 	buf[count] = '\0';
886 
887 	ret = sscanf(buf, "%u %u", &queue, &mode);
888 	if (ret != 2)
889 		return -EINVAL;
890 
891 	netdev_lock(ns->netdev);
892 	if (queue >= ns->netdev->real_num_rx_queues) {
893 		ret = -EINVAL;
894 		goto exit_unlock;
895 	}
896 
897 	ns->rq_reset_mode = mode;
898 	ret = netdev_rx_queue_restart(ns->netdev, queue);
899 	ns->rq_reset_mode = 0;
900 	if (ret)
901 		goto exit_unlock;
902 
903 	ret = count;
904 exit_unlock:
905 	netdev_unlock(ns->netdev);
906 	return ret;
907 }
908 
/* File operations of the write-only "queue_reset" debugfs file */
static const struct file_operations nsim_qreset_fops = {
	.open = simple_open,
	.write = nsim_qreset_write,
	.owner = THIS_MODULE,
};
914 
915 static ssize_t
916 nsim_pp_hold_read(struct file *file, char __user *data,
917 		  size_t count, loff_t *ppos)
918 {
919 	struct netdevsim *ns = file->private_data;
920 	char buf[3] = "n\n";
921 
922 	if (ns->page)
923 		buf[0] = 'y';
924 
925 	return simple_read_from_buffer(data, count, ppos, buf, 2);
926 }
927 
928 static ssize_t
929 nsim_pp_hold_write(struct file *file, const char __user *data,
930 		   size_t count, loff_t *ppos)
931 {
932 	struct netdevsim *ns = file->private_data;
933 	ssize_t ret;
934 	bool val;
935 
936 	ret = kstrtobool_from_user(data, count, &val);
937 	if (ret)
938 		return ret;
939 
940 	rtnl_lock();
941 	ret = count;
942 	if (val == !!ns->page)
943 		goto exit;
944 
945 	if (!netif_running(ns->netdev) && val) {
946 		ret = -ENETDOWN;
947 	} else if (val) {
948 		ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
949 		if (!ns->page)
950 			ret = -ENOMEM;
951 	} else {
952 		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
953 					ns->page, false);
954 		ns->page = NULL;
955 	}
956 
957 exit:
958 	rtnl_unlock();
959 	return ret;
960 }
961 
/* File operations of the "pp_hold" debugfs file */
static const struct file_operations nsim_pp_hold_fops = {
	.open = simple_open,
	.read = nsim_pp_hold_read,
	.write = nsim_pp_hold_write,
	.llseek = generic_file_llseek,
	.owner = THIS_MODULE,
};
969 
970 static void nsim_setup(struct net_device *dev)
971 {
972 	ether_setup(dev);
973 	eth_hw_addr_random(dev);
974 
975 	dev->flags &= ~IFF_MULTICAST;
976 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
977 	dev->features |= NETIF_F_HIGHDMA |
978 			 NETIF_F_SG |
979 			 NETIF_F_FRAGLIST |
980 			 NETIF_F_HW_CSUM |
981 			 NETIF_F_LRO |
982 			 NETIF_F_TSO;
983 	dev->hw_features |= NETIF_F_HW_TC |
984 			    NETIF_F_SG |
985 			    NETIF_F_FRAGLIST |
986 			    NETIF_F_HW_CSUM |
987 			    NETIF_F_LRO |
988 			    NETIF_F_TSO |
989 			    NETIF_F_LOOPBACK;
990 	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
991 	dev->max_mtu = ETH_MAX_MTU;
992 	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_HW_OFFLOAD;
993 }
994 
995 static int nsim_queue_init(struct netdevsim *ns)
996 {
997 	struct net_device *dev = ns->netdev;
998 	int i;
999 
1000 	ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
1001 			 GFP_KERNEL_ACCOUNT);
1002 	if (!ns->rq)
1003 		return -ENOMEM;
1004 
1005 	for (i = 0; i < dev->num_rx_queues; i++) {
1006 		ns->rq[i] = nsim_queue_alloc();
1007 		if (!ns->rq[i])
1008 			goto err_free_prev;
1009 	}
1010 
1011 	return 0;
1012 
1013 err_free_prev:
1014 	while (i--)
1015 		kfree(ns->rq[i]);
1016 	kfree(ns->rq);
1017 	return -ENOMEM;
1018 }
1019 
1020 static void nsim_queue_uninit(struct netdevsim *ns)
1021 {
1022 	struct net_device *dev = ns->netdev;
1023 	int i;
1024 
1025 	for (i = 0; i < dev->num_rx_queues; i++)
1026 		nsim_queue_free(dev, ns->rq[i]);
1027 
1028 	kfree(ns->rq);
1029 	ns->rq = NULL;
1030 }
1031 
/* Initialise and register a PF port netdev.
 *
 * Creates the mock PHC, installs the netdev, stats and queue management
 * ops, sets up UDP tunnel info, RX queues, BPF, MACsec and IPsec state,
 * registers the netdev and finally initialises PSP. With CONFIG_DEBUG_NET
 * a debug notifier is registered as well; failing to do so is not an error.
 *
 * Returns 0 on success or a negative errno, in which case everything set up
 * so far has been torn down again.
 */
static int nsim_init_netdevsim(struct netdevsim *ns)
{
	struct netdevsim *peer;
	struct mock_phc *phc;
	int err;

	phc = mock_phc_create(&ns->nsim_bus_dev->dev);
	if (IS_ERR(phc))
		return PTR_ERR(phc);

	ns->phc = phc;
	ns->netdev->netdev_ops = &nsim_netdev_ops;
	ns->netdev->stat_ops = &nsim_stat_ops;
	ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
	netdev_lockdep_set_classes(ns->netdev);

	err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
	if (err)
		goto err_phc_destroy;

	/* RTNL is held from here until the netdev has been registered */
	rtnl_lock();
	err = nsim_queue_init(ns);
	if (err)
		goto err_utn_destroy;

	err = nsim_bpf_init(ns);
	if (err)
		goto err_rq_destroy;

	nsim_macsec_init(ns);
	nsim_ipsec_init(ns);

	err = register_netdevice(ns->netdev);
	if (err)
		goto err_ipsec_teardown;
	rtnl_unlock();

	err = nsim_psp_init(ns);
	if (err)
		goto err_unregister_netdev;

	if (IS_ENABLED(CONFIG_DEBUG_NET)) {
		ns->nb.notifier_call = netdev_debug_event;
		/* A NULL notifier_call tells nsim_destroy() there is nothing
		 * to unregister.
		 */
		if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
							&ns->nn))
			ns->nb.notifier_call = NULL;
	}

	return 0;

err_unregister_netdev:
	/* RTNL was dropped after registration; take it again so that the
	 * labels below run with it held, as on the earlier error paths.
	 */
	rtnl_lock();
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(ns->netdev);
err_ipsec_teardown:
	nsim_ipsec_teardown(ns);
	nsim_macsec_teardown(ns);
	nsim_bpf_uninit(ns);
err_rq_destroy:
	nsim_queue_uninit(ns);
err_utn_destroy:
	rtnl_unlock();
	nsim_udp_tunnels_info_destroy(ns->netdev);
err_phc_destroy:
	mock_phc_destroy(ns->phc);
	return err;
}
1102 
1103 static int nsim_init_netdevsim_vf(struct netdevsim *ns)
1104 {
1105 	int err;
1106 
1107 	ns->netdev->netdev_ops = &nsim_vf_netdev_ops;
1108 	rtnl_lock();
1109 	err = register_netdevice(ns->netdev);
1110 	rtnl_unlock();
1111 	return err;
1112 }
1113 
/* Release the UDP tunnel info and mock PHC created by nsim_init_netdevsim() */
static void nsim_exit_netdevsim(struct netdevsim *ns)
{
	nsim_udp_tunnels_info_destroy(ns->netdev);
	mock_phc_destroy(ns->phc);
}
1119 
1120 struct netdevsim *nsim_create(struct nsim_dev *nsim_dev,
1121 			      struct nsim_dev_port *nsim_dev_port,
1122 			      u8 perm_addr[ETH_ALEN])
1123 {
1124 	struct net_device *dev;
1125 	struct netdevsim *ns;
1126 	int err;
1127 
1128 	dev = alloc_netdev_mq(sizeof(*ns), "eth%d", NET_NAME_UNKNOWN, nsim_setup,
1129 			      nsim_dev->nsim_bus_dev->num_queues);
1130 	if (!dev)
1131 		return ERR_PTR(-ENOMEM);
1132 
1133 	if (perm_addr)
1134 		memcpy(dev->perm_addr, perm_addr, ETH_ALEN);
1135 
1136 	dev_net_set(dev, nsim_dev_net(nsim_dev));
1137 	ns = netdev_priv(dev);
1138 	ns->netdev = dev;
1139 	ns->nsim_dev = nsim_dev;
1140 	ns->nsim_dev_port = nsim_dev_port;
1141 	ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
1142 	SET_NETDEV_DEV(dev, &ns->nsim_bus_dev->dev);
1143 	SET_NETDEV_DEVLINK_PORT(dev, &nsim_dev_port->devlink_port);
1144 	nsim_ethtool_init(ns);
1145 	if (nsim_dev_port_is_pf(nsim_dev_port))
1146 		err = nsim_init_netdevsim(ns);
1147 	else
1148 		err = nsim_init_netdevsim_vf(ns);
1149 	if (err)
1150 		goto err_free_netdev;
1151 
1152 	ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
1153 					 ns, &nsim_pp_hold_fops);
1154 	ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
1155 					 nsim_dev_port->ddir, ns,
1156 					 &nsim_qreset_fops);
1157 	return ns;
1158 
1159 err_free_netdev:
1160 	free_netdev(dev);
1161 	return ERR_PTR(err);
1162 }
1163 
/* Tear down and free a netdevsim port created by nsim_create().
 *
 * Removes the debugfs files and the optional debug notifier, uninitialises
 * PSP, unlinks the peer and unregisters the netdev under RTNL, releases the
 * PF-only resources, returns a page still held via "pp_hold" and finally
 * frees the netdev.
 */
void nsim_destroy(struct netdevsim *ns)
{
	struct net_device *dev = ns->netdev;
	struct netdevsim *peer;

	debugfs_remove(ns->qr_dfs);
	debugfs_remove(ns->pp_dfs);

	/* Only set when the notifier was registered successfully */
	if (ns->nb.notifier_call)
		unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
						      &ns->nn);

	nsim_psp_uninit(ns);

	rtnl_lock();
	/* Break the link in both directions before unregistering */
	peer = rtnl_dereference(ns->peer);
	if (peer)
		RCU_INIT_POINTER(peer->peer, NULL);
	RCU_INIT_POINTER(ns->peer, NULL);
	unregister_netdevice(dev);
	if (nsim_dev_port_is_pf(ns->nsim_dev_port)) {
		nsim_macsec_teardown(ns);
		nsim_ipsec_teardown(ns);
		nsim_bpf_uninit(ns);
		nsim_queue_uninit(ns);
	}
	rtnl_unlock();
	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
		nsim_exit_netdevsim(ns);

	/* Put this intentionally late to exercise the orphaning path */
	if (ns->page) {
		page_pool_put_full_page(pp_page_to_nmdesc(ns->page)->pp,
					ns->page, false);
		ns->page = NULL;
	}

	free_netdev(dev);
}
1203 
/* Return true if @dev uses the PF ops table nsim_netdev_ops. Devices using
 * nsim_vf_netdev_ops do not match.
 */
bool netdev_is_nsim(struct net_device *dev)
{
	return dev->netdev_ops == &nsim_netdev_ops;
}
1208 
/* rtnl_link_ops .validate callback: always fails with -EOPNOTSUPP and an
 * extack message pointing at the sysfs interface for creating devices.
 */
static int nsim_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG_MOD(extack,
			   "Please use: echo \"[ID] [PORT_COUNT] [NUM_QUEUES]\" > /sys/bus/netdevsim/new_device");
	return -EOPNOTSUPP;
}
1216 
/* rtnl link ops for the driver; .validate rejects every request */
static struct rtnl_link_ops nsim_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.validate	= nsim_validate,
};
1221 
1222 static int __init nsim_module_init(void)
1223 {
1224 	int err;
1225 
1226 	err = nsim_dev_init();
1227 	if (err)
1228 		return err;
1229 
1230 	err = nsim_bus_init();
1231 	if (err)
1232 		goto err_dev_exit;
1233 
1234 	err = rtnl_link_register(&nsim_link_ops);
1235 	if (err)
1236 		goto err_bus_exit;
1237 
1238 	return 0;
1239 
1240 err_bus_exit:
1241 	nsim_bus_exit();
1242 err_dev_exit:
1243 	nsim_dev_exit();
1244 	return err;
1245 }
1246 
/* Module exit: undo nsim_module_init() in reverse order */
static void __exit nsim_module_exit(void)
{
	rtnl_link_unregister(&nsim_link_ops);
	nsim_bus_exit();
	nsim_dev_exit();
}
1253 
/* Module entry points and metadata */
module_init(nsim_module_init);
module_exit(nsim_module_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simulated networking device for testing");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
1259